Skip to content

Commit

Permalink
reorganized.
Browse files Browse the repository at this point in the history
  • Loading branch information
philoushka committed Mar 18, 2014
1 parent 678169b commit bf2a748
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 1 deletion.
87 changes: 87 additions & 0 deletions Console/FileOps.cs
@@ -0,0 +1,87 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LargeFileSplitter
{
/// <summary>
/// Splits a large text file into several smaller files. The first line of the source
/// file is treated as a header and is repeated at the top of every file produced.
/// </summary>
public class FileOps
{
    /// <summary>
    /// The number of output files actually written by <see cref="WriteFiles"/>.
    /// </summary>
    public int NumFilesCreated { get; set; }

    /// <summary>
    /// Every line of the source file, header included. This is either a fully
    /// materialized list or, after an out-of-memory fallback, a lazy sequence
    /// that reads the file again each time it is enumerated.
    /// </summary>
    private IEnumerable<string> AllLinesToWrite { get; set; }

    /// <summary>
    /// The job describing which file to split and into how many pieces.
    /// </summary>
    private SplitJob Job { get; set; }

    /// <summary>
    /// Reads the source file named by <paramref name="job"/> and records its total
    /// line count on the job so that the number of lines per output file can be derived.
    /// </summary>
    /// <param name="job">The split job to perform.</param>
    /// <exception cref="ArgumentNullException"><paramref name="job"/> is null.</exception>
    public FileOps(SplitJob job)
    {
        if (job == null)
        {
            throw new ArgumentNullException("job");
        }

        this.Job = job;
        AllLinesToWrite = ReadFile(Job.FileToSplit);
        Job.TotalLines = AllLinesToWrite.Count();
    }

    /// <summary>
    /// Splits the source file and prints the elapsed time to the console.
    /// Returns immediately, printing nothing, when the source file is empty.
    /// </summary>
    public void SplitFile()
    {
        var sw = Stopwatch.StartNew();

        if (!AllLinesToWrite.Any())
        {
            return;
        }

        WriteFiles();
        sw.Stop();
        Console.WriteLine("Time Spent (s): " + sw.Elapsed.TotalSeconds.ToString());
    }

    /// <summary>
    /// Read the contents of the file from disk.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <returns>
    /// All lines of the file held in memory, or, when loading them all raises an
    /// <see cref="OutOfMemoryException"/>, a lazy sequence that streams the lines from disk.
    /// </returns>
    public IEnumerable<string> ReadFile(string filePath)
    {
        try
        {
            return File.ReadLines(filePath).ToList();
        }
        catch (System.OutOfMemoryException)
        {
            Console.WriteLine("We encountered an OutOfMemoryException. Switching to a slower method of splitting.");
            return File.ReadLines(filePath);
        }
    }

    /// <summary>
    /// Writes the lines read from disk into at most <c>Job.NumFilesToCreate</c> files of
    /// <c>Job.LinesPerFile</c> data lines each, every one starting with the header line.
    /// Output files are created next to the source file and named "{n}-{source file name}".
    /// Does nothing when the source file is empty.
    /// </summary>
    public void WriteFiles()
    {
        // Walk the source exactly once. Re-running Skip(...) for every output file would
        // rescan the list from the start each time and, in the streaming fallback,
        // re-read the whole source file from disk once per output file.
        using (IEnumerator<string> lines = AllLinesToWrite.GetEnumerator())
        {
            if (!lines.MoveNext())
            {
                return;
            }

            string header = lines.Current;

            int filesRequested = Job.NumFilesToCreate;
            if (filesRequested == 0)
            {
                // No files requested. Returning here also keeps LinesPerFile, which
                // divides by the file count, from being evaluated with a zero divisor.
                return;
            }

            int linesPerFile = Job.LinesPerFile;
            FileInfo baseFile = new FileInfo(Job.FileToSplit);

            // Enumerable.Range rejects a negative count, as it did before.
            foreach (int i in Enumerable.Range(1, filesRequested))
            {
                // Start each batch with the header, then pull up to linesPerFile data lines.
                // The count is tested before MoveNext so that no line is consumed and dropped.
                List<string> newFileContents = new List<string> { header };
                while (newFileContents.Count <= linesPerFile && lines.MoveNext())
                {
                    newFileContents.Add(lines.Current);
                }

                // Only the header is present: the source is exhausted (the user asked for
                // more files than are needed), so no further files have to be created.
                if (newFileContents.Count == 1)
                {
                    break;
                }

                string newFile = Path.Combine(baseFile.DirectoryName, string.Format("{0}-{1}", i, baseFile.Name));
                File.WriteAllLines(newFile, newFileContents);
                NumFilesCreated++;
            }
        }
    }
}
}
57 changes: 57 additions & 0 deletions Console/LargeFileSplitter.csproj
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project for the LargeFileSplitter console executable. -->
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Pull in the shared MSBuild property defaults when that file exists on this machine. -->
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<!-- Settings shared by every configuration. Configuration and Platform fall back to Debug and AnyCPU when not supplied. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{93322CCA-D6F7-4F88-91E5-6FE42DC3131A}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>LargeFileSplitter</RootNamespace>
<AssemblyName>LargeFileSplitter</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<!-- Debug|AnyCPU: full debug symbols, optimizations off, DEBUG and TRACE defined, output in bin\Debug\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Release|AnyCPU: pdb-only symbols, optimizations on, only TRACE defined, output in bin\Release\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Framework assemblies referenced by the project. -->
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<!-- C# source files compiled into the assembly. -->
<ItemGroup>
<Compile Include="FileOps.cs" />
<Compile Include="SplitJob.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<!-- Standard C# build targets. -->
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
File renamed without changes.
36 changes: 36 additions & 0 deletions Console/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General information about the assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with the assembly.
// Only the title, product and copyright carry values here; the description,
// configuration, company, trademark and culture are left as empty strings.
[assembly: AssemblyTitle("LargeFileSplitter")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("LargeFileSplitter")]
[assembly: AssemblyCopyright("Copyright © 2014")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is the ID of the typelib if this project is exposed to COM.
[assembly: Guid("1c2c5879-e40d-4b54-bd63-94adad84f609")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values, or you can default the Build and Revision numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Both the assembly version and the file version are pinned to 1.0.0.0 here.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
25 changes: 25 additions & 0 deletions Console/SplitJob.cs
@@ -0,0 +1,25 @@
using System;
namespace LargeFileSplitter
{
/// <summary>
/// Describes one split operation: the file to split, how many files to split it into,
/// and the line counts derived from those choices.
/// </summary>
public class SplitJob
{
    /// <summary>
    /// The user's choice for the max number of files to create.
    /// </summary>
    public int NumFilesToCreate { get; set; }

    /// <summary>
    /// The large file on disk to be split.
    /// </summary>
    public string FileToSplit { get; set; }

    /// <summary>
    /// A calculated value determining how many lines go into each file:
    /// <see cref="TotalLines"/> divided by <see cref="NumFilesToCreate"/>, rounded up so
    /// that a fractional remainder never causes lines to be missed.
    /// Returns 0 when <see cref="NumFilesToCreate"/> is zero or negative, because no
    /// meaningful per-file count exists and dividing by zero would throw.
    /// </summary>
    public int LinesPerFile
    {
        get
        {
            if (this.NumFilesToCreate <= 0)
            {
                return 0;
            }

            // Decimal division keeps the fraction; Ceiling then rounds it up.
            return Convert.ToInt32(Math.Ceiling(this.TotalLines / this.NumFilesToCreate));
        }
    }

    /// <summary>
    /// The number of lines that the large file contains.
    /// </summary>
    public decimal TotalLines { get; set; }
}
}
2 changes: 1 addition & 1 deletion LargeFileSplitter.sln
Expand Up @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.30110.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LargeFileSplitter", "LargeFileSplitter.csproj", "{93322CCA-D6F7-4F88-91E5-6FE42DC3131A}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LargeFileSplitter", "Console\LargeFileSplitter.csproj", "{93322CCA-D6F7-4F88-91E5-6FE42DC3131A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
Binary file added readme/cli.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added readme/console.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit bf2a748

Please sign in to comment.