Skip to content

Commit

Permalink
Optimizing the way we are generating JSON files
Browse files Browse the repository at this point in the history
  • Loading branch information
ayende committed Jun 23, 2011
1 parent 84dd369 commit 9b198b5
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 15 deletions.
7 changes: 6 additions & 1 deletion ETL/Raven.Etl.sln
@@ -1,8 +1,9 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Raven.StackOverflow.Etl", "Raven.StackOverflow.Etl\Raven.StackOverflow.Etl.csproj", "{3CA2ED91-8A67-4CD4-B728-91470E3CE97C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Uploader", "Uploader\Uploader.csproj", "{E4D12C01-659E-4BA9-8B87-D967C175FBF0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x86 = Debug|x86
Expand All @@ -13,6 +14,10 @@ Global
{3CA2ED91-8A67-4CD4-B728-91470E3CE97C}.Debug|x86.Build.0 = Debug|x86
{3CA2ED91-8A67-4CD4-B728-91470E3CE97C}.Release|x86.ActiveCfg = Release|x86
{3CA2ED91-8A67-4CD4-B728-91470E3CE97C}.Release|x86.Build.0 = Release|x86
{E4D12C01-659E-4BA9-8B87-D967C175FBF0}.Debug|x86.ActiveCfg = Debug|x86
{E4D12C01-659E-4BA9-8B87-D967C175FBF0}.Debug|x86.Build.0 = Debug|x86
{E4D12C01-659E-4BA9-8B87-D967C175FBF0}.Release|x86.ActiveCfg = Release|x86
{E4D12C01-659E-4BA9-8B87-D967C175FBF0}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
7 changes: 2 additions & 5 deletions ETL/Raven.StackOverflow.Etl/Generic/SimplePipelineExecutor.cs
@@ -1,13 +1,10 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.Generic;
using Rhino.Etl.Core;
using Rhino.Etl.Core.Enumerables;
using Rhino.Etl.Core.Operations;
using Rhino.Etl.Core.Pipelines;

namespace ETL
namespace Raven.StackOverflow.Etl.Generic
{
public class SimplePipelineExecutor : AbstractPipelineExecuter
{
Expand Down
1 change: 0 additions & 1 deletion ETL/Raven.StackOverflow.Etl/Posts/CommentsProcess.cs
Expand Up @@ -5,7 +5,6 @@
//-----------------------------------------------------------------------
using System;
using System.IO;
using ETL;
using Raven.Database;
using Raven.StackOverflow.Etl.Generic;
using Rhino.Etl.Core;
Expand Down
2 changes: 1 addition & 1 deletion ETL/Raven.StackOverflow.Etl/Posts/PostsProcess.cs
Expand Up @@ -24,7 +24,7 @@ public PostsProcess(string inputPath, string outputPath)

// Configures this ETL process: picks the pipeline executer, then registers an
// XmlRowOperationFile built from <inputPath>/posts.xml followed by a RowToDatabase
// operation for "Posts" that receives _outputPath.
protected override void Initialize()
{
// NOTE(review): this listing is a rendered diff ("1 addition & 1 deletion") without
// +/- markers; the SingleThreadedPipelineExecuter line is presumably the removed one
// and the SimplePipelineExecutor line its replacement - confirm against the repository.
PipelineExecuter = new SingleThreadedPipelineExecuter();
PipelineExecuter = new SimplePipelineExecutor();
Register(new XmlRowOperationFile(Path.Combine(inputPath, "posts.xml")));
// The key selector builds "posts/" + the row's "Id" value; presumably RowToDatabase
// uses it as the document key - verify against RowToDatabase.
Register(new RowToDatabase("Posts", doc => "posts/" + doc["Id"], _outputPath));
}
Expand Down
1 change: 0 additions & 1 deletion ETL/Raven.StackOverflow.Etl/Posts/VotesProcess.cs
Expand Up @@ -5,7 +5,6 @@
//-----------------------------------------------------------------------
using System;
using System.IO;
using ETL;
using Raven.StackOverflow.Etl.Generic;
using Rhino.Etl.Core;
using Rhino.Etl.Core.Pipelines;
Expand Down
1 change: 0 additions & 1 deletion ETL/Raven.StackOverflow.Etl/Users/BadgesProcess.cs
Expand Up @@ -5,7 +5,6 @@
//-----------------------------------------------------------------------
using System;
using System.IO;
using ETL;
using Raven.StackOverflow.Etl.Generic;
using Rhino.Etl.Core;
using Rhino.Etl.Core.Pipelines;
Expand Down
1 change: 0 additions & 1 deletion ETL/Raven.StackOverflow.Etl/Users/UsersProcess.cs
Expand Up @@ -5,7 +5,6 @@
//-----------------------------------------------------------------------
using System;
using System.IO;
using ETL;
using Raven.StackOverflow.Etl.Generic;
using Rhino.Etl.Core;
using Rhino.Etl.Core.Pipelines;
Expand Down
8 changes: 4 additions & 4 deletions ETL/Raven.StackOverflow.Etl/XMLToFileCommand.cs
Expand Up @@ -31,11 +31,11 @@ public void Run()

var processes = new EtlProcess[]
{
new UsersProcess(InputDirectory, OutputDirectory),
new BadgesProcess(InputDirectory, OutputDirectory),
// new UsersProcess(InputDirectory, OutputDirectory),
// new BadgesProcess(InputDirectory, OutputDirectory),
new PostsProcess(InputDirectory, OutputDirectory),
new VotesProcess(InputDirectory, OutputDirectory),
new CommentsProcess(InputDirectory, OutputDirectory)
// new VotesProcess(InputDirectory, OutputDirectory),
//new CommentsProcess(InputDirectory, OutputDirectory)
};
Parallel.ForEach(processes, GenerateJsonDocuments);
}
Expand Down
67 changes: 67 additions & 0 deletions ETL/Uploader/Program.cs
@@ -0,0 +1,67 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;

namespace Uploader
{
    /// <summary>
    /// Console tool that POSTs every <c>*.json</c> file found in the given directories
    /// to the <c>bulk_docs</c> endpoint resolved against a server URL.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Uploads the files one at a time and stops at the first failure.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the server URL; every following argument is a directory
        /// whose <c>*.json</c> files are uploaded.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("Usage: uploader.exe [raven url] [directory]");
                return;
            }

            // Validate the URL once, up front, instead of failing with a raw
            // UriFormatException in the middle of the first upload.
            Uri baseUri;
            if (!Uri.TryCreate(args[0], UriKind.Absolute, out baseUri))
            {
                Console.WriteLine("Invalid raven url: {0}", args[0]);
                Environment.ExitCode = 1;
                return;
            }

            // Loop-invariant, so it is built once. Standard relative resolution applies:
            // if the base path has no trailing '/', its last segment is replaced by "bulk_docs".
            var bulkDocsUri = new Uri(baseUri, "bulk_docs");

            foreach (var directory in args.Skip(1))
            {
                if (!Directory.Exists(directory))
                {
                    Console.WriteLine("Directory not found: {0}", directory);
                    Environment.ExitCode = 1;
                    return;
                }

                foreach (var file in Directory.GetFiles(directory, "*.json"))
                {
                    var sp = Stopwatch.StartNew();
                    HttpStatusCode statusCode;
                    try
                    {
                        statusCode = Upload(bulkDocsUri, file);
                    }
                    catch (WebException e)
                    {
                        // Same policy as before: report and abort on the first failed upload,
                        // but signal the failure to the caller through the exit code.
                        ReportFailure(e, file, sp.Elapsed);
                        Environment.ExitCode = 1;
                        return;
                    }

                    Console.WriteLine("{0} - {1} - {2}", Path.GetFileName(file), sp.Elapsed, statusCode);
                }
            }
        }

        /// <summary>
        /// POSTs the raw bytes of <paramref name="file"/> to <paramref name="target"/>.
        /// </summary>
        /// <param name="target">Absolute URI of the endpoint that receives the file.</param>
        /// <param name="file">Path of the file whose content becomes the request body.</param>
        /// <returns>The HTTP status code of the response.</returns>
        /// <exception cref="WebException">
        /// The request could not be sent, or the server answered with an error status.
        /// </exception>
        private static HttpStatusCode Upload(Uri target, string file)
        {
            // Read the file first so that no request is opened when the file is unreadable.
            var payload = File.ReadAllBytes(file);

            var request = (HttpWebRequest)WebRequest.Create(target);
            request.Method = "POST";

            // GetRequestStream can itself throw WebException (e.g. connection refused);
            // keeping it in this method routes that failure through the caller's handler.
            using (var requestStream = request.GetRequestStream())
            {
                requestStream.Write(payload, 0, payload.Length);
            }

            using (var response = (HttpWebResponse)request.GetResponse())
            {
                // Read the status before the response is disposed so the value
                // never depends on a closed response object.
                return response.StatusCode;
            }
        }

        /// <summary>
        /// Writes the details of a failed upload to the console and releases the error response.
        /// </summary>
        /// <param name="e">The exception raised by the upload.</param>
        /// <param name="file">Path of the file that failed to upload.</param>
        /// <param name="elapsed">Time spent on the failed attempt.</param>
        private static void ReportFailure(WebException e, string file, TimeSpan elapsed)
        {
            Console.WriteLine(e.Message);
            Console.WriteLine("{0} - {1}", Path.GetFileName(file), elapsed);

            // e.Response is null when no response was received at all (e.g. a connect failure).
            using (var response = e.Response)
            {
                var httpResponse = response as HttpWebResponse;
                if (httpResponse == null)
                {
                    return;
                }

                Console.WriteLine("Http Status {0}", httpResponse.StatusCode);
                using (var body = httpResponse.GetResponseStream())
                {
                    if (body == null)
                    {
                        return;
                    }

                    using (var reader = new StreamReader(body))
                    {
                        Console.WriteLine(reader.ReadToEnd());
                    }
                }
            }
        }
    }
}
36 changes: 36 additions & 0 deletions ETL/Uploader/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Uploader")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
// NOTE(review): the company and copyright values below look like unedited project-template
// defaults - confirm the intended owner before shipping this assembly.
[assembly: AssemblyCompany("Microsoft")]
[assembly: AssemblyProduct("Uploader")]
[assembly: AssemblyCopyright("Copyright © Microsoft 2011")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("ac6cf9e7-44c8-454e-becc-3194ddf1dbd1")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
57 changes: 57 additions & 0 deletions ETL/Uploader/Uploader.csproj
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProductVersion>8.0.30703</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{E4D12C01-659E-4BA9-8B87-D967C175FBF0}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>Uploader</RootNamespace>
<AssemblyName>Uploader</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<TargetFrameworkProfile>Client</TargetFrameworkProfile>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<PlatformTarget>x86</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<PlatformTarget>x86</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
1 change: 1 addition & 0 deletions Raven.Tests/Raven.Tests.csproj
Expand Up @@ -180,6 +180,7 @@
<Compile Include="Bugs\IteratingTwice.cs" />
<Compile Include="Bugs\Iulian\CanReadEntityWithUrlId.cs" />
<Compile Include="Bugs\LinqOnDictionary.cs" />
<Compile Include="Bugs\LuceneIndexing.cs" />
<Compile Include="Bugs\LuceneQueryShouldNotModifyDynamicDocument.cs" />
<Compile Include="Bugs\ManyDocumentsViaDTC.cs" />
<Compile Include="Bugs\MassivelyMultiTenant.cs" />
Expand Down

0 comments on commit 9b198b5

Please sign in to comment.