diff --git a/ListArticleToArticleLinks/ListArticleToArticleLinks.csproj b/ListArticleToArticleLinks/ListArticleToArticleLinks.csproj new file mode 100644 index 0000000..f99906b --- /dev/null +++ b/ListArticleToArticleLinks/ListArticleToArticleLinks.csproj @@ -0,0 +1,63 @@ + + + + Debug + x86 + 8.0.30703 + 2.0 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC} + Exe + Properties + ListArticleToArticleLinks + ListArticleToArticleLinks + v4.0 + Client + 512 + + + x86 + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + x86 + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + {F9AF62EB-FCC2-4739-8C6B-436A67D3A835} + Wikipedia SQL dump parser + + + + + \ No newline at end of file diff --git a/ListArticleToArticleLinks/Program.cs b/ListArticleToArticleLinks/Program.cs new file mode 100644 index 0000000..155c7a4 --- /dev/null +++ b/ListArticleToArticleLinks/Program.cs @@ -0,0 +1,30 @@ +using System; +using System.Linq; +using WpSqlDumpParser; +using WpSqlDumpParser.EntityCollections; +using WpSqlDumpParser.IO; + +namespace ListArticleToArticleLinks +{ + static class Program + { + static void Main() + { + // path, where the dumps will be downloaded + CachingStream.CachePath = @"C:\Wikipedia dumps"; + + // we won't need other pages, so there's no need to load them into memory + Pages.Instance.Limiter = pages => pages.Where(p => p.Namespace == Namespaces.Article); + + var pageLinks = PageLinks.Instance.Get("enwiki", DumpsManager.GetLastDumpDate("enwiki")); + + var articleToArticleLinks = + pageLinks.Where( + pl => pl.From != null // because of page limiter above, this will give only links from articles + && pl.ToNamespace == Namespaces.Article); // only links to articles + + foreach (var link in articleToArticleLinks) + Console.WriteLine("{0}->{1}", link.From.Title, link.ToTitle); + } + } +} \ No newline at end of file diff --git a/ListArticleToArticleLinks/Properties/AssemblyInfo.cs b/ListArticleToArticleLinks/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..7762e6a --- /dev/null +++ b/ListArticleToArticleLinks/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("ListArticleToArticleLinks")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("ListArticleToArticleLinks")] +[assembly: AssemblyCopyright("Copyright © 2012")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("a0cace9f-fc97-41c8-9f5a-79fac989e651")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/Wikipedia SQL dump parser.sln b/Wikipedia SQL dump parser.sln index a508bc6..01ec305 100644 --- a/Wikipedia SQL dump parser.sln +++ b/Wikipedia SQL dump parser.sln @@ -9,10 +9,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Wikipedia total image size" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{C4612810-A8A4-4D9C-8DC2-E7AA85D64352}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChineseCategoryLinks", "TotalImageSizeExample\ChineseCategoryLinks.csproj", "{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Wikipedia language networks", "Wikipedia language networks\Wikipedia language networks.csproj", "{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ListArticleToArticleLinks", "ListArticleToArticleLinks\ListArticleToArticleLinks.csproj", "{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChineseCategoryLinks", "ChineseCategoryLinks\ChineseCategoryLinks.csproj", "{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -53,16 +55,6 @@ Global {1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|Mixed Platforms.Build.0 = Release|x86 {1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|x86.ActiveCfg = Release|x86 {1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|x86.Build.0 = Release|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Any CPU.ActiveCfg = Debug|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.Build.0 = Debug|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.ActiveCfg = Debug|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.Build.0 = Debug|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Any CPU.ActiveCfg = Release|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.ActiveCfg = Release|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.Build.0 = Release|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.ActiveCfg = Release|x86 - {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.Build.0 = Release|x86 {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Any CPU.ActiveCfg = Debug|x86 {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Mixed Platforms.Build.0 = Debug|x86 @@ -73,11 +65,32 @@ Global {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|Mixed Platforms.Build.0 = Release|x86 {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|x86.ActiveCfg = Release|x86 {2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|x86.Build.0 = Release|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Any CPU.ActiveCfg = Debug|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Mixed Platforms.Build.0 = Debug|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|x86.ActiveCfg = Debug|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|x86.Build.0 = Debug|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Any CPU.ActiveCfg = Release|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Mixed Platforms.ActiveCfg = Release|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Mixed Platforms.Build.0 = Release|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|x86.ActiveCfg = Release|x86 + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|x86.Build.0 = Release|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Any CPU.ActiveCfg = Debug|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.Build.0 = Debug|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.ActiveCfg = Debug|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.Build.0 = Debug|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Any CPU.ActiveCfg = Release|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.ActiveCfg = Release|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.Build.0 = Release|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.ActiveCfg = Release|x86 + {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.Build.0 = Release|x86 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution + {A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC} = {C4612810-A8A4-4D9C-8DC2-E7AA85D64352} {1E1CFCB0-6631-4857-ACD7-384E19E1DA6B} = {C4612810-A8A4-4D9C-8DC2-E7AA85D64352} EndGlobalSection EndGlobal