Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
added sample to list article to article links
  • Loading branch information
svick committed Jan 29, 2012
1 parent a24d5c2 commit a43ac16
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 12 deletions.
63 changes: 63 additions & 0 deletions ListArticleToArticleLinks/ListArticleToArticleLinks.csproj
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- MSBuild project for the ListArticleToArticleLinks sample executable. -->
<!-- Settings shared by all configurations. Configuration and Platform fall back to
Debug and x86 when the caller does not supply them. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProductVersion>8.0.30703</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>ListArticleToArticleLinks</RootNamespace>
<AssemblyName>ListArticleToArticleLinks</AssemblyName>
<!-- Targets the .NET Framework 4.0 Client profile. -->
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<TargetFrameworkProfile>Client</TargetFrameworkProfile>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<!-- Debug|x86: full debug symbols, optimizations off, DEBUG and TRACE defined. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<PlatformTarget>x86</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Release|x86: pdb-only symbols, optimizations on, only TRACE defined. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<PlatformTarget>x86</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Framework assembly references. -->
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<!-- Source files compiled into the assembly. -->
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<!-- Reference to the "Wikipedia SQL dump parser" project, located by a path relative
to this project's directory; the Project GUID identifies the referenced project. -->
<ItemGroup>
<ProjectReference Include="..\Wikipedia SQL dump parser\Wikipedia SQL dump parser.csproj">
<Project>{F9AF62EB-FCC2-4739-8C6B-436A67D3A835}</Project>
<Name>Wikipedia SQL dump parser</Name>
</ProjectReference>
</ItemGroup>
<!-- Pulls in the standard C# build targets. -->
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
30 changes: 30 additions & 0 deletions ListArticleToArticleLinks/Program.cs
@@ -0,0 +1,30 @@
using System;
using System.Linq;
using WpSqlDumpParser;
using WpSqlDumpParser.EntityCollections;
using WpSqlDumpParser.IO;

namespace ListArticleToArticleLinks
{
    /// <summary>
    /// Sample program: writes every article-to-article link found in the latest
    /// "enwiki" page links dump to the console, one <c>source->target</c> pair per line.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// Configures the dump cache and the page limiter, fetches the page links
        /// and prints those that both start and end in the article namespace.
        /// </summary>
        static void Main()
        {
            // Directory into which the dumps will be downloaded.
            CachingStream.CachePath = @"C:\Wikipedia dumps";

            // Only article pages are needed, so do not keep any other page in memory.
            Pages.Instance.Limiter = allPages =>
                from page in allPages
                where page.Namespace == Namespaces.Article
                select page;

            var linkCollection = PageLinks.Instance;
            var allLinks = linkCollection.Get("enwiki", DumpsManager.GetLastDumpDate("enwiki"));

            // Because of the limiter above, a non-null From means the link starts at an
            // article; the namespace check keeps only links that also point to an article.
            var articleLinks =
                from candidate in allLinks
                where candidate.From != null && candidate.ToNamespace == Namespaces.Article
                select candidate;

            foreach (var articleLink in articleLinks)
            {
                Console.WriteLine("{0}->{1}", articleLink.From.Title, articleLink.ToTitle);
            }
        }
    }
}
36 changes: 36 additions & 0 deletions ListArticleToArticleLinks/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// Assembly-level metadata for the ListArticleToArticleLinks assembly.
//
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
// Only the title, product and copyright are filled in; the remaining
// descriptive attributes are intentionally left as empty strings.
[assembly: AssemblyTitle("ListArticleToArticleLinks")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("ListArticleToArticleLinks")]
[assembly: AssemblyCopyright("Copyright © 2012")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("a0cace9f-fc97-41c8-9f5a-79fac989e651")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Both the assembly version and the file version are pinned to 1.0.0.0 here.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
37 changes: 25 additions & 12 deletions Wikipedia SQL dump parser.sln
Expand Up @@ -9,10 +9,12 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Wikipedia total image size"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{C4612810-A8A4-4D9C-8DC2-E7AA85D64352}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChineseCategoryLinks", "TotalImageSizeExample\ChineseCategoryLinks.csproj", "{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Wikipedia language networks", "Wikipedia language networks\Wikipedia language networks.csproj", "{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ListArticleToArticleLinks", "ListArticleToArticleLinks\ListArticleToArticleLinks.csproj", "{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChineseCategoryLinks", "ChineseCategoryLinks\ChineseCategoryLinks.csproj", "{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -53,16 +55,6 @@ Global
{1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|Mixed Platforms.Build.0 = Release|x86
{1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|x86.ActiveCfg = Release|x86
{1F2DA208-8AA6-4A56-9AF9-54076BBD0975}.Release|x86.Build.0 = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Any CPU.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.Build.0 = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.Build.0 = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Any CPU.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.Build.0 = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.Build.0 = Release|x86
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Any CPU.ActiveCfg = Debug|x86
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Debug|Mixed Platforms.Build.0 = Debug|x86
Expand All @@ -73,11 +65,32 @@ Global
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|Mixed Platforms.Build.0 = Release|x86
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|x86.ActiveCfg = Release|x86
{2BEDB211-24A7-45B9-AF41-A4B4AA94A0D7}.Release|x86.Build.0 = Release|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Any CPU.ActiveCfg = Debug|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|Mixed Platforms.Build.0 = Debug|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|x86.ActiveCfg = Debug|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Debug|x86.Build.0 = Debug|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Any CPU.ActiveCfg = Release|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Mixed Platforms.ActiveCfg = Release|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|Mixed Platforms.Build.0 = Release|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|x86.ActiveCfg = Release|x86
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC}.Release|x86.Build.0 = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Any CPU.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|Mixed Platforms.Build.0 = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.ActiveCfg = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Debug|x86.Build.0 = Debug|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Any CPU.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|Mixed Platforms.Build.0 = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.ActiveCfg = Release|x86
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{A9CE01F9-D7DE-41DF-ADA2-B1057CD245AC} = {C4612810-A8A4-4D9C-8DC2-E7AA85D64352}
{1E1CFCB0-6631-4857-ACD7-384E19E1DA6B} = {C4612810-A8A4-4D9C-8DC2-E7AA85D64352}
EndGlobalSection
EndGlobal

0 comments on commit a43ac16

Please sign in to comment.