Skip to content
Permalink
Browse files

include subject listing command line app for future maintenance

  • Loading branch information...
ruarai committed Jun 30, 2018
1 parent 32e8569 commit 38680fa04056ed83116de208d91ffdf2eb64c447
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8" ?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" />
</startup>
</configuration>
@@ -0,0 +1,86 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Newtonsoft.Json;
using RestSharp;

namespace SubjectLister
{
/// <summary>
/// Command line tool that scrapes the subject search results of
/// handbook.unimelb.edu.au for one year/semester and writes every subject
/// found, formatted as "CODE Name", to a JSON file in the working directory.
/// </summary>
class Program
{
    //What year/semester you are fetching for
    //This should still include year long subjects
    private const int Year = 2018;
    private const int Semester = 2;

    //Number of pages to fetch
    //Needs to be determined manually for each new semester, just look at
    //https://handbook.unimelb.edu.au/search?query=&year=2018&types%5B%5D=subject&level_type%5B%5D=all&study_periods%5B%5D=semester_2&study_periods%5B%5D=year_long&area_of_study=all&faculty=all&department=all
    //manually to determine this (changing year/semester in the url, of course)
    private const int Pages = 118;

    /// <summary>
    /// Fetches search result pages 1..<see cref="Pages"/>, extracts each subject's
    /// code and name, echoes them to the console and finally serialises the whole
    /// list to <c>codes_{Year}sem{Semester}.json</c>.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    /// <remarks>
    /// If a page cannot be fetched the loop stops early; whatever was collected so
    /// far is still written, but a warning is printed and the process exit code is
    /// set to 1 so an incomplete file is not mistaken for a full listing.
    /// </remarks>
    static void Main(string[] args)
    {
        //ugly url to perform the search
        string url =
            $"search?query=&faculty=all&department=all&year={Year}&area_of_study=all&types%5B%5D=subject&level_type%5B%5D=all&study_periods%5B%5D=semester_{Semester}&study_periods%5B%5D=year_long&sort=external_code%7Casc&page=";

        //create a client to perform the scraping
        RestClient c = new RestClient("https://handbook.unimelb.edu.au/");

        //It actually needs a useragent, surprisingly
        //NOTE(review): the leading "agent:" looks like a copy/paste artifact; it is
        //left unchanged so the request stays exactly as originally sent - confirm.
        c.UserAgent =
            "agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36";

        List<string> codes = new List<string>();

        //Tracks whether every page was fetched, so a partial result can be flagged
        bool complete = true;

        //Fetch and parse every page
        for (int page = 1; page <= Pages; page++)
        {
            RestRequest request = new RestRequest(url + page);

            request.Method = Method.GET;
            request.AddHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            request.AddHeader("accept-encoding", "gzip");

            var response = c.Execute(request);

            if (!response.IsSuccessful)
            {
                //Say which page failed and why, so Pages or the url can be corrected
                Console.WriteLine(
                    $"failed to find page {page} of {Pages}? (status: {response.StatusCode}, error: {response.ErrorMessage})");
                complete = false;
                break;
            }

            HtmlDocument d = new HtmlDocument();

            d.LoadHtml(response.Content);

            var results = d.DocumentNode.QuerySelectorAll(".search-results__accordion-item");

            foreach (var result in results)
            {
                var titleNode = result.QuerySelector(".search-results__accordion-title");
                var codeNode = result.QuerySelector(".search-results__accordion-code");

                string full = FormatSubject(titleNode?.InnerText, codeNode?.InnerText);

                if (full == null)
                {
                    //A result without a title or code element would otherwise crash
                    //the whole run with a NullReferenceException; skip just that item.
                    Console.WriteLine($"skipping malformed result on page {page}");
                    continue;
                }

                codes.Add(full);

                Console.WriteLine(full);
            }
        }

        //Writes the output to a JSON file
        File.WriteAllText($"codes_{Year}sem{Semester}.json", JsonConvert.SerializeObject(codes));

        if (!complete)
        {
            Console.WriteLine($"warning: output is incomplete, only {codes.Count} subjects were collected");
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Builds the "CODE Name" string for one search result.
    /// </summary>
    /// <param name="rawTitle">Inner text of the result's title element; it may also contain the code text.</param>
    /// <param name="rawCode">Inner text of the result's code element.</param>
    /// <returns>
    /// The trimmed code, a single space, and the trimmed title with the code text
    /// removed; or <c>null</c> when either input is <c>null</c>.
    /// </returns>
    private static string FormatSubject(string rawTitle, string rawCode)
    {
        if (rawTitle == null || rawCode == null)
        {
            return null;
        }

        //InnerText keeps HTML entities (e.g. "&amp;"); decode them so the JSON
        //contains the real characters.
        string code = HtmlEntity.DeEntitize(rawCode);
        string name = HtmlEntity.DeEntitize(rawTitle);

        //string.Replace throws ArgumentException for an empty search string
        if (code.Length > 0)
        {
            name = name.Replace(code, "");
        }

        return code.Trim() + " " + name.Trim();
    }
}
}
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
// Title and product are both "SubjectLister"; the description, configuration,
// company, trademark and culture values are left empty.
[assembly: AssemblyTitle("SubjectLister")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("SubjectLister")]
[assembly: AssemblyCopyright("Copyright © 2017")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("171dbaed-e598-4af8-a9cd-1a0d5a743caf")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Both the assembly version and the file version are pinned to 1.0.0.0 here.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
@@ -0,0 +1,78 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>SubjectLister</RootNamespace>
<AssemblyName>SubjectLister</AssemblyName>
<TargetFrameworkVersion>v4.5.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="HtmlAgilityPack, Version=1.4.9.0, Culture=neutral, PublicKeyToken=bd319b19eaf3b43a, processorArchitecture=MSIL">
<HintPath>..\packages\HtmlAgilityPack.CssSelectors.1.0.2\lib\net45\HtmlAgilityPack.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="HtmlAgilityPack.CssSelectors, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\packages\HtmlAgilityPack.CssSelectors.1.0.2\lib\net45\HtmlAgilityPack.CssSelectors.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="Newtonsoft.Json, Version=10.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<HintPath>..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="RestSharp, Version=106.1.0.0, Culture=neutral, PublicKeyToken=598062e77f915f75, processorArchitecture=MSIL">
<HintPath>..\packages\RestSharp.106.1.0\lib\net452\RestSharp.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Web" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="HtmlAgilityPack" version="1.6.5" targetFramework="net452" />
<package id="HtmlAgilityPack.CssSelectors" version="1.0.2" targetFramework="net452" />
<package id="Newtonsoft.Json" version="10.0.3" targetFramework="net452" />
<package id="RestSharp" version="106.1.0" targetFramework="net452" />
</packages>
@@ -10,6 +10,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
LICENSE.md = LICENSE.md
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SubjectLister", "SubjectLister\SubjectLister.csproj", "{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -20,6 +22,10 @@ Global
{645BD307-E19D-467E-BE74-70AFB7AF3999}.Debug|Any CPU.Build.0 = Debug|Any CPU
{645BD307-E19D-467E-BE74-70AFB7AF3999}.Release|Any CPU.ActiveCfg = Release|Any CPU
{645BD307-E19D-467E-BE74-70AFB7AF3999}.Release|Any CPU.Build.0 = Release|Any CPU
{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}.Debug|Any CPU.Build.0 = Debug|Any CPU
{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}.Release|Any CPU.ActiveCfg = Release|Any CPU
{171DBAED-E598-4AF8-A9CD-1A0D5A743CAF}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

0 comments on commit 38680fa

Please sign in to comment.
You can’t perform that action at this time.