Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasfeist committed Apr 9, 2023
1 parent 69333de commit dc540d3
Show file tree
Hide file tree
Showing 11 changed files with 645 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*.cache.json
output/*

# Logs
logs
*.log
Expand Down Expand Up @@ -102,3 +105,5 @@ dist

# TernJS port file
.tern-port

.DS_Store
1 change: 1 addition & 0 deletions .prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
15 changes: 15 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const getSkolverketData = require("./lib/getSkolverketData.js");
const mapAndClean = require("./lib/mapAndClean.js");
const mergeSchools = require("./lib/mergeSchools.js");
const geocodeSchools = require("./lib/geocodeSchools.js");
const writeOsmFiles = require("./lib/writeOsmFiles.js");
require("dotenv").config();

/**
 * Runs the whole conversion pipeline in order: download the Skolverket data,
 * filter/normalise it, merge school units into schools, geocode the result
 * and finally write the output files.
 *
 * @returns {Promise<void>} Resolves once every step has finished.
 */
async function main() {
  const rawData = await getSkolverketData();
  const cleanedData = mapAndClean(rawData);
  const mergedSchools = mergeSchools(cleanedData);
  const geocodedSchools = await geocodeSchools(mergedSchools);
  // Awaiting is harmless if writeOsmFiles is synchronous and makes sure a
  // rejection is not lost should it return a promise.
  await writeOsmFiles(geocodedSchools);
}

// Never leave the promise floating: report the failure and signal it to the
// calling shell through a non-zero exit code.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
41 changes: 41 additions & 0 deletions lib/cache.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
const fs = require("fs");
const path = require("path");

module.exports = class Cache {
cacheFilePath;
map;

constructor(id) {
this.cacheFilePath = path.resolve(__dirname, id + ".cache.json");
this.map = new Map();

if (fs.existsSync(this.cacheFilePath)) {
this.map = new Map(
JSON.parse(fs.readFileSync(this.cacheFilePath, { encoding: "utf-8" }))
);
}
}

save() {
fs.writeFileSync(
this.cacheFilePath,
JSON.stringify(Array.from(this.map), null, 2)
);
}

length() {
return Array.from(this.map.keys()).length;
}

set(key, data) {
this.map.set(key, data);
}

has(key) {
return this.map.has(key);
}

get(key) {
return this.map.get(key);
}
};
54 changes: 54 additions & 0 deletions lib/geocodeSchools.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
const axios = require("axios");
const cliProgress = require("cli-progress");
const Cache = require("./cache");

module.exports = async function (schoolsInput) {
if (!process.env.HERE_API_KEY) {
console.log("❌ HERE Api key not found. Please check the .env file!");
process.exit(1);
}

const cache = new Cache("geocode-here");
const progressBar = new cliProgress.SingleBar(
{ etaBuffer: 300, clearOnComplete: true },
cliProgress.Presets.shades_classic
);

console.log(
"Geocoding school adresses (because Skolverket's geo coordinates can't be trusted."
);
if (cache.length() > 0) {
console.log(
"💡 Cached data is avaiable from a previous run. Only geocoding new addresses!"
);
}
progressBar.start(schoolsInput.length, 0);
for (let i = 0; i < schoolsInput.length; i++) {
progressBar.update(i + 1);

const school = schoolsInput[i];
const query = `${school.street} ${school.housenr}, ${school.postcode} ${school.city}`;

if (!cache.has(query)) {
const hereResponse = await axios.get(
"https://geocode.search.hereapi.com/v1/geocode",
{ params: { q: query, apiKey: process.env.HERE_API_KEY, timeout: 10 } }
);
cache.set(query, hereResponse.data);

// save the cache every so often to not start from the very beginning
// if something breaks or the script get interrupted by the user.
if (i % 100 === 0) cache.save();
}

const geolocationReponse = cache.get(query);
if (geolocationReponse.items && geolocationReponse.items.length > 0) {
school.lat = geolocationReponse.items[0].position.lat;
school.lng = geolocationReponse.items[0].position.lng;
}
}

progressBar.stop();
cache.save();
return schoolsInput;
};
51 changes: 51 additions & 0 deletions lib/getSkolverketData.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
const axios = require("axios");
const cliProgress = require("cli-progress");
const Cache = require("./cache");

module.exports = async function () {
const cache = new Cache("skolverket");
const progressBar = new cliProgress.SingleBar(
{ etaBuffer: 300, clearOnComplete: true },
cliProgress.Presets.shades_classic
);

console.log("Downloading data index from Skolverket");
const listResponse = await axios.get(
"https://api.skolverket.se/skolenhetsregistret/v1/skolenhet"
);

console.log("Downloading data for the individual schools from Skolverket...");
if (cache.length() > 0) {
console.log(
"💡 Cached data is avaiable from a previous run. Downloading only new schools!"
);
}
progressBar.start(listResponse.data.Skolenheter.length, 0);

const result = [];
for (let i = 0; i < listResponse.data.Skolenheter.length; i++) {
progressBar.update(i + 1);
const school = listResponse.data.Skolenheter[i];
const enhetskod = school.Skolenhetskod;

const cachedResult = cache.get(enhetskod);
if (cachedResult) {
result.push(cachedResult);
} else {
const schoolResponse = await axios.get(
`https://api.skolverket.se/skolenhetsregistret/v1/skolenhet/${enhetskod}`,
{ timeout: 30 }
);
result.push(schoolResponse.data);
cache.set(enhetskod, schoolResponse.data);

// save the cache every so often to not start from the very beginning
// if something breaks or the script get interrupted by the user.
if (i % 200 === 0) cache.save();
}
}

progressBar.stop();
cache.save();
return result;
};
144 changes: 144 additions & 0 deletions lib/mapAndClean.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/**
 * Lower-cases the text and then capitalises the first letter of every
 * space-separated word. A few words are special-cased: "ab" becomes "AB",
 * while "kommun", "skola" and "gymnasium" stay completely lower-case.
 *
 * @param {string} str - Text to normalise.
 * @returns {string} The text with normalised capitalisation.
 */
function capitalizeWords(str) {
  return str
    .toLowerCase()
    .split(" ")
    .map((word) => {
      switch (word) {
        case "ab":
          return "AB";
        case "kommun":
        case "skola":
        case "gymnasium":
          // These words are kept in lower case.
          return word;
        default:
          return word.charAt(0).toUpperCase() + word.slice(1);
      }
    })
    .join(" ");
}

/**
 * Splits an address line into its street part and its house number part.
 *
 * @param {string} addr - Address line, e.g. "Storgatan 12".
 * @returns {[string, string]} `[street, houseNumber]`; when the pattern does
 *   not match, the unchanged input and an empty string.
 */
function splitAddress(addr) {
  const match = addr.match(/(\d*\D+[^A-Z]) ([^a-z]?\D*\d+.*)/);
  if (!match || match.length !== 3) {
    return [addr, ""];
  }

  const [, streetPart, numberPart] = match;
  return [streetPart.trim(), numberPart.trim()];
}

/**
 * Strips grade ranges such as "F-3" or "1-9" from a school name and trims
 * the surrounding whitespace.
 *
 * @param {string} name - School name as delivered by the data source.
 * @returns {string} The name without grade ranges.
 */
function cleanSchoolName(name) {
  const gradeRangePattern = /[F\d]\s?-\s?[\d]/gm;
  const withoutGradeRanges = name.replace(gradeRangePattern, "");
  return withoutGradeRanges.trim();
}

module.exports = function (schoolsInput) {
console.log("Filtering and reformatting Skolverket data.");
console.log(" > Skolenheter in input: " + schoolsInput.length);

const result = schoolsInput
// first filter out all "skolenheter" we're not interested in
.filter((element) => {
const school = element.SkolenhetInfo;
if (!school.Besoksadress.GeoData.Koordinat_WGS84_Lat) return false;
if (!school.Besoksadress.GeoData.Koordinat_WGS84_Lng) return false;
if (
splitAddress(school.Besoksadress.Adress.trim().toLowerCase())[0] ===
"box"
)
return false;
if (school.Status !== "Aktiv") return false;
if (school.Skolenhetstyp !== "Skolenhet") return false;

// filter all school types that we're not interested in
if (
school.Skolformer.filter((skolform) =>
[
"Grundskola",
"Gymnasieskola",
"Forskoleklass",
"Grundsameskola",
].includes(skolform.type)
).length === 0
) {
return false;
}
return true;
})

// now we unify the data structure
.map((element) => {
const school = element.SkolenhetInfo;
const name = capitalizeWords(
cleanSchoolName(school.SkolaNamn ? school.SkolaNamn : school.Namn)
);
const lat = school.Besoksadress.GeoData.Koordinat_WGS84_Lat;
const lng = school.Besoksadress.GeoData.Koordinat_WGS84_Lng;
const addressSplit = splitAddress(school.Besoksadress.Adress.trim());
let street = addressSplit[0];
let housenr = addressSplit[1];
const city = capitalizeWords(school.Besoksadress.Ort.trim());

// parse out the grades and the level
const grades = new Set();
school.Skolformer.forEach((schooltype) => {
if (schooltype.type === "Forskoleklass") grades.add(0);
if (
schooltype.type === "Grundskola" ||
schooltype.type === "Grundsameskola"
) {
if (schooltype.Ak1) grades.add(1);
if (schooltype.Ak2) grades.add(2);
if (schooltype.Ak3) grades.add(3);
if (schooltype.Ak4) grades.add(4);
if (schooltype.Ak5) grades.add(5);
if (schooltype.Ak6) grades.add(6);
if (schooltype.Ak7) grades.add(7);
if (schooltype.Ak8) grades.add(8);
if (schooltype.Ak9) grades.add(9);
}
});
const iscedLevel = new Set();
school.Skolformer.forEach((schooltype) => {
if (schooltype.type === "Forskoleklass") iscedLevel.add(0);
if (
(schooltype.type === "Grundskola" ||
schooltype.type === "Grundsameskola") &&
(grades.has(1) ||
grades.has(3) ||
grades.has(4) ||
grades.has(5) ||
grades.has(6))
) {
iscedLevel.add(1);
}
if (
schooltype.type === "Grundskola" &&
(grades.has(7) || grades.has(8) || grades.has(9))
) {
iscedLevel.add(2);
}
if (schooltype.type === "Gymnasieskola") iscedLevel.add(3);
});

return {
name,
lat,
lng,
street,
housenr,
city,
postcode: school.Besoksadress.Postnr.trim(),
operator: capitalizeWords(school.Huvudman.Namn),
ref: [parseInt(school.Skolenhetskod)],
grades: Array.from(grades),
iscedLevel: Array.from(iscedLevel),
kommun: school.Kommun.Namn,
website: school.Webbadress,
};
});

console.log(" > Skolenheter after filter and cleanup: " + result.length);
return result;
};
23 changes: 23 additions & 0 deletions lib/mergeSchools.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module.exports = function (schoolsInput) {
console.log("Merging school units into single schools.");
console.log(" > Skolenheter in input: " + schoolsInput.length);

const result = new Map();
for (let i = 0; i < schoolsInput.length; i++) {
const schoolElement = schoolsInput[i];
const mapKey = schoolElement.name + "/" + schoolElement.city;

if (result.has(mapKey)) {
const existingSchool = result.get(mapKey);
existingSchool.ref.push(...schoolElement.ref);
existingSchool.grades.push(...schoolElement.grades);
existingSchool.iscedLevel.push(...schoolElement.iscedLevel);
} else {
result.set(mapKey, schoolElement);
}
}

const resultArr = Array.from(result.values());
console.log(" > Schools after merging: " + resultArr.length);
return resultArr;
};

0 comments on commit dc540d3

Please sign in to comment.