-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
69333de
commit dc540d3
Showing
11 changed files
with
645 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
const getSkolverketData = require("./lib/getSkolverketData.js"); | ||
const mapAndClean = require("./lib/mapAndClean.js"); | ||
const mergeSchools = require("./lib/mergeSchools.js"); | ||
const geocodeSchools = require("./lib/geocodeSchools.js"); | ||
const writeOsmFiles = require("./lib/writeOsmFiles.js"); | ||
require("dotenv").config(); | ||
|
||
/**
 * Runs the whole import pipeline in order: download the Skolverket data,
 * filter/normalise it, merge school units, geocode the addresses and hand
 * the result to `writeOsmFiles`.
 *
 * @returns {Promise<void>} resolves once the last step has finished.
 */
async function main() {
  const rawData = await getSkolverketData();
  const cleanedData = mapAndClean(rawData);
  const mergedSchools = mergeSchools(cleanedData);
  const geocodedSchools = await geocodeSchools(mergedSchools);
  // `await` is a no-op if writeOsmFiles is synchronous; if it returns a
  // promise, its failures are now reported by the handler below as well.
  await writeOsmFiles(geocodedSchools);
}

// Never leave the promise floating: report the failure and make the process
// exit with a non-zero code instead of dying on an unhandled rejection.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
const fs = require("fs"); | ||
const path = require("path"); | ||
|
||
module.exports = class Cache { | ||
cacheFilePath; | ||
map; | ||
|
||
constructor(id) { | ||
this.cacheFilePath = path.resolve(__dirname, id + ".cache.json"); | ||
this.map = new Map(); | ||
|
||
if (fs.existsSync(this.cacheFilePath)) { | ||
this.map = new Map( | ||
JSON.parse(fs.readFileSync(this.cacheFilePath, { encoding: "utf-8" })) | ||
); | ||
} | ||
} | ||
|
||
save() { | ||
fs.writeFileSync( | ||
this.cacheFilePath, | ||
JSON.stringify(Array.from(this.map), null, 2) | ||
); | ||
} | ||
|
||
length() { | ||
return Array.from(this.map.keys()).length; | ||
} | ||
|
||
set(key, data) { | ||
this.map.set(key, data); | ||
} | ||
|
||
has(key) { | ||
return this.map.has(key); | ||
} | ||
|
||
get(key) { | ||
return this.map.get(key); | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
const axios = require("axios"); | ||
const cliProgress = require("cli-progress"); | ||
const Cache = require("./cache"); | ||
|
||
module.exports = async function (schoolsInput) { | ||
if (!process.env.HERE_API_KEY) { | ||
console.log("❌ HERE Api key not found. Please check the .env file!"); | ||
process.exit(1); | ||
} | ||
|
||
const cache = new Cache("geocode-here"); | ||
const progressBar = new cliProgress.SingleBar( | ||
{ etaBuffer: 300, clearOnComplete: true }, | ||
cliProgress.Presets.shades_classic | ||
); | ||
|
||
console.log( | ||
"Geocoding school adresses (because Skolverket's geo coordinates can't be trusted." | ||
); | ||
if (cache.length() > 0) { | ||
console.log( | ||
"💡 Cached data is avaiable from a previous run. Only geocoding new addresses!" | ||
); | ||
} | ||
progressBar.start(schoolsInput.length, 0); | ||
for (let i = 0; i < schoolsInput.length; i++) { | ||
progressBar.update(i + 1); | ||
|
||
const school = schoolsInput[i]; | ||
const query = `${school.street} ${school.housenr}, ${school.postcode} ${school.city}`; | ||
|
||
if (!cache.has(query)) { | ||
const hereResponse = await axios.get( | ||
"https://geocode.search.hereapi.com/v1/geocode", | ||
{ params: { q: query, apiKey: process.env.HERE_API_KEY, timeout: 10 } } | ||
); | ||
cache.set(query, hereResponse.data); | ||
|
||
// save the cache every so often to not start from the very beginning | ||
// if something breaks or the script get interrupted by the user. | ||
if (i % 100 === 0) cache.save(); | ||
} | ||
|
||
const geolocationReponse = cache.get(query); | ||
if (geolocationReponse.items && geolocationReponse.items.length > 0) { | ||
school.lat = geolocationReponse.items[0].position.lat; | ||
school.lng = geolocationReponse.items[0].position.lng; | ||
} | ||
} | ||
|
||
progressBar.stop(); | ||
cache.save(); | ||
return schoolsInput; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
const axios = require("axios"); | ||
const cliProgress = require("cli-progress"); | ||
const Cache = require("./cache"); | ||
|
||
module.exports = async function () { | ||
const cache = new Cache("skolverket"); | ||
const progressBar = new cliProgress.SingleBar( | ||
{ etaBuffer: 300, clearOnComplete: true }, | ||
cliProgress.Presets.shades_classic | ||
); | ||
|
||
console.log("Downloading data index from Skolverket"); | ||
const listResponse = await axios.get( | ||
"https://api.skolverket.se/skolenhetsregistret/v1/skolenhet" | ||
); | ||
|
||
console.log("Downloading data for the individual schools from Skolverket..."); | ||
if (cache.length() > 0) { | ||
console.log( | ||
"💡 Cached data is avaiable from a previous run. Downloading only new schools!" | ||
); | ||
} | ||
progressBar.start(listResponse.data.Skolenheter.length, 0); | ||
|
||
const result = []; | ||
for (let i = 0; i < listResponse.data.Skolenheter.length; i++) { | ||
progressBar.update(i + 1); | ||
const school = listResponse.data.Skolenheter[i]; | ||
const enhetskod = school.Skolenhetskod; | ||
|
||
const cachedResult = cache.get(enhetskod); | ||
if (cachedResult) { | ||
result.push(cachedResult); | ||
} else { | ||
const schoolResponse = await axios.get( | ||
`https://api.skolverket.se/skolenhetsregistret/v1/skolenhet/${enhetskod}`, | ||
{ timeout: 30 } | ||
); | ||
result.push(schoolResponse.data); | ||
cache.set(enhetskod, schoolResponse.data); | ||
|
||
// save the cache every so often to not start from the very beginning | ||
// if something breaks or the script get interrupted by the user. | ||
if (i % 200 === 0) cache.save(); | ||
} | ||
} | ||
|
||
progressBar.stop(); | ||
cache.save(); | ||
return result; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/**
 * Lower-cases a string and then capitalises the first letter of every
 * space-separated word, except for a few words with a fixed spelling:
 * "ab" becomes "AB", while "kommun", "skola" and "gymnasium" stay lower-case.
 *
 * @param {string} str - text to normalise.
 * @returns {string} the normalised text.
 */
function capitalizeWords(str) {
  // Words that must not be capitalised the regular way.
  const fixedSpellings = new Map([
    ["ab", "AB"],
    ["kommun", "kommun"],
    ["skola", "skola"],
    // This case used to lack its `return`, so it was capitalised after all.
    ["gymnasium", "gymnasium"],
  ]);

  return str
    .toLowerCase()
    .split(" ")
    .map((word) => {
      const fixed = fixedSpellings.get(word);
      if (fixed !== undefined) return fixed;
      return word.charAt(0).toUpperCase() + word.slice(1);
    })
    .join(" ");
}
|
||
/**
 * Splits an address line into two trimmed parts at a space: a leading text
 * part and a trailing part that contains at least one digit (presumably the
 * street name and the house number).
 *
 * @param {string} addr - the address line.
 * @returns {[string, string]} both parts, or `[addr, ""]` when the address
 *   does not match the expected pattern.
 */
function splitAddress(addr) {
  const match = addr.match(/(\d*\D+[^A-Z]) ([^a-z]?\D*\d+.*)/);
  if (!match || match.length !== 3) {
    return [addr, ""];
  }

  const [, leadingPart, trailingPart] = match;
  return [leadingPart.trim(), trailingPart.trim()];
}
|
||
/**
 * Strips grade-range markers such as "F-3" or "1-9" from a school name and
 * trims the surrounding whitespace.
 *
 * @param {string} name - the raw school name.
 * @returns {string} the name without grade ranges.
 */
function cleanSchoolName(name) {
  // "F" or a digit, a dash (optionally surrounded by one whitespace), a digit
  const gradeRange = /[F\d]\s?-\s?[\d]/gm;
  // cutting the name at every range and gluing the pieces back together
  // drops the ranges
  const pieces = name.split(gradeRange);
  return pieces.join("").trim();
}
|
||
module.exports = function (schoolsInput) { | ||
console.log("Filtering and reformatting Skolverket data."); | ||
console.log(" > Skolenheter in input: " + schoolsInput.length); | ||
|
||
const result = schoolsInput | ||
// first filter out all "skolenheter" we're not interested in | ||
.filter((element) => { | ||
const school = element.SkolenhetInfo; | ||
if (!school.Besoksadress.GeoData.Koordinat_WGS84_Lat) return false; | ||
if (!school.Besoksadress.GeoData.Koordinat_WGS84_Lng) return false; | ||
if ( | ||
splitAddress(school.Besoksadress.Adress.trim().toLowerCase())[0] === | ||
"box" | ||
) | ||
return false; | ||
if (school.Status !== "Aktiv") return false; | ||
if (school.Skolenhetstyp !== "Skolenhet") return false; | ||
|
||
// filter all school types that we're not interested in | ||
if ( | ||
school.Skolformer.filter((skolform) => | ||
[ | ||
"Grundskola", | ||
"Gymnasieskola", | ||
"Forskoleklass", | ||
"Grundsameskola", | ||
].includes(skolform.type) | ||
).length === 0 | ||
) { | ||
return false; | ||
} | ||
return true; | ||
}) | ||
|
||
// now we unify the data structure | ||
.map((element) => { | ||
const school = element.SkolenhetInfo; | ||
const name = capitalizeWords( | ||
cleanSchoolName(school.SkolaNamn ? school.SkolaNamn : school.Namn) | ||
); | ||
const lat = school.Besoksadress.GeoData.Koordinat_WGS84_Lat; | ||
const lng = school.Besoksadress.GeoData.Koordinat_WGS84_Lng; | ||
const addressSplit = splitAddress(school.Besoksadress.Adress.trim()); | ||
let street = addressSplit[0]; | ||
let housenr = addressSplit[1]; | ||
const city = capitalizeWords(school.Besoksadress.Ort.trim()); | ||
|
||
// parse out the grades and the level | ||
const grades = new Set(); | ||
school.Skolformer.forEach((schooltype) => { | ||
if (schooltype.type === "Forskoleklass") grades.add(0); | ||
if ( | ||
schooltype.type === "Grundskola" || | ||
schooltype.type === "Grundsameskola" | ||
) { | ||
if (schooltype.Ak1) grades.add(1); | ||
if (schooltype.Ak2) grades.add(2); | ||
if (schooltype.Ak3) grades.add(3); | ||
if (schooltype.Ak4) grades.add(4); | ||
if (schooltype.Ak5) grades.add(5); | ||
if (schooltype.Ak6) grades.add(6); | ||
if (schooltype.Ak7) grades.add(7); | ||
if (schooltype.Ak8) grades.add(8); | ||
if (schooltype.Ak9) grades.add(9); | ||
} | ||
}); | ||
const iscedLevel = new Set(); | ||
school.Skolformer.forEach((schooltype) => { | ||
if (schooltype.type === "Forskoleklass") iscedLevel.add(0); | ||
if ( | ||
(schooltype.type === "Grundskola" || | ||
schooltype.type === "Grundsameskola") && | ||
(grades.has(1) || | ||
grades.has(3) || | ||
grades.has(4) || | ||
grades.has(5) || | ||
grades.has(6)) | ||
) { | ||
iscedLevel.add(1); | ||
} | ||
if ( | ||
schooltype.type === "Grundskola" && | ||
(grades.has(7) || grades.has(8) || grades.has(9)) | ||
) { | ||
iscedLevel.add(2); | ||
} | ||
if (schooltype.type === "Gymnasieskola") iscedLevel.add(3); | ||
}); | ||
|
||
return { | ||
name, | ||
lat, | ||
lng, | ||
street, | ||
housenr, | ||
city, | ||
postcode: school.Besoksadress.Postnr.trim(), | ||
operator: capitalizeWords(school.Huvudman.Namn), | ||
ref: [parseInt(school.Skolenhetskod)], | ||
grades: Array.from(grades), | ||
iscedLevel: Array.from(iscedLevel), | ||
kommun: school.Kommun.Namn, | ||
website: school.Webbadress, | ||
}; | ||
}); | ||
|
||
console.log(" > Skolenheter after filter and cleanup: " + result.length); | ||
return result; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
module.exports = function (schoolsInput) { | ||
console.log("Merging school units into single schools."); | ||
console.log(" > Skolenheter in input: " + schoolsInput.length); | ||
|
||
const result = new Map(); | ||
for (let i = 0; i < schoolsInput.length; i++) { | ||
const schoolElement = schoolsInput[i]; | ||
const mapKey = schoolElement.name + "/" + schoolElement.city; | ||
|
||
if (result.has(mapKey)) { | ||
const existingSchool = result.get(mapKey); | ||
existingSchool.ref.push(...schoolElement.ref); | ||
existingSchool.grades.push(...schoolElement.grades); | ||
existingSchool.iscedLevel.push(...schoolElement.iscedLevel); | ||
} else { | ||
result.set(mapKey, schoolElement); | ||
} | ||
} | ||
|
||
const resultArr = Array.from(result.values()); | ||
console.log(" > Schools after merging: " + resultArr.length); | ||
return resultArr; | ||
}; |
Oops, something went wrong.