## Finlex API

In [40]:
# https://data.finlex.fi/fi/rest-api

# Legislation
# https://data.finlex.fi/eli/sd/2004/1.html
# https://data.finlex.fi/eli/sd/2004/1.jsonld

# Case law
# https://data.finlex.fi/ecli/kho/2011/1.html
# https://data.finlex.fi/ecli/kho/2011/1.jsonld

In [37]:
import os
import requests
import json
import re
import string
import pandas as pd

In [38]:
# Translation table deleting ASCII punctuation and digits; built once instead
# of on every call.
_DROP_CHARS = str.maketrans('', '', string.punctuation + string.digits)


def clean(s):
    """Normalise raw text into lowercase word tokens separated by single spaces.

    Steps, in order:
      1. delete ASCII punctuation and digits (deleted, not replaced by a
         space, so "a.b" becomes "ab");
      2. delete every section sign "§" together with the run of
         non-whitespace characters directly following it;
      3. delete en dashes;
      4. split on whitespace and drop tokens that are a single character;
      5. join the remaining tokens with one space and lowercase the result.

    Line breaks (and any other whitespace) act as word separators, so words on
    adjacent lines are never glued together, and every single-character token
    is dropped even when several of them appear in a row.

    Parameters
    ----------
    s : str
        Text to clean.

    Returns
    -------
    str
        The cleaned text; an empty string if nothing survives.
    """
    s = s.translate(_DROP_CHARS)
    # Raw string: "\S" is not a valid escape in a normal string literal.
    s = re.sub(r"§(\S+)?", "", s)
    # The en dash is not part of string.punctuation, so remove it explicitly.
    s = s.replace("–", "")
    # Splitting after all deletions means whitespace left behind by removed
    # characters can never produce double spaces in the result.
    tokens = [tok for tok in s.split() if len(tok) > 1]
    return " ".join(tokens).lower()

In [None]:
# Finlex collection to download. "eli/sd" and "ecli/kho" have dedicated
# branches below; any other value (such as "ecli/kko") takes the final branch.
data_type = "ecli/kko"

# Seconds to wait on the server per request. Without a timeout a single
# stalled connection would block the whole loop indefinitely.
REQUEST_TIMEOUT = 30

# Each year with at least one record is written to data/<collection>/<year>.json.
out_dir = os.path.join("data", data_type.split("/")[1])

for year in range(1700, 2020):

    data = []

    url = "https://data.finlex.fi/" + data_type + "/" + str(year) + ".jsonld"
    r = requests.get(url, timeout=REQUEST_TIMEOUT)

    d_json = json.loads(r.text)

    # Responses carrying an "error" key are skipped
    # (presumably years without any documents -- TODO confirm against the API).
    if "error" in d_json:
        continue

    for entry in d_json['@graph']:

        # Only the first language version (and, for legislation, the first
        # temporal version) of each entry is used.
        if data_type == "eli/sd":
            d = entry['temporalVersions'][0]['languageVersion'][0]
            text = clean(d['title_fi'][0] + " " + d['content'])
        elif data_type == "ecli/kho":
            d = entry['languageVersion'][0]
            text = clean(d['abstract_fi'][0])
        else:
            d = entry['languageVersion'][0]
            abstract = d['abstract_fi'][0]

            # The body text is either nested under 'hasFormat' or stored
            # directly on the language version; some entries have neither,
            # in which case only the abstract is kept.
            if 'hasFormat' in d:
                text = clean(abstract + " " + d['hasFormat'][0]['content_fi'])
            elif 'content_fi' in d:
                text = clean(abstract + " " + d['content_fi'])
            else:
                text = clean(abstract)

        data.append(
            {
                "id": d['@id'],
                "type": d['@type'],
                "year": year,
                "text": text,
                "url": url,
            }
        )

    if data:
        print(f"{year}: {len(data)}")
        # Create the target directory on demand so a fresh checkout works.
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, str(year) + '.json'), 'w', encoding='utf-8') as f:
            json.dump(data, f)

In [11]:
# Load one saved yearly file for inspection. This needs data/sd to exist,
# i.e. the download cell must have been run with data_type = "eli/sd".
# os.listdir() returns names in arbitrary order, so the listing is sorted to
# make index 100 refer to the same file on every run.
sd_dir = os.path.join("data", "sd")
sample_name = sorted(os.listdir(sd_dir))[100]
with open(os.path.join(sd_dir, sample_name), 'r', encoding='utf-8') as f:
    d = json.load(f)