In [65]:
import urllib.request, json 
import pandas as pd
from pandas import json_normalize
import boto3
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment (presumably from a local
# .env file), then read the two credential values from it. Either value is
# None when the corresponding variable is not set.
load_dotenv()

ACCESS_ID = os.environ.get('ACCESS_ID')
SECRET_KEY = os.environ.get('SECRET_KEY')



#############################
# Load the original data set
#############################
# Download the raw accident-locations JSON and parse it into Python objects.
# The timeout (seconds, applied to blocking socket operations) keeps the cell
# from hanging forever if the server stops responding.
with urllib.request.urlopen(
    "https://severin.fra1.digitaloceanspaces.com/hslu/RoadTrafficAccidentLocations.json",
    timeout=60,
) as response:
    data = json.load(response)


#############################
# Preprocess the JSON file with pandas
#############################
# Flatten every entry of data['features'] into one row; nested keys become
# dotted column names such as 'properties.CantonCode'.
df = json_normalize(data['features'])
columns = [
    'geometry.coordinates',
    'properties.AccidentType_de',
    'properties.AccidentSeverityCategory_de',
    'properties.AccidentInvolvingPedestrian',
    'properties.AccidentInvolvingBicycle',
    'properties.AccidentInvolvingMotorcycle',
    'properties.RoadType_de',
    'properties.CantonCode',
    'properties.MunicipalityCode',
    'properties.AccidentYear',
    'properties.AccidentMonth',
    'properties.AccidentHour',
    'properties.AccidentWeekDay_en',
]

# Keep only the columns of interest (raises KeyError if one is missing).
df = df[columns]

# Drop everything up to and including the first dot from each column name
# ('geometry.coordinates' -> 'coordinates'); names without a dot are kept.
# regex=False matches a literal '.', which avoids the invalid '\.' escape
# sequence the non-raw pattern string used before.
has_prefix = df.columns.str.contains('.', regex=False)
df.columns = df.columns.where(~has_prefix,
                              df.columns.str.split('.', n=1).str[1])

# Split the coordinate pairs into two scalar columns. Building one DataFrame
# from the list of pairs is far cheaper than creating a Series per row.
# Only the first two components of each coordinate list are used.
coords = pd.DataFrame(df['coordinates'].tolist(), index=df.index)
df['x_coordinates'] = coords[0]
df['y_coordinates'] = coords[1]
df = df.drop(columns=['coordinates'])

# Write the result as a JSON array with one object per row.
df.to_json('RoadTrafficPreProcessed.json', orient='records')


#############################
# Upload the processed file to the S3 bucket for direct public access
#############################
# Connection settings for the S3-compatible endpoint; the credentials are the
# values read from the environment earlier in the script.
spaces_config = {
    'region_name': 'fra1',
    'endpoint_url': 'https://fra1.digitaloceanspaces.com',
    'aws_access_key_id': ACCESS_ID,
    'aws_secret_access_key': SECRET_KEY,
}
session = boto3.session.Session()
client = session.client('s3', **spaces_config)

# "public-read" makes the object readable without credentials; the content
# type is set explicitly to application/json.
upload_options = {"ACL": "public-read", "ContentType": "application/json"}
client.upload_file(
    Filename='RoadTrafficPreProcessed.json',
    Bucket='severin',
    Key='hslu/RoadTrafficPreProcessed.json',
    ExtraArgs=upload_options,
)

In [None]:
#############################
# Test file access
#############################
# Fetch the processed file back over its public URL and load it into a
# DataFrame. Note that this rebinds `data` and `df`.
# The timeout (seconds) keeps the cell from hanging if the server stalls.
with urllib.request.urlopen(
    "https://severin.fra1.digitaloceanspaces.com/hslu/RoadTrafficPreProcessed.json",
    timeout=60,
) as response:
    data = json.load(response)
df = json_normalize(data)
# Bare expression so the notebook renders the frame as the cell output.
df