In [1]:
import numpy as np
import pandas as pd
import os
import json

In [2]:
# TODO: get a dataset with area (flaeche) and population density (dichte) for all Bezirke (BZ) and Bezirksregionen (BZR)

In [3]:
# Kernindikatoren per Bezirk: ID columns are read as strings, the frame is
# indexed by 'BEZ' and all numeric values are rounded to one decimal place.
df_ki_bz = (
    pd.read_csv(
        "raw_bz_ki.csv",
        dtype={'RAUMID6':str, 'BEZ':str, 'PGR':str, 'BZR': str},
    )
    .set_index('BEZ')
    .round(1)
)

# Kernindikatoren per Bezirksregion (sheet '2017_BZR'), indexed by 'RAUMID6'
# and rounded the same way.
# NOTE(review): this dtype mapping uses the key 'PRG' while the CSV above uses
# 'PGR' -- confirm which spelling matches the actual column name.
df_ki = (
    pd.read_excel(
        "raw_bzr_ki.xlsx",
        sheet_name='2017_BZR',
        dtype={'RAUMID6':str, 'BEZ':str, 'PRG':str, 'BZR': str},
    )
    .set_index('RAUMID6')
    .round(1)
)

# LOR lookup table; every ID column is read as a string so it can be used to
# reference the Kernindikatoren frames.
df_lor = pd.read_csv(
    "preprocessed_lor.csv",
    dtype={'bezirk_id':str, 'prg_id':str, 'bzr_id':str, 'pnr_id': str, 'full_id_bzr': str, 'full_id_pnr': str},
)

# Reduce the LOR table to the distinct Bezirksregion rows (ID, name and the
# Bezirk each one belongs to).
df_bzr = df_lor[['full_id_bzr', 'bzr_name', 'bezirk_id', 'bezirk_name']].drop_duplicates()

In [4]:
# Preview the first rows of the Bezirksregion lookup table built above.
df_bzr.head()

Unnamed: 0,full_id_bzr,bzr_name,bezirk_id,bezirk_name
0,10111,Tiergarten Süd,1,Mitte
5,10112,Regierungsviertel,1,Mitte
9,10113,Alexanderplatz,1,Mitte
15,10114,Brunnenstr. Süd,1,Mitte
17,10221,Moabit West,1,Mitte


In [5]:
def _to_url_slug(names, expansions=()):
    """Convert a Series of display names into URL/folder-safe slugs.

    Steps, in order:
      1. lower-case the name,
      2. expand the given abbreviations (literal, non-regex replacement),
      3. drop every character that is not alphanumeric (``str.isalnum``), i.e.
         letters AND digits are kept; spaces, dots and hyphens are removed,
      4. transliterate the German characters ü, ö, ä and ß.

    Parameters
    ----------
    names : pandas.Series of str
        The names to convert.
    expansions : iterable of (str, str)
        ``(abbreviation, replacement)`` pairs applied to the lower-cased name
        before non-alphanumeric characters are stripped.

    Returns
    -------
    pandas.Series of str
        The slugs, aligned with the index of ``names``.
    """
    slugs = names.str.lower()
    # Abbreviations contain '.' / '-', so they must be expanded before those
    # characters are stripped in the next step.
    for abbreviation, replacement in expansions:
        slugs = slugs.str.replace(abbreviation, replacement, regex=False)
    slugs = slugs.apply(lambda name: "".join(ch for ch in name if ch.isalnum()))
    for char, ascii_form in (('ü', 'ue'), ('ö', 'oe'), ('ä', 'ae'), ('ß', 'ss')):
        # regex=False: these are literal characters, not patterns
        slugs = slugs.str.replace(char, ascii_form, regex=False)
    return slugs


# format bzr url
df_bzr['bzr_url'] = _to_url_slug(df_bzr.bzr_name, [('str.', 'strasse')])

# format bz url
# The former trailing replace of ' - ' with '-' was removed: it could never
# match, because spaces and hyphens are already stripped by the alphanumeric
# filter. The resulting values are unchanged.
df_bzr['bz_url'] = _to_url_slug(
    df_bzr.bezirk_name,
    [('charlottenburg-wilm.', 'charlottenburg-wilmersdorf')],
)

In [6]:
# add inhabitants count
# df_bzr = df_bzr.join(df_ew_bzr)

In [7]:
# Bezirk lookup: the distinct (bezirk_id, bezirk_name, bz_url) rows, with the
# abbreviated 'Charlottenburg-Wilm.' written out in full, keyed by bezirk_id.
df_bz = df_bzr[['bezirk_id', 'bezirk_name', 'bz_url']].drop_duplicates()
df_bz = df_bz.assign(
    bezirk_name=df_bz.bezirk_name.str.replace(
        'Charlottenburg-Wilm.', 'Charlottenburg-Wilmersdorf', regex=False
    )
).set_index('bezirk_id')

# Key the Bezirksregion metadata by its full ID; df_ki is indexed by RAUMID6
# and DataFrame.join matches on the index.
df_bzr = df_bzr.set_index('full_id_bzr')

# Index-on-index left joins: indicator rows are kept even when no metadata
# row matches (those metadata columns are then NaN).
df = df_ki.join(df_bzr)
df_bezirke = df_ki_bz.join(df_bz)

In [8]:
def createFolder(directory):
    """Create ``directory`` (and any missing parent directories) if needed.

    This is a best-effort helper: if the directory cannot be created, the
    ``OSError`` is not propagated; an error line is printed instead.

    Parameters
    ----------
    directory : str
        Path of the directory to create.

    Returns
    -------
    None
    """
    try:
        # exist_ok=True makes the call idempotent and removes the race between
        # a separate os.path.exists() check and the actual creation. If the
        # path exists but is not a directory, makedirs raises and the error
        # is reported below.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)

In [9]:
# create folders and data for bezirk
#
# For every row of df_bezirke this writes, into a folder named after bz_url:
#   * indikatoren.csv   - one row per indicator code with its value ('wert')
#                         and two empty columns 'phase_b' / 'phase_bz'
#   * bz-overview.json  - name, url, placeholder intro text and facts

# Indicator codes exported per Bezirk. The template frame is loop-invariant,
# so it is built once (already sorted) instead of once per iteration.
bz_indikatoren = sorted([
    'A4', 'A5', 'A6', 'A7', 'A8',
    'B1', 'B2',
    'C1', 'C2', 'C3',
    'D1', 'D2', 'D3', 'D4', 'D5', 'D6',
    'E1', 'E2', 'E3', 'E5', 'E6', 'E8', 'DA1',
])
bz_template = pd.DataFrame(index=bz_indikatoren)

# placeholder intro text written to every bz-overview.json
bz_intro_text = 'Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.'

for j in range(len(df_bezirke)):

    # row for this bezirk (a Series named after the row's index label)
    bezirk = df_bezirke.iloc[j]

    # create folder for bezirk
    createFolder(bezirk.bz_url)

    # create csv for indikatoren: left-join the row onto the template so only
    # the listed indicator codes are kept; codes missing from the row are NaN
    indikatoren_csv = bz_template.join(bezirk).reset_index()
    indikatoren_csv.columns = ['name', 'wert']
    indikatoren_csv['phase_b'] = ''
    indikatoren_csv['phase_bz'] = ''
    indikatoren_csv.to_csv(os.path.join(bezirk.bz_url, 'indikatoren.csv'), index=False)

    # 'flaeche' and 'dichte' are hard-coded placeholders; 'einw' is the DA1
    # value of the row converted to a string
    overview_json = {
        'name': bezirk.bezirk_name,
        'url': bezirk.bz_url,
        'introText': bz_intro_text,
        'facts': {'flaeche': 1, 'einw': str(bezirk.DA1), 'dichte':1}
    }
    with open(os.path.join(bezirk.bz_url, 'bz-overview.json'), 'w', encoding='utf8') as outfile:
        # ensure_ascii=False keeps non-ASCII characters readable in the file
        json.dump(overview_json, outfile, ensure_ascii=False)

In [10]:
# create folders and data for bezirksregionen
#
# For every row of df this creates <bz_url>/bzr-data/<bzr_url>/ containing:
#   * indikatoren.csv    - one row per indicator code with its value ('wert')
#                          and two empty columns 'phase_b' / 'phase_bz'
#   * data/              - empty sub-folder
#   * bzr-overview.json  - name, url, placeholder intro text and facts
#   * datenblatt.csv     - header-only file

# Indicator codes exported per Bezirksregion. The template frame is
# loop-invariant, so it is built once (already sorted) instead of per row.
bzr_indikatoren = sorted([
    'A4', 'A5', 'A6', 'A7', 'A8',
    'B1', 'B2',
    'C1', 'C2', 'C3',
    'D1', 'D2', 'D3', 'D4', 'D5', 'D6',
    'E1', 'E2', 'E3', 'E5', 'E6', 'E8', 'DA1',
])
bzr_template = pd.DataFrame(index=bzr_indikatoren)

# datenblatt.csv has no rows, only these column headers; also loop-invariant
columnsTitles = ['ref','type','datasource','content']
datenblatt_csv = pd.DataFrame(columns=columnsTitles)

for i in range(len(df)):

    # look the row up once instead of re-evaluating df.iloc[i] for every field
    bzr = df.iloc[i]

    # create folder for bezirksregion
    folder_name = os.path.join(bzr.bz_url, 'bzr-data', bzr.bzr_url)
    createFolder(folder_name)

    # create csv for indikatoren: left-join the row onto the template so only
    # the listed indicator codes are kept; codes missing from the row are NaN
    indikatoren_csv = bzr_template.join(bzr).reset_index()
    indikatoren_csv.columns = ['name', 'wert']
    indikatoren_csv['phase_b'] = ''
    indikatoren_csv['phase_bz'] = ''
    indikatoren_csv.to_csv(os.path.join(folder_name, 'indikatoren.csv'), index=False)

    # create data folder
    createFolder(os.path.join(folder_name, 'data'))

    # create bzr-overview json
    # 'flaeche' and 'dichte' are hard-coded placeholders; 'einw' is the DA1
    # value of the row converted to a string
    overview_json = {
        'name': bzr.bzr_name,
        'url': bzr.bzr_url,
        'introText':'Für diese Bezirksregion liegen leider noch keine Daten vor.',
        'facts': {'flaeche': 1, 'einw': str(bzr.DA1), 'dichte':1}
    }
    with open(os.path.join(folder_name, 'bzr-overview.json'), 'w', encoding='utf8') as outfile:
        # ensure_ascii=False keeps the umlauts readable in the written file
        json.dump(overview_json, outfile, ensure_ascii=False)

    # create datenblatt.csv
    datenblatt_csv.to_csv(os.path.join(folder_name, 'datenblatt.csv'), index=False)