In [2]:
import pandas as pd
from data import load_debt_data, total_annual_debt, total_annual_unemployment, filter_by_year, filter_by_years, filter_by_states

In [3]:
df = pd.read_csv('data/debt_92-05.csv', sep=';')
df['value'] = pd.to_numeric(df['value'], errors='coerce')
df['time'] = pd.to_datetime(df['time']).dt.year

In [4]:
# get total annual debt by year for Berlin on a state level
filtered_df = df[df['1_variable_attribute_label'] == "Berlin"]
filtered_df = filtered_df[filtered_df['2_variable_attribute_label'] == "Länder"]
filtered_df['total_annual_debt'] = filtered_df.groupby('time')['value'].transform('sum')
filtered_df = filtered_df.groupby('time')['total_annual_debt'].agg(lambda x: list(x)[0]).reset_index()
filtered_df

Unnamed: 0,time,total_annual_debt
0,1992,13069.0
1,1993,16053.0
2,1994,18454.0
3,1995,23700.0
4,1996,26911.0
5,1997,29000.0
6,1998,31211.0
7,1999,33231.0
8,2000,34936.0
9,2001,39778.0


In [1]:
from data import load_population_density, population_from_density

df= load_population_density()

pop= population_from_density()

print(pop[pop['year']=='2015-12-31'])

          year  Baden-Württemberg    Bayern   Berlin  Brandenburg  Bremen  \
20  2015-12-31           10868608  12839918  3521616      2476068  645996   

    Hamburg   Hessen  Mecklenburg-Vorpommern  Niedersachsen  \
20  1786330  6186695                 1598937        7904588   

    Nordrhein-Westfalen  Rheinland-Pfalz  Saarland  Sachsen  Sachsen-Anhalt  \
20             17859492          4048788    996772  4069273         2248950   

    Schleswig-Holstein  Thüringen  
20             2853103    2167048  


In [1]:
from data import normalized_debt_per_capita, normalized_unemployment_per_capita


debt_norm = normalized_debt_per_capita()
unemp_norm = normalized_unemployment_per_capita()

print(debt_norm.head())
print(unemp_norm.head())


               state  year  debt_per_person_eur
0  Baden-Württemberg  1995              2481.92
1  Baden-Württemberg  1996              2632.99
2  Baden-Württemberg  1997              2727.84
3  Baden-Württemberg  1998              2831.82
4  Baden-Württemberg  1999              2893.37
               state  year  unemployment_rate_percent
0  Baden-Württemberg  1995                       3.18
1  Baden-Württemberg  1996                       3.41
2  Baden-Württemberg  1997                       3.67
3  Baden-Württemberg  1998                       3.37
4  Baden-Württemberg  1999                        3.1


In [11]:
#### Example of merging two variables

debt = load_debt_data()[['state', 'year', 'value']]
unemployment = total_annual_unemployment()

debt_grouped = debt.groupby(['state','year'], as_index=False).agg({'value': 'sum'})

# Debt data covers smaller span so using that now, extend to more features
min_year = min(debt['year'])
max_year = max(debt['year'])

unemployment = filter_by_years(unemployment, min_year, max_year)

combined = debt_grouped.sort_values('state')
combined['unemployment'] = unemployment.sort_values('state')['value'].values
combined = combined.rename(columns={'value': 'debt'})

combined


Unnamed: 0,state,year,debt,unemployment
0,Baden-Württemberg,1992,32024.0,191970
13,Baden-Württemberg,2005,48916.0,385267
12,Baden-Württemberg,2004,46961.0,340943
11,Baden-Württemberg,2003,45186.0,336881
10,Baden-Württemberg,2002,42737.0,295005
...,...,...,...,...
211,Thüringen,1993,5793.0,192939
210,Thüringen,1992,3070.0,192748
222,Thüringen,2004,17021.0,207430
215,Thüringen,1997,11358.0,217675


In [12]:
#### Testing encoding states as numbers

combined['state_enc']=combined['state'].astype('category').cat.codes
combined

Unnamed: 0,state,year,debt,unemployment,state_enc
0,Baden-Württemberg,1992,32024.0,191970,0
13,Baden-Württemberg,2005,48916.0,385267,0
12,Baden-Württemberg,2004,46961.0,340943,0
11,Baden-Württemberg,2003,45186.0,336881,0
10,Baden-Württemberg,2002,42737.0,295005,0
...,...,...,...,...,...
211,Thüringen,1993,5793.0,192939,15
210,Thüringen,1992,3070.0,192748,15
222,Thüringen,2004,17021.0,207430,15
215,Thüringen,1997,11358.0,217675,15
