In [1]:
import pandas as pd
from data import load_debt_data, total_annual_debt, total_annual_unemployment, filter_by_year, filter_by_years, filter_by_states

In [2]:
df = pd.read_csv('data/debt_92-05.csv', sep=';')
df['value'] = pd.to_numeric(df['value'], errors='coerce')
df['time'] = pd.to_datetime(df['time']).dt.year

In [3]:
# get total annual debt by year for Berlin on a state level
filtered_df = df[df['1_variable_attribute_label'] == "Berlin"]
filtered_df = filtered_df[filtered_df['2_variable_attribute_label'] == "Länder"]
filtered_df['total_annual_debt'] = filtered_df.groupby('time')['value'].transform('sum')
filtered_df = filtered_df.groupby('time')['total_annual_debt'].agg(lambda x: list(x)[0]).reset_index()
filtered_df

Unnamed: 0,time,total_annual_debt
0,1992,13069.0
1,1993,16053.0
2,1994,18454.0
3,1995,23700.0
4,1996,26911.0
5,1997,29000.0
6,1998,31211.0
7,1999,33231.0
8,2000,34936.0
9,2001,39778.0


In [4]:
debt = load_debt_data()[['state', 'year', 'value']]
unemployment = total_annual_unemployment()

debt_grouped = debt.groupby(['state','year'], as_index=False).agg({'value': 'sum'})

# Debt data covers full span so only need to search other features, extend to more features
min_year = min(debt_grouped['year'])
max_year = max(debt_grouped['year'])

unemployment = filter_by_years(unemployment, min_year, max_year)

combined = debt_grouped.sort_values('state')
combined['unemployment'] = unemployment.sort_values('state')['value'].values
combined

Unnamed: 0,state,year,value,unemployment
0,Baden-Württemberg,1992,32024.0,191970
13,Baden-Württemberg,2005,48916.0,385267
12,Baden-Württemberg,2004,46961.0,340943
11,Baden-Württemberg,2003,45186.0,336881
10,Baden-Württemberg,2002,42737.0,295005
...,...,...,...,...
211,Thüringen,1993,5793.0,192939
210,Thüringen,1992,3070.0,192748
222,Thüringen,2004,17021.0,207430
215,Thüringen,1997,11358.0,217675


In [5]:
from radviz_plotly import RadViz2D

y = combined['state']
x = combined.drop(['state'], axis=1) 

BPs = 10000
RadViz2D(y, x, BPs)