In [1]:
import os

import pandas as pd
import pandas.io.sql as psql
import psycopg2 as pg
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR

In [2]:
# Open the PostgreSQL connection used by the queries below.
# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook; the fallbacks are the values
# this notebook previously hardcoded, so an unconfigured run behaves as before.
db_conn = pg.connect(
    dbname=os.environ.get("WHERETOLIVE_DB_NAME", "wheretolive"),
    user=os.environ.get("WHERETOLIVE_DB_USER", "wheretolive"),
    password=os.environ.get("WHERETOLIVE_DB_PASSWORD", "wheretolive"),
    host=os.environ.get("WHERETOLIVE_DB_HOST", "127.0.0.1"))

In [3]:
# Pivot the `tax_rate` table from one row per profile to one row per
# (bfs_nr, income bracket) with one rate column per profile:
#   s_rate      <- profile 'single'
#   m0c_rate    <- profile 'married_no_children'
#   m2c_rate    <- profile 'married_2_children'
#   m2c2s_rate  <- profile 'married_2_children_2_salaries'
# The four per-profile subqueries are inner-joined on bfs_nr, min_income and
# max_income, so a bracket only appears in the result when it exists for all
# four profiles. Rows come back ordered by bfs_nr, then min_income.
# NOTE(review): bfs_nr is presumably a municipality identifier - confirm
# against the schema.
sql_query = """
select s.bfs_nr, s.min_income, s.rate as s_rate, m0c.rate as m0c_rate, m2c.rate as m2c_rate, m2c2s.rate as m2c2s_rate from
(select * from tax_rate
where profile = 'single') as s
join
(select * from tax_rate
where profile = 'married_no_children') as m0c
on m0c.bfs_nr = s.bfs_nr and m0c.min_income = s.min_income and m0c.max_income = s.max_income
join
(select * from tax_rate
where profile = 'married_2_children') as m2c
on m2c.bfs_nr = s.bfs_nr and m2c.min_income = s.min_income and m2c.max_income = s.max_income
join
(select * from tax_rate
where profile = 'married_2_children_2_salaries') as m2c2s
on m2c2s.bfs_nr = s.bfs_nr and m2c2s.min_income = s.min_income and m2c2s.max_income = s.max_income
order by s.bfs_nr, s.min_income
"""

In [5]:
# Execute the per-profile rate query on the open connection and materialise
# the full result set as a DataFrame.
df = pd.read_sql(sql=sql_query, con=db_conn)

# Cell output: summary statistics for every numeric column of the result.
df.describe()

Unnamed: 0,bfs_nr,min_income,s_rate,m0c_rate,m2c_rate,m2c2s_rate
count,53328.0,53328.0,53328.0,53328.0,53328.0,53328.0
mean,3301.048155,157916.666667,11.679109,8.609754,6.59793,6.230546
std,2130.290483,215612.427687,7.73139,7.421921,7.317855,7.190539
min,1.0,12500.0,0.0,0.0,0.0,0.0
25%,1083.0,33750.0,4.919688,1.13632,0.1,0.068
50%,3338.5,75000.0,12.10333,7.556205,3.552211,2.837271
75%,5411.0,181250.0,17.990356,14.456284,12.196979,11.721828
max,6810.0,1000000.0,29.751435,28.795846,28.174927,27.6682


In [12]:
# Derive rate differences between profiles within the same row
# (same bfs_nr and income bracket). Columns are added to `df` in place.

# married with 2 children minus married without children
df['2c_effect'] = df['m2c_rate'].sub(df['m0c_rate'])
# married without children minus single
df['m_effect'] = df['m0c_rate'].sub(df['s_rate'])
# married, 2 children, 2 salaries minus married, 2 children
df['2s_effect'] = df['m2c2s_rate'].sub(df['m2c_rate'])

# Children effect divided by the bracket's lower income bound.
df['2c_effect_by_income'] = df['2c_effect'].div(df['min_income'])

# Cell output: the first 48 rows of the augmented frame.
df.head(48)

Unnamed: 0,bfs_nr,min_income,s_rate,m0c_rate,m2c_rate,m2c2s_rate,2c_effect,m_effect,2s_effect,2c_effect_by_income
0,1,12500,0.59328,0.384,0.384,0.384,0.0,-0.20928,0.0,0.0
1,1,15000,1.107467,0.32,0.32,0.32,0.0,-0.787467,0.0,0.0
2,1,17500,1.618057,0.274286,0.274286,0.274286,0.0,-1.343771,0.0,0.0
3,1,20000,2.1055,0.4072,0.24,0.24,-0.1672,-1.6983,0.0,-8.36e-06
4,1,25000,2.82972,1.06144,0.192,0.192,-0.86944,-1.76828,0.0,-3.47776e-05
5,1,30000,3.6121,1.741433,0.16,0.16,-1.581433,-1.870667,0.0,-5.271444e-05
6,1,35000,4.302314,2.083829,0.137143,0.137143,-1.946686,-2.218486,0.0,-5.561959e-05
7,1,40000,4.757275,2.695925,0.13045,0.12,-2.565475,-2.06135,-0.01045,-6.413687e-05
8,1,45000,5.357289,3.083756,0.533956,0.106667,-2.5498,-2.273533,-0.427289,-5.666222e-05
9,1,50000,5.92508,3.49852,0.96962,0.18796,-2.5289,-2.42656,-0.78166,-5.0578e-05
