In [16]:
from __future__ import absolute_import, division, print_function

import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error, accuracy_score

import torch
# from torch.distributions import constraints

import pyro
import pyro.distributions as dist
from pyro.distributions import Normal, Uniform, Delta
from pyro.infer import EmpiricalMarginal, SVI, Trace_ELBO, JitTrace_ELBO, TracePredictive
from pyro.contrib.autoguide import AutoMultivariateNormal
from pyro.infer.mcmc.api import MCMC
from pyro.infer.mcmc import NUTS
from pyro.infer.mcmc.util import diagnostics
import pyro.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import normalize
from pyro.ops.stats import waic
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

from copy import deepcopy


# Fix Pyro's random seed so stochastic steps are repeatable between runs.
pyro.set_rng_seed(1)
# Fail fast when the installed Pyro is not a 0.4.1 release.
assert pyro.__version__.startswith('0.4.1')

In [6]:
# Render matplotlib figures inside the notebook output.
%matplotlib inline
# Log bare messages (no level/timestamp prefix) at INFO level and above.
logging.basicConfig(format='%(message)s', level=logging.INFO)
# Enable validation checks
pyro.enable_validation(True)
# True when a 'CI' environment variable is set.
smoke_test = ('CI' in os.environ)
# Re-seed here as well, so re-running this cell alone resets the random state.
pyro.set_rng_seed(1)

In [43]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('data/crap.csv')

In [44]:
# Per-row sum of the FELONY, MISDEMEANOR and VIOLATION columns. Plain column
# addition is vectorized and yields the same values (missing values still
# propagate) as a row-wise `apply` with a Python lambda, without the per-row cost.
data['target'] = data['FELONY'] + data['MISDEMEANOR'] + data['VIOLATION']

In [45]:
# Store the target as a plain integer column.
data['target'] = data['target'].astype(int)

In [46]:
def get_data(data, features, target):
    """Split ``data`` into train/test sets and return them as float32 tensors.

    A random 80/20 split is drawn with ``train_test_split``. The feature matrix
    of each split is passed through ``normalize`` before conversion; the labels
    and the ``'TotalPop'`` column are converted as-is.

    Args:
        data: DataFrame containing the feature and target columns.
        features: list of feature column names; must contain ``'TotalPop'``,
            which is read back from the split feature frames.
        target: name of the label column.

    Returns:
        Tuple ``(X_train, y_train, pop_train, X_test, y_test, pop_test)`` of
        ``torch.float32`` tensors, where ``pop_*`` is the un-normalized
        ``'TotalPop'`` column of the corresponding split.
    """
    # NOTE(review): sklearn's `normalize` rescales each row (sample) by default,
    # not each column -- confirm that per-sample scaling is what is wanted here.

    def _float_tensor(array):
        # ndarray -> torch tensor cast to float32.
        return torch.from_numpy(array).type(torch.float32)

    def _tensors_for(frame, labels):
        # Build (normalized features, labels, raw population) for one split.
        feature_tensor = _float_tensor(normalize(np.array(frame)))
        label_tensor = _float_tensor(np.array(labels))
        population_tensor = _float_tensor(np.array(frame['TotalPop']))
        return feature_tensor, label_tensor, population_tensor

    train_frame, test_frame, train_labels, test_labels = train_test_split(
        data[features], data[target], test_size=0.2
    )

    train_tensors = _tensors_for(train_frame, train_labels)
    test_tensors = _tensors_for(test_frame, test_labels)
    return train_tensors + test_tensors


In [47]:
# Module-level labelling thresholds read by sep_data():
#   target >= bool_split + offset -> class 1
#   target <= bool_split - offset -> class 0
bool_split = 3
offset = 3

In [48]:
def sep_data(row, split=None, margin=None):
    """Turn a row's ``'target'`` value into a binary label with a dead zone.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with a ``'target'`` entry.
        split: centre of the dead zone. When omitted, the notebook-level
            ``bool_split`` global is used, so ``df.apply(sep_data, axis=1)``
            behaves exactly as before.
        margin: half-width of the dead zone. When omitted, the notebook-level
            ``offset`` global is used.

    Returns:
        ``1`` if ``row['target'] >= split + margin``,
        ``0`` if ``row['target'] <= split - margin``,
        ``nan`` otherwise (rows inside the dead zone, meant to be dropped).
    """
    # Globals are only looked up when no explicit value is passed, which keeps
    # the function usable (and testable) without hidden notebook state.
    centre = bool_split if split is None else split
    half_width = offset if margin is None else margin

    value = row['target']
    if value >= centre + half_width:
        return 1
    if value <= centre - half_width:
        return 0
    return float('nan')

In [49]:
# Display the column names available in `data`.
data.columns

Index(['Unnamed: 0', 'month', 'CensusTract', 'TotalPop', 'Income', 'IncomeErr',
       'IncomePerCap', 'IncomePerCapErr', 'Poverty', 'ChildPoverty',
       'Professional', 'Service', 'Office', 'Construction', 'Production',
       'WorkAtHome', 'MeanCommute', 'Employed', 'PrivateWork', 'PublicWork',
       'SelfEmployed', 'FamilyWork', 'Unemployment', 'FELONY', 'MISDEMEANOR',
       'VIOLATION', 'restaurants', 'bar', 'park', 'subway_station', 'target'],
      dtype='object')

In [50]:
# Columns fed to the model: every column of `data` except 'Unnamed: 0',
# 'month', 'CensusTract', the three counts summed into 'target'
# (FELONY, MISDEMEANOR, VIOLATION) and 'target' itself.
features = [
    'TotalPop',
    'Income',
    'IncomeErr',
    'IncomePerCap',
    'IncomePerCapErr',
    'Poverty',
    'ChildPoverty',
    'Professional',
    'Service',
    'Office',
    'Construction',
    'Production',
    'WorkAtHome',
    'MeanCommute',
    'Employed',
    'PrivateWork',
    'PublicWork',
    'SelfEmployed',
    'FamilyWork',
    'Unemployment',
    'restaurants',
    'bar',
    'park',
    'subway_station',
]

# Name of the binary label column that the threshold sweep adds to its
# working copy of `data`.
target = 'binary_target'

In [52]:
# Sweep the (bool_split, offset) labelling thresholds and report the accuracy of
# a logistic regression for every setting:
#   target >= bool_split + offset -> class 1
#   target <= bool_split - offset -> class 0
#   anything in between           -> dropped
#
# bool_split / offset are intentionally kept as the loop variables: as before,
# they (together with data_new, the tensors and reg) hold the values of the last
# iteration once the cell has finished.
#
# NOTE(review): the printed class counts become very lopsided for large
# thresholds, so the accuracies should be read against the majority-class share
# rather than in isolation.
for bool_split in range(2, 20):
    for offset in range(1, 10):
        lower, upper = bool_split - offset, bool_split + offset
        if lower <= 0:
            continue
        print(bool_split, offset)

        # Vectorized equivalent of applying sep_data to every row: 1 above the
        # upper threshold, 0 below the lower one, NaN inside the dead zone.
        labels = np.where(
            data['target'] >= upper,
            1.0,
            np.where(data['target'] <= lower, 0.0, np.nan),
        )
        # assign() works on a copy, so `data` itself is never modified;
        # dropna() then removes the dead-zone rows (and any row with a missing value).
        data_new = data.assign(binary_target=labels).dropna()

        n_negative = int((data_new['binary_target'] == 0).sum())
        n_positive = int((data_new['binary_target'] == 1).sum())
        print(n_negative, n_positive)
        if n_negative == 0 or n_positive == 0:
            # A single-class subset cannot be fitted; skip this setting.
            print('-'*30)
            continue

        X_nuts_train, y_nuts_train, population, X_nuts_test, y_nuts_test, test_population = get_data(data_new, features, target)
        reg = LogisticRegression(C=0.8).fit(X_nuts_train, y_nuts_train)
        print('Train Score : ',reg.score(X_nuts_train, y_nuts_train))
        print('Test Score :', reg.score(X_nuts_test, y_nuts_test))
        print('-'*30)

        

2 1
4350 13227
Train Score :  0.7589787355095655
Test Score : 0.7610921501706485
------------------------------
3 1




8251 10067
Train Score :  0.6939402211000409
Test Score : 0.6943231441048034
------------------------------
3 2




4350 7710
Train Score :  0.7503109452736318
Test Score : 0.7657545605306799
------------------------------
4 1




11411 7710
Train Score :  0.716461820083682
Test Score : 0.7163398692810458
------------------------------
4 2




8251 6003
Train Score :  0.7456809611505744
Test Score : 0.7411434584356367
------------------------------
4 3




4350 4638
Train Score :  0.7726008344923505
Test Score : 0.764182424916574
------------------------------
5 1




13768 6003
Train Score :  0.7561962569549823
Test Score : 0.7436156763590392
------------------------------
5 2




11411 4638
Train Score :  0.7808240517174235
Test Score : 0.7847352024922118
------------------------------
5 3




8251 3602
Train Score :  0.8032060746677916
Test Score : 0.8030366933783214
------------------------------
5 4




4350 2769
Train Score :  0.8115891132572431
Test Score : 0.8012640449438202
------------------------------
6 1




15475 4638
Train Score :  0.7967060285891858
Test Score : 0.808351976137211
------------------------------
6 2




13768 3602
Train Score :  0.8244818652849741
Test Score : 0.8261370178468624
------------------------------
6 3




11411 2769
Train Score :  0.8474083215796897
Test Score : 0.8416784203102962
------------------------------
6 4




8251 2143
Train Score :  0.8450992182802165
Test Score : 0.8532948532948533
------------------------------
6 5




4350 1655
Train Score :  0.8334721065778518
Test Score : 0.8501248959200666
------------------------------
7 1




16840 3602
Train Score :  0.8385617317923317
Test Score : 0.8341892883345561
------------------------------
7 2




15475 2769
Train Score :  0.8608427543679342
Test Score : 0.866264730063031
------------------------------
7 3




13768 2143
Train Score :  0.8790854808296669
Test Score : 0.8768457430097393
------------------------------
7 4




11411 1655
Train Score :  0.8923650975889782
Test Score : 0.8844682478959449
------------------------------
7 5




8251 1298
Train Score :  0.8871580049744731
Test Score : 0.9015706806282723
------------------------------
7 6




4350 996
Train Score :  0.867867165575304
Test Score : 0.8766355140186916
------------------------------
8 1




17876 2769
Train Score :  0.8691571809154759
Test Score : 0.864131751029305
------------------------------
8 2




16840 2143
Train Score :  0.8908863426840511
Test Score : 0.8806952857519094
------------------------------
8 3




15475 1655
Train Score :  0.9044804436660829
Test Score : 0.9127262113251605
------------------------------
8 4




13768 1298
Train Score :  0.9191005642217059
Test Score : 0.9134041141340411
------------------------------
8 5




11411 996
Train Score :  0.9232241813602015
Test Score : 0.9335213537469782
------------------------------
8 6




8251 799
Train Score :  0.924585635359116
Test Score : 0.9027624309392265
------------------------------
8 7




4350 608
Train Score :  0.8961169944528492
Test Score : 0.8951612903225806
------------------------------
9 1




18709 2143
Train Score :  0.8959295006294586
Test Score : 0.9031407336370175
------------------------------
9 2




17876 1655
Train Score :  0.9156426011264721
Test Score : 0.9193754799078577
------------------------------
9 3




16840 1298
Train Score :  0.9286698828394211
Test Score : 0.9324696802646086
------------------------------
9 4




15475 996
Train Score :  0.9395871281117183
Test Score : 0.9423368740515933
------------------------------
9 5




13768 799
Train Score :  0.9459366686690123
Test Score : 0.9498970487302677
------------------------------
9 6




11411 608
Train Score :  0.9530941237649506
Test Score : 0.9463394342762064
------------------------------
9 7




8251 506
Train Score :  0.9434689507494647
Test Score : 0.9526255707762558
------------------------------
9 8




4350 418
Train Score :  0.9202936549554274
Test Score : 0.9171907756813418
------------------------------
10 1




19335 1655
Train Score :  0.921033825631253
Test Score : 0.918532634587899
------------------------------
10 2




18709 1298
Train Score :  0.9345204623555139
Test Score : 0.94127936031984
------------------------------
10 3




17876 996
Train Score :  0.9468768629529045
Test Score : 0.9525827814569536
------------------------------
10 4




16840 799
Train Score :  0.9547161788675501
Test Score : 0.9608843537414966
------------------------------
10 5




15475 608
Train Score :  0.9627700917145966
Test Score : 0.9661175007771216
------------------------------
10 6




13768 506
Train Score :  0.9647079429021805
Test Score : 0.9691768826619965
------------------------------
10 7




11411 418
Train Score :  0.9661840853851844
Test Score : 0.9666103127641589
------------------------------
10 8




8251 335
Train Score :  0.9649097262667443
Test Score : 0.9563445867287543
------------------------------
10 9




4350 270
Train Score :  0.9453463203463204
Test Score : 0.9448051948051948
------------------------------
11 1




19823 1298
Train Score :  0.9380326704545454
Test Score : 0.9401183431952663
------------------------------
11 2




19335 996
Train Score :  0.9513649778652238
Test Score : 0.9515613474305384
------------------------------
11 3




18709 799
Train Score :  0.9602076124567474
Test Score : 0.9582265504869297
------------------------------
11 4




17876 608
Train Score :  0.9682829512409549
Test Score : 0.9651068433865296
------------------------------
11 5




16840 506
Train Score :  0.9727587200922456
Test Score : 0.9668587896253602
------------------------------
11 6




15475 418
Train Score :  0.9738084001887682
Test Score : 0.9773513683548286
------------------------------
11 7




13768 335
Train Score :  0.97775217160078
Test Score : 0.9748316199929103
------------------------------
11 8




11411 270
Train Score :  0.9783818493150684
Test Score : 0.9764655541292255
------------------------------
11 9




8251 220
Train Score :  0.9747638724911453
Test Score : 0.9781710914454277
------------------------------
12 1




20180 996
Train Score :  0.9531286894923259
Test Score : 0.9534938621340887
------------------------------
12 2




19823 799
Train Score :  0.9623567921440261
Test Score : 0.9583030303030303
------------------------------
12 3




19335 608
Train Score :  0.9708537044001504
Test Score : 0.9671596891451492
------------------------------
12 4




18709 506
Train Score :  0.9734582357533177
Test Score : 0.9776216497527973
------------------------------
12 5




17876 418
Train Score :  0.9778612914246669
Test Score : 0.977589505329325
------------------------------
12 6




16840 335
Train Score :  0.9800582241630277
Test Score : 0.9822416302765647
------------------------------
12 7




15475 270
Train Score :  0.9828516989520483
Test Score : 0.9828516989520483
------------------------------
12 8




13768 220
Train Score :  0.9845397676496872
Test Score : 0.9832022873481058
------------------------------
12 9




11411 181
Train Score :  0.9852259247277041
Test Score : 0.9810263044415697
------------------------------
13 1




20482 799
Train Score :  0.9635808270676691
Test Score : 0.9607704956542166
------------------------------
13 2




20180 608
Train Score :  0.9705953096812988
Test Score : 0.9713804713804713
------------------------------
13 3




19823 506
Train Score :  0.9746049314394638
Test Score : 0.9771273979340875
------------------------------
13 4




19335 418
Train Score :  0.978736868750791
Test Score : 0.9792457605669451
------------------------------
13 5




18709 335
Train Score :  0.9823432884804726
Test Score : 0.9826726174849042
------------------------------
13 6




17876 270
Train Score :  0.9849131992284376
Test Score : 0.9859504132231405
------------------------------
13 7




16840 220
Train Score :  0.9873241500586166
Test Score : 0.9862250879249707
------------------------------
13 8




15475 181
Train Score :  0.9886617694027467
Test Score : 0.9875478927203065
------------------------------
13 9




13768 147
Train Score :  0.9889507725476105
Test Score : 0.9913762127200862
------------------------------
14 1




20679 608
Train Score :  0.9706970462152799
Test Score : 0.9744011272898074
------------------------------
14 2




20482 506
Train Score :  0.976235854675402
Test Score : 0.974511672224869
------------------------------
14 3




20180 418
Train Score :  0.9799126107537323
Test Score : 0.9788834951456311
------------------------------
14 4




19823 335
Train Score :  0.9833188639464219
Test Score : 0.9836309523809523
------------------------------
14 5




19335 270
Train Score :  0.9864830400408059
Test Score : 0.9852078551389951
------------------------------
14 6




18709 220
Train Score :  0.9889718021528099
Test Score : 0.9860010565240359
------------------------------
14 7




17876 181
Train Score :  0.9898926964347525
Test Score : 0.9903100775193798
------------------------------
14 8




16840 147
Train Score :  0.9913900949297225
Test Score : 0.9911712772218952
------------------------------
14 9




15475 121
Train Score :  0.9921449182430266
Test Score : 0.9926282051282052
------------------------------
15 1




20870 506
Train Score :  0.9770175438596491
Test Score : 0.9735734331150608
------------------------------
15 2




20679 418
Train Score :  0.9806837708123481
Test Score : 0.9781990521327014
------------------------------
15 3




20482 335
Train Score :  0.9838467543385576
Test Score : 0.984149855907781
------------------------------
15 4




20180 270
Train Score :  0.9864303178484107
Test Score : 0.9882640586797066
------------------------------
15 5




19823 220
Train Score :  0.9892104278408382
Test Score : 0.9882763781491644
------------------------------
15 6




19335 181
Train Score :  0.9901998462720983
Test Score : 0.992827868852459
------------------------------
15 7




18709 147
Train Score :  0.992574913815964
Test Score : 0.9907211028632026
------------------------------
15 8




17876 121
Train Score :  0.9936097798152392
Test Score : 0.9919444444444444
------------------------------
15 9




16840 102
Train Score :  0.9936545414299417
Test Score : 0.9952788433166125
------------------------------
16 1




20972 418
Train Score :  0.9812412342215989
Test Score : 0.977325853202431
------------------------------
16 2




20870 335
Train Score :  0.9843197359113417
Test Score : 0.9837302522989861
------------------------------
16 3




20679 270
Train Score :  0.9867533862402291
Test Score : 0.98854415274463
------------------------------
16 4




20482 220
Train Score :  0.9900972163516696
Test Score : 0.9864766964501328
------------------------------
16 5




20180 181
Train Score :  0.9906679764243614
Test Score : 0.9928799410753744
------------------------------
16 6




19823 147
Train Score :  0.991862794191287
Test Score : 0.9957436154231347
------------------------------
16 7




19335 121
Train Score :  0.9935749164739142
Test Score : 0.9946043165467626
------------------------------
16 8




18709 102
Train Score :  0.9946172248803827
Test Score : 0.9944193462662769
------------------------------
16 9




17876 83
Train Score :  0.9953365351151946
Test Score : 0.9955456570155902
------------------------------
17 1




21060 335
Train Score :  0.9839331619537275
Test Score : 0.9859780322505258
------------------------------
17 2




20972 270
Train Score :  0.9871123403754487
Test Score : 0.987997175806072
------------------------------
17 3




20870 220
Train Score :  0.9894499762920815
Test Score : 0.9900426742532006
------------------------------
17 4




20679 181
Train Score :  0.9913111217641419
Test Score : 0.9913710450623202
------------------------------
17 5




20482 147
Train Score :  0.9925468096709689
Test Score : 0.9941832283082889
------------------------------
17 6




20180 121
Train Score :  0.9943349753694581
Test Score : 0.9928589017483378
------------------------------
17 7




19823 102
Train Score :  0.9950439146800502
Test Score : 0.9942283563362609
------------------------------
17 8




19335 83
Train Score :  0.9957512553109309
Test Score : 0.9956230690010298
------------------------------
17 9




18709 73
Train Score :  0.9957404326123128
Test Score : 0.9976044716529145
------------------------------
18 1




21143 270
Train Score :  0.9875656742556918
Test Score : 0.986691571328508
------------------------------
18 2




21060 220
Train Score :  0.9898966165413534
Test Score : 0.9887218045112782
------------------------------
18 3




20972 181
Train Score :  0.9920222195957925
Test Score : 0.9891278657527771
------------------------------
18 4




20870 147
Train Score :  0.9929816213644204
Test Score : 0.9931018078020932
------------------------------
18 5




20679 121
Train Score :  0.9944711538461538
Test Score : 0.9930288461538461
------------------------------
18 6




20482 102
Train Score :  0.9951417987490132
Test Score : 0.9946563031333495
------------------------------
18 7




20180 83
Train Score :  0.9959901295496607
Test Score : 0.9955588452997779
------------------------------
18 8




19823 73
Train Score :  0.995790399597889
Test Score : 0.9984924623115577
------------------------------
18 9




19335 69
Train Score :  0.9961991883012304
Test Score : 0.9974233444988405
------------------------------
19 1




21208 220
Train Score :  0.989732819974332
Test Score : 0.9897340177321512
------------------------------
19 2




21143 181
Train Score :  0.9914414678468844
Test Score : 0.9917936694021102
------------------------------
19 3




21060 147
Train Score :  0.9935750073681108
Test Score : 0.991041961338991
------------------------------
19 4




20972 121
Train Score :  0.994014460116155
Test Score : 0.995259540175397
------------------------------
19 5




20870 102
Train Score :  0.9949931453776003
Test Score : 0.9957091775923719
------------------------------
19 6




20679 83
Train Score :  0.9962670841110242
Test Score : 0.9949434143992295
------------------------------
19 7




20482 73
Train Score :  0.9962296278277791
Test Score : 0.997324252006811
------------------------------
19 8




20180 69
Train Score :  0.9966047286869559
Test Score : 0.9965432098765432
------------------------------
19 9




19823 56
Train Score :  0.996918820348362
Test Score : 0.9982394366197183
------------------------------


