## Multiple Output Models
+ Multi Task Elastic Net
+ Multi Task Models


In [21]:
import pandas as pd
import numpy as np
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [None]:
#### Read in Crime by year / month data set

In [12]:
## read in data
path = '../Homeworks/chicagoCrimesByYear.csv'

# Missing values are replaced with 0 before anything else is done.
# sort_values returns a new frame rather than sorting in place, so its result
# has to be kept: the forecasting cells pair row i with row i + 1 and therefore
# need the rows in chronological (year, month) order.
df = (
    pd.read_csv(path)
    .fillna(0)
    .sort_values(by=['year', 'month'])
    ## set month year as index
    .set_index(['year', 'month'])
)

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ARSON,ASSAULT,BATTERY,BURGLARY,CONCEALED CARRY LICENSE VIOLATION,CRIM SEXUAL ASSAULT,CRIMINAL DAMAGE,CRIMINAL TRESPASS,DECEPTIVE PRACTICE,DOMESTIC VIOLENCE,...,OTHER OFFENSE,PROSTITUTION,PUBLIC INDECENCY,PUBLIC PEACE VIOLATION,RITUALISM,ROBBERY,SEX OFFENSE,STALKING,THEFT,WEAPONS VIOLATION
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2001,1,67.0,2123.0,6525.0,1934.0,0.0,163.0,3966.0,1191.0,1393.0,1.0,...,2800.0,563.0,0.0,161.0,2.0,1396.0,218.0,26.0,7866.0,337.0
2001,2,57.0,2029.0,6040.0,1666.0,0.0,135.0,3664.0,1063.0,1159.0,0.0,...,2487.0,426.0,1.0,173.0,0.0,1159.0,149.0,13.0,6669.0,301.0
2001,3,93.0,2824.0,7658.0,1832.0,0.0,153.0,4615.0,1141.0,1308.0,0.0,...,2836.0,550.0,0.0,267.0,2.0,1399.0,184.0,17.0,7765.0,344.0
2001,4,89.0,2746.0,8325.0,1931.0,0.0,133.0,4922.0,1133.0,1197.0,0.0,...,2483.0,564.0,1.0,229.0,1.0,1341.0,169.0,29.0,7702.0,321.0
2001,5,94.0,2903.0,8886.0,1997.0,0.0,156.0,4756.0,1067.0,1255.0,0.0,...,2529.0,503.0,1.0,239.0,1.0,1491.0,225.0,11.0,8417.0,390.0


#### One Month Ahead Forecasting using Multi-Task Elastic Net
This predicts all crime counts one month in advance.
It uses a linear model (elastic net) at its core.

In [27]:
# Date shift: every row except the last is a feature row, and the row that
# follows it (the next month) is its target.
X = df.iloc[:-1]
y = df.iloc[1:]

# Fit a single multi-task elastic net across all crime categories.
model = MultiTaskElasticNet()
model.fit(X, y)

# Predict on the same rows the model was fit on, labelled by the target months.
preds = pd.DataFrame(model.predict(X), index=y.index, columns=df.columns)

# Per-column performance. These R^2 values are in-sample, because the
# predictions come from the training rows themselves.
r2_dict = {col: r2_score(y[col], preds[col]) for col in df.columns}

print('models r2 scores :')
r2_dict


models r2 scores :


  check_random_state(self.random_state), random)


{'ARSON': 0.8279448476873243,
 'ASSAULT': 0.9068680880368422,
 'BATTERY': 0.9407085699793026,
 'BURGLARY': 0.9485009402508481,
 'CONCEALED CARRY LICENSE VIOLATION': 0.8834404905302924,
 'CRIM SEXUAL ASSAULT': 0.5570321612507594,
 'CRIMINAL DAMAGE': 0.9353298038053376,
 'CRIMINAL TRESPASS': 0.9474962680572311,
 'DECEPTIVE PRACTICE': 0.8109374610188494,
 'DOMESTIC VIOLENCE': 1.0,
 'GAMBLING': 0.8668658471575261,
 'HOMICIDE': 0.6466780783351185,
 'HUMAN TRAFFICKING': 0.40584549622460564,
 'INTERFERENCE WITH PUBLIC OFFICER': 0.8608623850298649,
 'INTIMIDATION': 0.6905087918508732,
 'KIDNAPPING': 0.84407578184751,
 'LIQUOR LAW VIOLATION': 0.8917977437524581,
 'MOTOR VEHICLE THEFT': 0.9542287561941238,
 'NARCOTICS': 0.9621222248685374,
 'NON - CRIMINAL': 0.3595918889680684,
 'NON-CRIMINAL': 0.693736446230367,
 'NON-CRIMINAL (SUBJECT SPECIFIED)': 0.17496925575125954,
 'OBSCENITY': 0.4873378224276309,
 'OFFENSE INVOLVING CHILDREN': 0.5324528390421861,
 'OTHER NARCOTIC VIOLATION': 0.14290431114

#### One Month Ahead Forecasting using DecisionTreeRegressor
This predicts all crime counts one month in advance.
It uses multiple decision tree regressors, one per crime category (via `MultiOutputRegressor`).

In [24]:
# Date shift: every row except the last is a feature row, and the row that
# follows it (the next month) is its target.
X = df.iloc[:-1]
y = df.iloc[1:]

# Wrap a decision tree in MultiOutputRegressor so all columns are predicted.
model = MultiOutputRegressor(DecisionTreeRegressor())
model.fit(X, y)

# Predict on the same rows the model was fit on, labelled by the target months.
preds = pd.DataFrame(model.predict(X), index=y.index, columns=df.columns)

# Per-column performance. These R^2 values are in-sample, because the
# predictions come from the training rows themselves.
r2_dict = {col: r2_score(y[col], preds[col]) for col in df.columns}

print('models r2 scores :')
r2_dict


models r2 scores :


{'ARSON': 1.0,
 'ASSAULT': 1.0,
 'BATTERY': 1.0,
 'BURGLARY': 1.0,
 'CONCEALED CARRY LICENSE VIOLATION': 1.0,
 'CRIM SEXUAL ASSAULT': 1.0,
 'CRIMINAL DAMAGE': 1.0,
 'CRIMINAL TRESPASS': 1.0,
 'DECEPTIVE PRACTICE': 1.0,
 'DOMESTIC VIOLENCE': 1.0,
 'GAMBLING': 1.0,
 'HOMICIDE': 1.0,
 'HUMAN TRAFFICKING': 1.0,
 'INTERFERENCE WITH PUBLIC OFFICER': 1.0,
 'INTIMIDATION': 1.0,
 'KIDNAPPING': 1.0,
 'LIQUOR LAW VIOLATION': 1.0,
 'MOTOR VEHICLE THEFT': 1.0,
 'NARCOTICS': 1.0,
 'NON - CRIMINAL': 1.0,
 'NON-CRIMINAL': 1.0,
 'NON-CRIMINAL (SUBJECT SPECIFIED)': 1.0,
 'OBSCENITY': 1.0,
 'OFFENSE INVOLVING CHILDREN': 1.0,
 'OTHER NARCOTIC VIOLATION': 1.0,
 'OTHER OFFENSE': 1.0,
 'PROSTITUTION': 1.0,
 'PUBLIC INDECENCY': 1.0,
 'PUBLIC PEACE VIOLATION': 1.0,
 'RITUALISM': 1.0,
 'ROBBERY': 1.0,
 'SEX OFFENSE': 1.0,
 'STALKING': 1.0,
 'THEFT': 1.0,
 'WEAPONS VIOLATION': 1.0}

#### One Year Ahead Forecasting using Multi-Task Elastic Net
This predicts all yearly crime counts one year in advance.
It uses a linear model (elastic net) at its core, fit on yearly totals.

In [37]:
# Sum the monthly rows into one row per year, then keep only years before
# 2020 (filter out partial year).
df_grouped = (
    df.groupby(df.index.get_level_values('year'))
    .sum()
    .loc[lambda yearly: yearly.index < 2020, :]
)

# Date shift: each year's totals are the features for the following year's totals.
X = df_grouped.iloc[:-1]
y = df_grouped.iloc[1:]

# Fit a single multi-task elastic net across all crime categories.
model = MultiTaskElasticNet()
model.fit(X, y)

# Predict on the same rows the model was fit on, labelled by the target years.
preds = pd.DataFrame(model.predict(X), index=y.index, columns=df.columns)

# Per-column performance. These R^2 values are in-sample, because the
# predictions come from the training rows themselves.
r2_dict = {col: r2_score(y[col], preds[col]) for col in df.columns}

print('models r2 scores :')
r2_dict


models r2 scores :


{'ARSON': 0.9999979510048724,
 'ASSAULT': 0.9999424941596216,
 'BATTERY': 0.9999823024865105,
 'BURGLARY': 0.9999696735395049,
 'CONCEALED CARRY LICENSE VIOLATION': 0.9999822617376024,
 'CRIM SEXUAL ASSAULT': 0.9996094927490798,
 'CRIMINAL DAMAGE': 0.9999969519982373,
 'CRIMINAL TRESPASS': 0.9999942355924377,
 'DECEPTIVE PRACTICE': 0.9997876099058433,
 'DOMESTIC VIOLENCE': 1.0,
 'GAMBLING': 0.9999977442676548,
 'HOMICIDE': 0.9998227775826299,
 'HUMAN TRAFFICKING': 0.9997121314359062,
 'INTERFERENCE WITH PUBLIC OFFICER': 0.9999581795301626,
 'INTIMIDATION': 0.999993365531253,
 'KIDNAPPING': 0.9999863020663545,
 'LIQUOR LAW VIOLATION': 0.9999930142049961,
 'MOTOR VEHICLE THEFT': 0.9999913438153967,
 'NARCOTICS': 0.9999915569763381,
 'NON - CRIMINAL': 0.9995428702114323,
 'NON-CRIMINAL': 0.9996938765981948,
 'NON-CRIMINAL (SUBJECT SPECIFIED)': 0.9979169647403296,
 'OBSCENITY': 0.9999930481451783,
 'OFFENSE INVOLVING CHILDREN': 0.9999708700687479,
 'OTHER NARCOTIC VIOLATION': 0.99793619787

#### Predict Next Year's crime trends 
use all the data to create a prediction for next year

In [58]:
# Build the list of year labels extended by one year past the last observed
# year; the extra entry is the year being forecast.
index = df_grouped.index.tolist()
last_value = index[-1]
index.append(last_value + 1)

# Display the labels; without a trailing expression the cell shows nothing.
index


[2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020]

In [61]:
# Each row of df_grouped produces a prediction labelled with the following
# year, so the labels are every year after the first plus one extra year
# beyond the last.
index = df_grouped.index.tolist()
last_value = index[-1]
index = index[1:] + [last_value + 1]

# Run the fitted model over every yearly row and label the outputs by target year.
preds = pd.DataFrame(
    model.predict(df_grouped),
    index=index,
    columns=df_grouped.columns,
)
preds.tail()

Unnamed: 0,ARSON,ASSAULT,BATTERY,BURGLARY,CONCEALED CARRY LICENSE VIOLATION,CRIM SEXUAL ASSAULT,CRIMINAL DAMAGE,CRIMINAL TRESPASS,DECEPTIVE PRACTICE,DOMESTIC VIOLENCE,...,OTHER OFFENSE,PROSTITUTION,PUBLIC INDECENCY,PUBLIC PEACE VIOLATION,RITUALISM,ROBBERY,SEX OFFENSE,STALKING,THEFT,WEAPONS VIOLATION
2016,515.296919,18716.367243,50255.417403,14275.445138,36.168284,1574.499407,30979.558169,6307.680821,19021.056338,0.0,...,17306.777113,796.949502,10.009967,1615.249648,0.004618,11931.754471,998.906056,175.997101,61561.158265,3453.147195
2017,444.308109,19295.180803,49198.742268,13018.009172,68.850056,1641.502753,29066.257724,6808.81367,19352.106203,0.0,...,17257.727525,734.646985,10.0325,1495.595278,0.007822,11890.31849,1027.68901,190.931198,64379.083362,4684.44229
2018,373.052256,20395.838902,49808.829031,11749.368737,148.951554,1651.541223,27823.07396,6907.928083,19356.846246,0.0,...,17236.774437,712.198138,13.954062,1373.053573,0.004675,9682.437264,1136.637882,206.941091,65226.480395,5456.128024
2019,374.98432,20597.11669,49451.375994,9624.132815,216.937004,1595.146007,26656.718211,6814.886667,18014.858793,0.0,...,16637.004493,679.832283,11.009329,1516.6665,-6.9e-05,7989.917136,1273.590198,221.970231,62296.431455,6321.426208
2020,461.821449,20690.346405,50990.89817,10566.85196,238.959559,1489.070224,29184.75892,7133.297817,15955.826454,0.0,...,18408.799082,739.428168,3.54892,1523.632572,2.635373,8314.662539,1424.34047,213.76528,63350.511153,6369.384959
