# Lab | Random Forests

In [172]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, f1_score, recall_score
from sklearn.model_selection import cross_val_score

In [173]:
pd.set_option('display.max_columns',None)
categorical = pd.read_csv('categorical.csv')
numerical = pd.read_csv('numerical.csv')
target = pd.read_csv('target.csv')

In [174]:
X = pd.concat([categorical, numerical], axis=1)
y = target['TARGET_B']

In [175]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)


In [176]:
X_train_num = X_train.select_dtypes(np.number)
X_train_cat = X_train.select_dtypes(object)
X_test_num = X_test.select_dtypes(np.number)
X_test_cat = X_test.select_dtypes(object)

In [177]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_num_scaled = scaler.fit_transform(X_train_num)
X_test_num_scaled = scaler.transform(X_test_num)

In [178]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(drop='first')
X_train_cat_encoded = enc.fit_transform(X_train_cat).toarray()
X_test_cat_encoded = enc.transform(X_test_cat).toarray()
column_names = enc.get_feature_names_out(X_train_cat.columns)

In [179]:
X_train_scaled = pd.concat([pd.DataFrame(X_train_num_scaled, columns=X_train_num.columns), pd.DataFrame(X_train_cat_encoded, columns = column_names)], axis=1)
X_test_scaled = pd.concat([pd.DataFrame(X_test_num_scaled, columns=X_test_num.columns), pd.DataFrame(X_test_cat_encoded, columns = column_names)], axis=1)

Check Imbalanced Dataset

In [180]:
y_train = y_train.reset_index().drop(['index'], axis=1)

In [181]:
trainset= pd.concat([X_train_scaled, y_train], axis=1)
trainset

Unnamed: 0,CLUSTER,DATASRCE,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM,TCODE,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MAXRAMNT,LASTGIFT,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,HOMEOWNR_U,GENDER_M,GENDER_other,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,TARGET_B
0,0.442308,1.0,0.000000,0.214286,0.0,0.237113,0.000000,0.636364,0.090909,0.681818,1.000000,0.5,0.000000,0.916667,0.181818,0.000017,0.762887,0.500000,0.666667,0.008299,0.000000,0.313131,0.101010,0.686869,0.060606,0.121212,0.034483,1.000000,0.007153,0.009129,0.007683,1.000000,0.000000,0.000000,0.474747,0.535354,0.757576,0.212121,0.000000,0.020202,0.020202,0.0,0.000000,0.000000,0.000000,0.000000,0.021277,0.000000,0.000000,0.017544,0.0,0.023256,0.404762,0.500000,0.547619,0.428571,0.535714,0.583333,0.333333,0.363636,0.474747,0.171717,0.131313,0.212121,0.191919,0.131313,0.151515,0.121212,0.060606,0.141414,0.141414,0.353535,0.212121,0.161616,0.282828,0.090909,0.252525,0.212121,0.383838,0.424242,0.232323,0.101010,0.040404,0.616162,0.101010,0.095890,0.222222,0.272308,0.371429,0.777778,0.757576,0.028571,0.222222,0.202020,0.101010,0.000000,0.000000,0.0,0.105167,0.107333,0.230769,0.230769,0.717172,0.292929,0.939394,0.070707,0.000000,0.343434,0.757576,0.595960,0.242424,0.888889,0.121212,0.101010,0.04,0.080808,0.090909,0.212121,0.040404,0.186667,0.434343,0.191919,0.162791,0.141414,0.012346,0.000000,0.010101,0.040404,0.232323,0.757576,0.000000,0.000000,0.525253,0.635294,0.633333,0.229508,0.100,0.161616,0.060606,0.000000,0.111111,0.090909,0.090909,0.000000,0.030303,0.080808,0.808081,0.929293,0.337607,0.327189,0.643587,0.225333,0.220667,0.238000,0.228667,0.068232,0.101010,0.313131,0.131313,0.212121,0.212121,0.050505,0.00,0.000000,0.000000,0.080808,0.333333,0.161616,0.212121,0.161616,0.060606,0.00,0.000000,0.000000,0.414141,0.070707,0.575758,0.070707,0.343434,0.666667,0.111111,0.757576,0.202020,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.050505,0.000000,0.050505,0.155556,0.250000,0.050505,0.454545,0.676768,0.666667,0.686869,0.636364,0.686869,0.909091,0.595960,1.000000,1.000000,0.000000,0.272727,0.202020,0.020202,0.101010,0.171717,0.000000,0.000000,0.070707,0.000000,0.040404,0.080808,0.028571,0.030303,0.000000,0.000000,0.020202,0.141414,0.100000,0.078125,0.020202,0.222222,0.030303,0.000000,0.000000,0.000000,0.030303,0.282828,0.070707,0.090909,0.060606,0.121212,0.030303,0.040404,0.595960,0.161616,0.0,0.823529,0.050505,0.101010,0.292929,0.121212,0.162162,0.252525,0.141414,0.072165,0.222222,0.033333,0.263889,0.080808,0.000000,0.000000,0.000000,0.211268,0.313131,0.066667,0.101010,0.303030,0.686869,0.000000,0.000000,0.060606,0.000000,0.012048,0.0,0.0,0.122449,0.018182,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.00000,0.011111,0.676768,0.989899,0.020202,0.000000,0.000000,0.969697,0.666667,0.131313,0.322581,0.615385,0.000000,0.050505,0.050505,0.141414,0.464646,0.545455,0.0,0.000000,0.212121,0.030303,0.262626,0.474747,0.0,0.030303,1.000000,0.000000,1.000000,1.000000,0.909091,0.285714,0.4,0.090909,0.080808,0.366667,0.230366,0.263158,0.129870,0.008658,0.063559,0.195122,0.0010,0.001001,0.007,0.003676,0.004720,0.127215,1.0,0.666667,0.508197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,1.000000,0.5,0.666667,0.928571,0.0,0.463918,0.000000,0.954545,0.090909,0.954545,0.090909,0.5,0.090909,1.000000,0.090909,0.000000,0.536082,0.666667,1.000000,0.000000,0.000000,0.292929,0.242424,0.383838,0.070707,0.080808,0.045977,1.000000,0.045856,0.055403,0.044968,0.000000,0.000000,1.000000,0.494949,0.515152,0.757576,0.161616,0.090909,0.000000,0.010101,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.010309,0.000000,0.0,0.000000,0.416667,0.559524,0.607143,0.452381,0.583333,0.630952,0.373333,0.393939,0.404040,0.202020,0.111111,0.181818,0.161616,0.151515,0.141414,0.121212,0.141414,0.141414,0.181818,0.343434,0.191919,0.151515,0.353535,0.141414,0.323232,0.212121,0.333333,0.464646,0.303030,0.141414,0.050505,0.626263,0.080808,0.150685,0.181818,0.290769,0.395714,0.787879,0.777778,0.000000,0.030303,0.030303,0.010101,0.020202,0.020202,0.0,0.042833,0.052667,0.153846,0.076923,0.797980,0.212121,0.898990,0.111111,0.070707,0.393939,0.777778,0.636364,0.303030,0.888889,0.121212,0.090909,0.04,0.070707,0.101010,0.181818,0.020202,0.213333,0.404040,0.181818,0.093023,0.080808,0.049383,0.000000,0.010101,0.020202,0.040404,0.161616,0.000000,0.010101,0.373737,0.576471,0.577778,0.229508,0.125,0.030303,0.000000,0.191919,0.141414,0.020202,0.010101,0.040404,0.000000,0.010101,0.050505,0.303030,0.000000,0.413210,0.761635,0.117333,0.142667,0.148667,0.166000,0.044796,0.454545,0.202020,0.141414,0.141414,0.050505,0.010101,0.00,0.000000,0.000000,0.363636,0.222222,0.171717,0.171717,0.060606,0.010101,0.00,0.000000,0.000000,0.414141,0.141414,0.252525,0.232323,0.353535,0.656566,0.040404,0.767677,0.212121,0.000000,0.000000,0.000000,0.0,0.00,0.010101,0.020202,0.000000,0.444444,0.344444,0.447368,0.131313,0.595960,0.484848,0.585859,0.404040,0.555556,0.363636,0.555556,0.444444,0.575758,0.909091,0.101010,0.070707,0.060606,0.030303,0.121212,0.141414,0.000000,0.036364,0.101010,0.050505,0.131313,0.131313,0.100000,0.070707,0.040404,0.049180,0.080808,0.242424,0.083333,0.031250,0.030303,0.151515,0.050505,0.030303,0.020202,0.016393,0.080808,0.080808,0.030303,0.060606,0.070707,0.080808,0.040404,0.080808,0.696970,0.040404,0.0,0.705882,0.202020,0.212121,0.393939,0.111111,0.081081,0.030303,0.030303,0.010309,0.242424,0.033333,0.277778,0.040404,0.000000,0.000000,0.000000,0.197183,0.292929,0.033333,0.242424,0.171717,0.383838,0.101010,0.000000,0.030303,0.000000,0.048193,0.0,0.0,0.061224,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.808081,0.989899,0.010101,0.000000,0.010101,0.888889,0.606061,0.232323,0.290323,0.442308,0.010101,0.050505,0.202020,0.454545,0.595960,0.414141,0.0,0.016129,0.666667,0.121212,0.101010,0.010101,0.0,0.111111,0.969697,0.010101,0.565657,0.979798,0.868687,0.380952,0.4,0.070707,0.070707,0.083333,0.052356,0.263158,0.142857,0.000634,0.000000,0.024390,0.0200,0.003003,0.020,0.008272,0.018738,0.599688,0.0,0.000000,0.967213,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0
2,0.615385,1.0,0.666667,0.357143,0.0,0.381443,0.727273,0.727273,0.818182,0.909091,0.909091,0.0,0.909091,0.916667,0.727273,0.000017,0.608247,0.666667,0.111111,0.020747,0.000000,0.424242,0.161616,0.626263,0.101010,0.020202,0.011494,0.111111,0.067304,0.091154,0.087309,1.000000,0.000000,0.000000,0.454545,0.555556,0.989899,0.000000,0.000000,0.010101,0.020202,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.017544,0.0,0.011628,0.595238,0.702381,0.738095,0.559524,0.642857,0.678571,0.200000,0.505051,0.353535,0.151515,0.101010,0.161616,0.111111,0.080808,0.151515,0.252525,0.151515,0.232323,0.191919,0.303030,0.151515,0.131313,0.464646,0.141414,0.444444,0.292929,0.494949,0.232323,0.101010,0.040404,0.010101,0.606061,0.111111,0.178082,0.151515,0.221538,0.298571,0.454545,0.383838,0.042857,0.404040,0.373737,0.323232,0.020202,0.020202,0.0,0.102667,0.108167,0.307692,0.230769,0.696970,0.313131,0.838384,0.171717,0.575758,0.202020,0.666667,0.545455,0.131313,0.777778,0.232323,0.070707,0.02,0.060606,0.131313,0.262626,0.060606,0.173333,0.434343,0.424242,0.000000,0.000000,0.000000,0.010101,0.010101,0.060606,0.242424,0.757576,0.000000,0.030303,0.191919,0.494118,0.511111,0.196721,0.100,0.171717,0.232323,0.141414,0.040404,0.080808,0.181818,0.010101,0.020202,0.292929,0.777778,0.969697,0.884615,0.201229,0.611805,0.141333,0.156000,0.165333,0.188000,0.068255,0.333333,0.272727,0.191919,0.131313,0.050505,0.010101,0.02,0.000000,0.000000,0.242424,0.282828,0.222222,0.161616,0.070707,0.020202,0.02,0.000000,0.000000,0.454545,0.050505,0.474747,0.151515,0.676768,0.333333,0.323232,0.868687,0.111111,0.000000,0.000000,0.000000,0.0,0.00,0.010101,0.010101,0.012048,0.353535,0.200000,0.328947,0.080808,0.555556,0.464646,0.525253,0.414141,0.484848,0.404040,0.656566,0.484848,0.676768,1.000000,0.070707,0.151515,0.070707,0.060606,0.141414,0.151515,0.000000,0.036364,0.151515,0.040404,0.111111,0.010101,0.071429,0.060606,0.040404,0.000000,0.090909,0.060606,0.083333,0.031250,0.020202,0.232323,0.070707,0.040404,0.050505,0.032787,0.131313,0.070707,0.080808,0.050505,0.101010,0.020202,0.010101,0.060606,0.757576,0.060606,0.0,0.705882,0.080808,0.191919,0.383838,0.181818,0.162162,0.070707,0.030303,0.030928,0.161616,0.033333,0.152778,0.070707,0.000000,0.000000,0.000000,0.281690,0.424242,0.066667,0.161616,0.141414,0.626263,0.090909,0.012048,0.050505,0.066667,0.132530,0.0,0.0,0.122449,0.181818,0.014706,0.050505,0.000000,0.0,0.052632,0.037037,0.03125,0.055556,0.161616,0.909091,0.020202,0.000000,0.080808,0.959596,0.333333,0.060606,0.096774,0.153846,0.070707,0.343434,0.595960,0.848485,0.929293,0.080808,0.1,0.064516,0.000000,0.020202,0.969697,0.010101,0.0,0.010101,0.949495,0.060606,0.909091,1.000000,0.979798,0.285714,0.4,0.040404,0.090909,0.416667,0.314136,0.315789,0.155844,0.006652,0.059322,0.146341,0.0020,0.001001,0.010,0.004596,0.003853,0.237264,1.0,0.666667,0.786885,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.076923,1.0,0.333333,0.428571,0.0,0.216495,0.000000,0.590909,0.909091,0.818182,0.727273,0.0,1.000000,0.916667,0.909091,0.000017,0.783505,0.833333,0.666667,0.037344,0.010101,0.404040,0.232323,0.414141,0.080808,0.030303,0.045977,0.666667,0.014063,0.017853,0.013728,1.000000,0.000000,0.000000,0.494949,0.515152,0.787879,0.010101,0.010101,0.151515,0.111111,0.0,0.013889,0.050505,0.089552,0.021739,0.021277,0.013889,0.072165,0.017544,0.0,0.034884,0.476190,0.583333,0.630952,0.476190,0.571429,0.619048,0.280000,0.343434,0.444444,0.232323,0.101010,0.161616,0.171717,0.141414,0.222222,0.141414,0.060606,0.151515,0.131313,0.353535,0.212121,0.161616,0.343434,0.080808,0.292929,0.151515,0.383838,0.474747,0.313131,0.141414,0.060606,0.636364,0.080808,0.109589,0.212121,0.295385,0.407143,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.342000,0.340333,0.692308,0.615385,0.888889,0.121212,1.000000,0.010101,0.000000,0.323232,0.828283,0.717172,0.272727,0.909091,0.101010,0.050505,0.02,0.030303,0.090909,0.151515,0.030303,0.160000,0.444444,0.212121,0.023256,0.010101,0.000000,0.545455,0.898990,0.979798,1.000000,1.000000,0.010101,0.000000,0.606061,0.635294,0.644444,0.245902,0.125,0.000000,0.000000,0.000000,0.121212,0.000000,0.000000,0.000000,0.898990,0.919192,0.939394,0.939394,0.616987,0.099846,0.916005,0.292667,0.310000,0.333333,0.350667,0.114323,0.050505,0.050505,0.232323,0.272727,0.262626,0.101010,0.04,0.000000,0.020202,0.010101,0.050505,0.222222,0.272727,0.323232,0.090909,0.06,0.000000,0.030303,0.323232,0.060606,0.575758,0.020202,0.313131,0.696970,0.060606,0.818182,0.101010,0.040404,0.020202,0.028169,0.0,0.08,0.010101,0.010101,0.000000,0.131313,0.233333,0.315789,0.030303,0.666667,0.646465,0.707071,0.585859,0.686869,0.585859,0.727273,0.636364,0.767677,1.000000,0.050505,0.121212,0.141414,0.050505,0.070707,0.252525,0.000000,0.072727,0.060606,0.010101,0.121212,0.040404,0.028571,0.070707,0.020202,0.000000,0.060606,0.141414,0.133333,0.093750,0.090909,0.191919,0.060606,0.020202,0.040404,0.032787,0.080808,0.070707,0.000000,0.070707,0.080808,0.030303,0.040404,0.030303,0.787879,0.040404,0.0,0.705882,0.060606,0.131313,0.363636,0.212121,0.297297,0.090909,0.030303,0.030928,0.212121,0.033333,0.208333,0.080808,0.010309,0.010101,0.000000,0.281690,0.404040,0.000000,0.232323,0.272727,0.414141,0.070707,0.000000,0.020202,0.000000,0.060241,0.0,0.0,0.122449,0.145455,0.000000,0.010101,0.219512,0.0,0.052632,0.074074,0.00000,0.188889,0.545455,0.828283,0.040404,0.090909,0.050505,1.000000,0.777778,0.393939,0.612903,0.673077,0.000000,0.000000,0.000000,0.010101,0.141414,0.868687,0.0,0.000000,0.888889,0.030303,0.050505,0.000000,0.0,0.030303,1.000000,0.000000,1.000000,1.000000,1.000000,0.428571,0.4,0.141414,0.090909,0.433333,0.314136,0.315789,0.155844,0.015099,0.033898,0.170732,0.0100,0.004004,0.020,0.003676,0.016180,0.863723,1.0,0.000000,0.131148,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,0.500000,1.0,0.333333,0.785714,0.0,0.443299,0.000000,0.863636,0.000000,0.909091,0.272727,0.0,0.272727,0.979167,0.000000,0.000052,0.556701,0.666667,0.222222,0.087137,0.333333,0.272727,0.292929,0.181818,0.121212,0.020202,0.080460,1.000000,0.010040,0.012052,0.010988,0.939394,0.000000,0.070707,0.505051,0.505051,0.888889,0.080808,0.000000,0.030303,0.040404,0.0,0.000000,0.000000,0.014925,0.021739,0.000000,0.000000,0.020619,0.017544,0.0,0.011628,0.309524,0.380952,0.416667,0.321429,0.428571,0.476190,0.400000,0.545455,0.343434,0.121212,0.202020,0.383838,0.161616,0.131313,0.080808,0.040404,0.010101,0.222222,0.242424,0.343434,0.131313,0.070707,0.060606,0.020202,0.060606,0.242424,0.313131,0.454545,0.252525,0.080808,0.010101,0.626263,0.131313,0.041096,0.222222,0.281538,0.364286,0.222222,0.222222,0.014286,0.272727,0.262626,0.252525,0.000000,0.000000,0.0,0.102500,0.100167,0.307692,0.230769,0.686869,0.323232,0.969697,0.040404,0.191919,0.444444,0.696970,0.555556,0.333333,0.838384,0.171717,0.111111,0.02,0.101010,0.212121,0.131313,0.070707,0.320000,0.595960,0.050505,0.069767,0.050505,0.000000,0.000000,0.000000,0.000000,0.070707,0.838384,0.000000,0.030303,0.222222,0.517647,0.533333,0.229508,0.125,0.060606,0.202020,0.515152,0.040404,0.020202,0.242424,0.030303,0.010101,0.161616,0.919192,0.959596,0.632479,0.462366,0.740068,0.164667,0.172000,0.179333,0.182667,0.064267,0.222222,0.292929,0.282828,0.131313,0.050505,0.010101,0.02,0.000000,0.000000,0.212121,0.252525,0.363636,0.080808,0.080808,0.000000,0.04,0.000000,0.000000,0.101010,0.040404,0.434343,0.151515,0.777778,0.232323,0.353535,0.808081,0.151515,0.000000,0.000000,0.000000,0.0,0.00,0.000000,0.060606,0.000000,0.272727,0.155556,0.250000,0.000000,0.505051,0.868687,0.929293,0.797980,0.878788,0.717172,0.636364,0.555556,0.919192,1.000000,0.020202,0.111111,0.040404,0.050505,0.101010,0.232323,0.000000,0.054545,0.202020,0.020202,0.090909,0.030303,0.128571,0.020202,0.020202,0.000000,0.040404,0.070707,0.116667,0.046875,0.060606,0.181818,0.070707,0.101010,0.000000,0.032787,0.111111,0.101010,0.090909,0.050505,0.121212,0.020202,0.070707,0.090909,0.646465,0.070707,0.0,0.723529,0.050505,0.121212,0.313131,0.343434,0.108108,0.141414,0.000000,0.041237,0.424242,0.100000,0.277778,0.232323,0.195876,0.333333,0.051282,0.225352,0.272727,0.200000,0.292929,0.111111,0.181818,0.545455,0.024096,0.030303,0.000000,0.216867,0.0,0.0,0.102041,0.054545,0.000000,0.020202,0.000000,0.0,0.000000,0.000000,0.00000,0.066667,0.393939,0.898990,0.010101,0.030303,0.060606,0.979798,0.484848,0.090909,0.096774,0.307692,0.030303,0.171717,0.505051,0.797980,0.919192,0.090909,0.0,0.000000,0.898990,0.020202,0.101010,0.000000,0.0,0.000000,1.000000,0.000000,1.000000,1.000000,0.989899,0.285714,0.0,0.060606,0.040404,0.183333,0.141361,0.210526,0.103896,0.001795,0.008475,0.024390,0.0050,0.002002,0.015,0.004596,0.009059,0.555564,1.0,0.000000,0.573770,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76324,0.211538,1.0,0.000000,0.214286,0.0,0.278351,0.636364,0.909091,1.000000,0.863636,0.818182,1.0,0.090909,0.895833,1.000000,0.000000,0.711340,0.333333,1.000000,0.020747,0.020202,0.373737,0.101010,0.323232,0.262626,0.010101,0.022989,1.000000,0.011732,0.015919,0.011214,1.000000,0.000000,0.000000,0.505051,0.505051,0.989899,0.000000,0.000000,0.010101,0.040404,0.0,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.020619,0.000000,0.0,0.011628,0.476190,0.547619,0.583333,0.464286,0.559524,0.607143,0.266667,0.333333,0.353535,0.323232,0.131313,0.161616,0.171717,0.191919,0.171717,0.131313,0.050505,0.141414,0.111111,0.292929,0.222222,0.242424,0.282828,0.050505,0.262626,0.090909,0.373737,0.545455,0.333333,0.101010,0.030303,0.656566,0.040404,0.068493,0.262626,0.336923,0.417143,0.898990,0.848485,0.128571,0.090909,0.000000,0.000000,0.000000,0.000000,0.0,0.150333,0.155167,0.384615,0.384615,0.929293,0.080808,1.000000,0.010101,0.000000,0.333333,0.898990,0.808081,0.313131,0.959596,0.050505,0.020202,0.02,0.010101,0.060606,0.121212,0.020202,0.200000,0.636364,0.212121,0.000000,0.000000,0.000000,0.000000,0.010101,0.282828,0.848485,0.989899,0.000000,0.000000,0.808081,0.729412,0.722222,0.262295,0.100,0.090909,0.000000,0.000000,0.030303,0.040404,0.000000,0.000000,0.595960,0.757576,0.979798,1.000000,0.542735,0.170507,0.700341,0.332000,0.344000,0.350000,0.365333,0.097970,0.030303,0.080808,0.161616,0.232323,0.363636,0.101010,0.00,0.065574,0.000000,0.030303,0.050505,0.141414,0.242424,0.383838,0.111111,0.00,0.040404,0.000000,0.262626,0.040404,0.676768,0.020202,0.202020,0.808081,0.010101,0.858586,0.101010,0.000000,0.000000,0.000000,0.0,0.00,0.050505,0.000000,0.024096,0.030303,0.166667,0.223684,0.010101,0.515152,0.717172,0.797980,0.636364,0.767677,0.626263,0.737374,0.696970,1.000000,0.000000,0.000000,0.131313,0.131313,0.040404,0.131313,0.202020,0.000000,0.218182,0.080808,0.000000,0.060606,0.050505,0.028571,0.040404,0.000000,0.000000,0.020202,0.222222,0.083333,0.015625,0.010101,0.161616,0.030303,0.050505,0.020202,0.049180,0.161616,0.070707,0.020202,0.181818,0.262626,0.010101,0.020202,0.000000,0.636364,0.090909,0.0,0.741176,0.040404,0.080808,0.333333,0.313131,0.135135,0.121212,0.060606,0.092784,0.161616,0.033333,0.194444,0.090909,0.010309,0.020202,0.000000,0.281690,0.373737,0.000000,0.101010,0.343434,0.323232,0.222222,0.012048,0.000000,0.000000,0.156627,0.0,0.0,0.000000,0.090909,0.029412,0.303030,0.000000,0.0,0.000000,0.000000,0.00000,0.066667,0.878788,0.828283,0.040404,0.000000,0.141414,1.000000,0.787879,0.343434,0.451613,0.403846,0.020202,0.030303,0.101010,0.454545,0.888889,0.121212,0.0,0.000000,0.969697,0.000000,0.020202,0.020202,0.0,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.428571,0.8,0.080808,0.101010,0.516667,0.497382,0.315789,0.480519,0.018477,0.084746,0.219512,0.0050,0.003003,0.010,0.000919,0.007724,0.061154,1.0,0.333333,0.049180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0
76325,0.634615,1.0,0.000000,0.928571,0.0,0.329897,0.000000,0.954545,0.090909,0.954545,0.090909,0.5,0.090909,1.000000,0.090909,0.000034,0.670103,0.333333,0.666667,0.000000,0.000000,0.363636,0.111111,0.626263,0.040404,0.161616,0.000000,1.000000,0.010344,0.014660,0.011722,0.000000,0.000000,1.000000,0.505051,0.505051,0.959596,0.050505,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.464286,0.535714,0.571429,0.464286,0.547619,0.583333,0.266667,0.333333,0.414141,0.262626,0.121212,0.181818,0.181818,0.212121,0.151515,0.121212,0.040404,0.141414,0.151515,0.313131,0.232323,0.171717,0.232323,0.050505,0.212121,0.191919,0.444444,0.373737,0.202020,0.060606,0.010101,0.696970,0.101010,0.054795,0.181818,0.261538,0.350000,0.666667,0.666667,0.000000,0.202020,0.202020,0.121212,0.000000,0.000000,0.0,0.169000,0.194500,0.230769,0.230769,0.858586,0.151515,0.757576,0.252525,0.202020,0.292929,0.797980,0.696970,0.252525,0.898990,0.111111,0.040404,0.02,0.030303,0.121212,0.151515,0.030303,0.186667,0.626263,0.191919,0.023256,0.030303,0.000000,0.101010,0.222222,0.515152,0.727273,0.878788,0.020202,0.050505,0.444444,0.635294,0.600000,0.229508,0.075,0.202020,0.000000,0.141414,0.040404,0.040404,0.020202,0.050505,0.050505,0.202020,0.414141,0.828283,0.000000,0.336406,0.570942,0.245333,0.257333,0.328667,0.364000,0.107023,0.090909,0.141414,0.191919,0.262626,0.141414,0.080808,0.12,0.081967,0.000000,0.050505,0.080808,0.222222,0.252525,0.161616,0.101010,0.14,0.060606,0.000000,0.282828,0.020202,0.575758,0.000000,0.282828,0.727273,0.010101,0.767677,0.212121,0.000000,0.000000,0.000000,0.0,0.00,0.010101,0.020202,0.060241,0.080808,0.177778,0.263158,0.030303,0.545455,0.707071,0.797980,0.626263,0.777778,0.575758,0.919192,0.888889,0.737374,0.000000,0.020202,0.181818,0.161616,0.040404,0.191919,0.171717,0.000000,0.000000,0.050505,0.000000,0.171717,0.000000,0.028571,0.020202,0.000000,0.000000,0.050505,0.212121,0.000000,0.125000,0.000000,0.181818,0.070707,0.030303,0.020202,0.016393,0.121212,0.131313,0.060606,0.040404,0.040404,0.161616,0.000000,0.121212,0.646465,0.040404,0.0,0.823529,0.030303,0.121212,0.212121,0.191919,0.216216,0.232323,0.131313,0.020619,0.212121,0.033333,0.208333,0.080808,0.000000,0.000000,0.000000,0.253521,0.363636,0.000000,0.111111,0.141414,0.626263,0.050505,0.000000,0.141414,0.000000,0.036145,0.0,0.0,0.224490,0.018182,0.000000,0.000000,0.000000,0.0,0.263158,0.000000,0.00000,0.000000,0.747475,0.989899,0.000000,0.000000,0.020202,1.000000,0.818182,0.414141,0.354839,0.307692,0.020202,0.151515,0.303030,0.676768,0.949495,0.060606,0.0,0.032258,0.030303,0.303030,0.636364,0.020202,0.0,0.020202,0.585859,0.424242,0.222222,1.000000,1.000000,0.380952,0.4,0.111111,0.090909,0.083333,0.052356,0.263158,0.142857,0.000634,0.000000,0.024390,0.0200,0.003003,0.020,0.008272,0.018738,0.157140,1.0,0.000000,0.245902,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0
76326,0.865385,1.0,0.333333,0.285714,0.0,0.000000,0.090909,0.545455,0.000000,0.863636,1.000000,0.5,0.000000,0.906250,0.000000,0.000017,0.624862,0.666667,1.000000,0.000000,0.000000,0.353535,0.292929,0.424242,0.171717,0.070707,0.034483,0.444444,0.045511,0.055583,0.051606,0.000000,0.717172,0.292929,0.494949,0.515152,0.979798,0.000000,0.030303,0.000000,0.010101,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.010309,0.000000,0.0,0.000000,0.452381,0.571429,0.595238,0.476190,0.595238,0.619048,0.346667,0.313131,0.434343,0.262626,0.070707,0.171717,0.202020,0.151515,0.141414,0.131313,0.131313,0.111111,0.141414,0.353535,0.242424,0.161616,0.313131,0.151515,0.303030,0.303030,0.363636,0.353535,0.212121,0.080808,0.020202,0.606061,0.121212,0.150685,0.161616,0.241538,0.337143,0.656566,0.646465,0.057143,0.151515,0.111111,0.060606,0.040404,0.030303,0.0,0.088833,0.097667,0.153846,0.153846,0.676768,0.333333,0.818182,0.191919,0.070707,0.333333,0.686869,0.575758,0.252525,0.818182,0.191919,0.080808,0.04,0.060606,0.141414,0.212121,0.030303,0.266667,0.525253,0.252525,0.000000,0.000000,0.000000,0.010101,0.010101,0.070707,0.191919,0.565657,0.000000,0.070707,0.363636,0.588235,0.566667,0.213115,0.100,0.111111,0.040404,0.191919,0.161616,0.070707,0.050505,0.040404,0.010101,0.050505,0.222222,0.505051,0.000000,0.723502,0.870602,0.164667,0.201333,0.194667,0.232000,0.070850,0.282828,0.222222,0.202020,0.171717,0.080808,0.030303,0.02,0.000000,0.010101,0.151515,0.222222,0.242424,0.222222,0.111111,0.030303,0.04,0.000000,0.010101,0.333333,0.050505,0.363636,0.111111,0.525253,0.484848,0.121212,0.717172,0.151515,0.000000,0.000000,0.000000,0.0,0.00,0.090909,0.040404,0.000000,0.060606,0.088889,0.184211,0.030303,0.242424,0.646465,0.727273,0.565657,0.696970,0.535354,0.767677,0.606061,0.838384,1.000000,0.030303,0.121212,0.131313,0.030303,0.090909,0.121212,0.023256,0.018182,0.161616,0.070707,0.121212,0.030303,0.114286,0.020202,0.080808,0.163934,0.050505,0.040404,0.066667,0.015625,0.020202,0.171717,0.050505,0.020202,0.060606,0.032787,0.111111,0.121212,0.101010,0.030303,0.161616,0.070707,0.030303,0.111111,0.525253,0.111111,0.0,0.705882,0.101010,0.141414,0.353535,0.212121,0.162162,0.101010,0.050505,0.000000,0.262626,0.100000,0.277778,0.040404,0.000000,0.000000,0.000000,0.239437,0.353535,0.033333,0.282828,0.161616,0.434343,0.070707,0.012048,0.080808,0.033333,0.168675,0.0,0.0,0.102041,0.036364,0.029412,0.010101,0.000000,0.0,0.052632,0.037037,0.00000,0.011111,0.494949,0.969697,0.020202,0.000000,0.030303,0.939394,0.585859,0.202020,0.193548,0.519231,0.000000,0.020202,0.151515,0.434343,0.555556,0.454545,0.0,0.000000,0.686869,0.070707,0.151515,0.010101,0.0,0.080808,0.858586,0.111111,0.767677,0.989899,0.929293,0.380952,0.4,0.070707,0.080808,0.466667,0.361257,0.315789,0.155844,0.011403,0.067797,0.268293,0.0030,0.001201,0.008,0.010110,0.005898,0.692061,0.0,1.000000,0.950820,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1
76327,0.730769,1.0,0.333333,0.214286,0.0,0.216495,0.545455,0.545455,0.909091,0.909091,1.000000,0.5,0.181818,0.895833,0.727273,0.000017,0.773196,0.666667,0.333333,0.004149,0.000000,0.595960,0.222222,0.313131,0.030303,0.030303,0.000000,0.333333,0.008044,0.010073,0.007965,0.000000,0.000000,1.000000,0.474747,0.535354,0.767677,0.232323,0.000000,0.000000,0.040404,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030928,0.017544,0.0,0.000000,0.380952,0.488095,0.523810,0.392857,0.511905,0.559524,0.400000,0.373737,0.373737,0.262626,0.141414,0.191919,0.252525,0.171717,0.121212,0.101010,0.030303,0.151515,0.151515,0.292929,0.222222,0.191919,0.212121,0.080808,0.181818,0.171717,0.373737,0.464646,0.272727,0.141414,0.060606,0.565657,0.161616,0.082192,0.222222,0.290769,0.397143,0.656566,0.656566,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.077167,0.084500,0.307692,0.230769,0.878788,0.131313,0.959596,0.050505,0.333333,0.424242,0.797980,0.585859,0.272727,0.898990,0.111111,0.161616,0.06,0.131313,0.111111,0.161616,0.040404,0.200000,0.494949,0.121212,0.186047,0.121212,0.024691,0.010101,0.010101,0.010101,0.040404,0.434343,0.010101,0.020202,0.363636,0.576471,0.577778,0.229508,0.125,0.000000,0.000000,0.343434,0.080808,0.000000,0.000000,0.060606,0.030303,0.191919,0.545455,0.848485,0.618590,0.505376,0.606129,0.184667,0.182000,0.177333,0.170667,0.056399,0.252525,0.181818,0.292929,0.222222,0.050505,0.000000,0.00,0.000000,0.000000,0.252525,0.191919,0.333333,0.202020,0.030303,0.000000,0.00,0.000000,0.000000,0.292929,0.020202,0.232323,0.131313,0.434343,0.575758,0.050505,0.828283,0.080808,0.000000,0.000000,0.000000,0.0,0.00,0.060606,0.030303,0.000000,0.030303,0.188889,0.250000,0.000000,0.636364,0.636364,0.767677,0.515152,0.767677,0.515152,0.606061,0.303030,0.898990,0.000000,0.050505,0.030303,0.090909,0.020202,0.212121,0.141414,0.000000,0.000000,0.121212,0.000000,0.161616,0.030303,0.142857,0.101010,0.000000,0.000000,0.080808,0.090909,0.116667,0.000000,0.070707,0.353535,0.040404,0.070707,0.020202,0.000000,0.070707,0.060606,0.070707,0.000000,0.030303,0.030303,0.000000,0.141414,0.808081,0.000000,0.0,0.705882,0.121212,0.303030,0.404040,0.090909,0.081081,0.010101,0.040404,0.061856,0.181818,0.100000,0.236111,0.040404,0.000000,0.000000,0.000000,0.422535,0.595960,0.066667,0.222222,0.131313,0.313131,0.222222,0.000000,0.030303,0.000000,0.036145,0.0,0.0,0.204082,0.000000,0.000000,0.000000,0.000000,0.0,0.052632,0.000000,0.00000,0.022222,0.474747,0.979798,0.010101,0.000000,0.010101,1.000000,0.565657,0.090909,0.193548,0.192308,0.000000,0.232323,0.565657,0.888889,0.888889,0.121212,0.0,0.000000,0.000000,0.171717,0.666667,0.070707,0.0,0.090909,0.000000,1.000000,0.000000,1.000000,0.959596,0.238095,0.4,0.060606,0.070707,0.483333,0.350785,0.315789,0.168831,0.013462,0.093220,0.292683,0.0015,0.007007,0.005,0.002757,0.004873,0.233499,1.0,1.000000,0.606557,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0


In [182]:
from sklearn.utils import resample

category_0 = trainset[trainset['TARGET_B'] == 0]
category_1 = trainset[trainset['TARGET_B'] == 1]

#### Apply the Random Forests algorithm but this time only by upscaling the data to deal with the imbalance

In [183]:
category_1_oversampled = resample(category_1, 
                                  replace=True, 
                                  n_samples = len(category_0))

In [184]:
data_upsampled = pd.concat([category_0, category_1_oversampled], axis=0)

In [185]:
X_train_upsampled = data_upsampled.drop(['TARGET_B'], axis =1)
y_train_upsampled = data_upsampled['TARGET_B'] 

In [186]:
def model_eval(model,X_train,y_train,X_test,y_test):
    # Fitting
    model.fit(X_train, y_train)
    # Predicting
    predictions = model.predict(X_test)
    # Calculating confusion matrix
    cm =confusion_matrix (y_test, predictions)
    # Printing different evaluation metrics
    print ( str(model.base_estimator_)[:-2] + " score: ", model.score(X_test, y_test))
    print ( str(model.base_estimator_)[:-2] + " precision: ", precision_score(y_test, predictions))
    print ( str(model.base_estimator_)[:-2] + " recall: ", recall_score(y_test, predictions))
    print ( str(model.base_estimator_)[:-2] + " score: ", f1_score(y_test, predictions))
    print ('\n')
    print (cm)
    return model

In [187]:
# I will use this one, because is giving me the best results

# Fit the Random Forests algorithm on the training data
rfc = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20,
                             max_samples=0.2,
                             random_state = 0)
rfc.fit(X_train_upsampled, y_train_upsampled)
print('Train score: ', rfc.score(X_train_upsampled,y_train_upsampled))
print('Test score: ' , rfc.score(X_test_scaled,y_test))


Train score:  0.6208440250531138
Test score:  0.5990148299533616


In [188]:
# Make predictions on the test data
y_pred = rfc.predict(X_test_scaled)

print('accuracy:', accuracy_score(y_test, y_pred))
print("precision: ",precision_score(y_test,y_pred))
print("recall: ",recall_score(y_test,y_pred))
print("f1: ",f1_score(y_test,y_pred))

accuracy: 0.5990148299533616
precision:  0.06972833117723157
recall:  0.539
f1:  0.12348224513172969


In [189]:
rfc1 = RandomForestClassifier(max_depth=5,
                             min_samples_split=20,
                             min_samples_leaf =20,
                             max_samples=0.2,
                             random_state = 0)

#### Use Feature Selections that you have learned in class to decide if you want to use all of the features (Variance Threshold, RFE, PCA, etc.)

#### Variance Threshold

In [190]:
from sklearn.feature_selection import VarianceThreshold, RFE
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

In [191]:
X_added_constant = sm.add_constant(X_train_upsampled)
X_added_constant
model = sm.OLS(y_train_upsampled,X_added_constant).fit()
model.summary()
#drop values higher that 0.8

0,1,2,3
Dep. Variable:,TARGET_B,R-squared:,0.073
Model:,OLS,Adj. R-squared:,0.071
Method:,Least Squares,F-statistic:,32.19
Date:,"Thu, 22 Jun 2023",Prob (F-statistic):,0.0
Time:,16:32:47,Log-Likelihood:,-99722.0
No. Observations:,144972,AIC:,200200.0
Df Residuals:,144617,BIC:,203700.0
Df Model:,354,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.8031,0.212,8.510,0.000,1.388,2.218
CLUSTER,0.2130,0.043,4.989,0.000,0.129,0.297
DATASRCE,-0.0130,0.005,-2.804,0.005,-0.022,-0.004
DOMAIN_B,-0.0982,0.013,-7.307,0.000,-0.124,-0.072
ODATEW_YR,0.0206,0.030,0.676,0.499,-0.039,0.080
ODATEW_MM,0.2717,0.054,5.029,0.000,0.166,0.378
DOB_YR,-0.0134,0.007,-1.863,0.062,-0.028,0.001
DOB_MM,0.0361,0.005,7.142,0.000,0.026,0.046
MINRDATE_YR,0.0653,0.018,3.726,0.000,0.031,0.100

0,1,2,3
Omnibus:,609952.301,Durbin-Watson:,0.146
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17865.716
Skew:,-0.008,Prob(JB):,0.0
Kurtosis:,1.28,Cond. No.,41100.0


In [192]:
drop_list = ['MAXRDATE_MM', 'MALEVET', 'FEDGOV','WEALTH2','HUPA4', 'HHN1', 'HUPA6', 'ADI', 'IC4', 'IC22', 'TPE11', 'OCC13','EIC4', 'EIC8', 'EIC16','EC2', 'EC5','ANC6', 'HC6', 'HC11', 'HC12','HC15']

In [193]:
# Use variance threshold to remove low-variance features
variance_threshold = VarianceThreshold(threshold=0.02)
X_variance_threshold = variance_threshold.fit(X_train_upsampled)
X_variance_threshold_transform = variance_threshold.transform(X_train_upsampled)

var_list = list(X_variance_threshold.get_support())
list(zip(X_train_upsampled.columns, var_list))
[col[0] for col in zip(X_train_upsampled.columns, var_list) if col[1] == False]

removed_columns_variance_threshold = pd.DataFrame(data=(X_train_upsampled.columns,variance_threshold.variances_,X_variance_threshold.get_support()), index=('column_name','variance','statement')).T
removed_columns_variance_threshold = removed_columns_variance_threshold.loc[(removed_columns_variance_threshold['statement'] == False),:]
drop_list_variance_threshold = list(removed_columns_variance_threshold['column_name'])

In [194]:
drop_list_1 = drop_list_variance_threshold + drop_list

In [195]:
drop_list_1 = list(set(drop_list_1))
drop_list_1

['AGEC6',
 'CHILC4',
 'RHP2',
 'EIC14',
 'RHP4',
 'AFC1',
 'EIC4',
 'AFC3',
 'MINRDATE_YR',
 'HC10',
 'MAXRAMNT',
 'OEDC4',
 'ANC4',
 'HUR1',
 'TPE12',
 'CHILC5',
 'ETHC1',
 'ANC14',
 'ODATEW_MM',
 'EIC11',
 'EC6',
 'ANC8',
 'AGEC2',
 'IC18',
 'ANC11',
 'HC14',
 'ANC2',
 'LSC2',
 'EIC9',
 'IC19',
 'ETH12',
 'TPE10',
 'TPE11',
 'OCC7',
 'EIC6',
 'TIMELAG',
 'AVGGIFT',
 'LOCALGOV',
 'HHAGE2',
 'HC3',
 'EC4',
 'ETHC2',
 'RHP1',
 'IC22',
 'MAXRDATE_MM',
 'LFC10',
 'IC9',
 'WEALTH2',
 'EIC8',
 'PEC1',
 'RHP3',
 'ANC7',
 'AGE902',
 'AGE905',
 'TPE1',
 'NUMPROM',
 'HHD4',
 'HC20',
 'STATEGOV',
 'OEDC7',
 'HC9',
 'HHD11',
 'MARR4',
 'POP901',
 'ETH7',
 'TPE5',
 'HC12',
 'HU4',
 'TPE7',
 'VC4',
 'TPE9',
 'HHAS4',
 'EIC16',
 'ETHC6',
 'AFC6',
 'ANC13',
 'HHAGE3',
 'POP903',
 'HC6',
 'SEC2',
 'HHD10',
 'OCC12',
 'VOC3',
 'MALEVET',
 'HUPA6',
 'CHILC3',
 'NGIFTALL',
 'TPE4',
 'ETH4',
 'OEDC2',
 'HC21',
 'HHP2',
 'IC5',
 'EIC15',
 'HC11',
 'HC16',
 'DW3',
 'IC14',
 'ETH10',
 'IC17',
 'AGE901',
 'ET

In [196]:
X_train_upsampled_1= X_train_upsampled.drop(columns = drop_list_1)

In [197]:
X_test_1 = X_test_scaled.drop(columns = drop_list_1)


In [198]:
model_eval(rfc1,X_train_upsampled_1,y_train_upsampled,X_test_1,y_test)

DecisionTreeClassifier score:  0.6025782109731175
DecisionTreeClassifier precision:  0.07224532224532225
DecisionTreeClassifier recall:  0.556
DecisionTreeClassifier score:  0.12787488500459984


[[10943  7140]
 [  444   556]]




#### PCA

In [199]:
# Use PCA to reduce the dimensionality of the data
pca = PCA(n_components=5)
X_pca = pca.fit(X_train_upsampled)
X_pca_transform = pca.transform(X_train_upsampled)
X_test_pca = pca.transform(X_test_scaled)

In [200]:
model_eval(rfc1,X_pca_transform,y_train_upsampled,X_test_pca,y_test)

DecisionTreeClassifier score:  0.6343866268406435
DecisionTreeClassifier precision:  0.06857225350079399
DecisionTreeClassifier recall:  0.475
DecisionTreeClassifier score:  0.11984357259997476


[[11631  6452]
 [  525   475]]




In [201]:
#I see that using different tools the accurancy sometimes gets worst 

#### Re-run the Random Forest algorithm to determine if the Feature Selection has improved the results.

In [202]:
numerical =  X.select_dtypes(np.number)
categorical = X.select_dtypes(object)

In [203]:
num_scaled = pd.DataFrame(scaler.transform(numerical), columns = numerical.columns, index = numerical.index)

In [204]:
encoded = pd.DataFrame(enc.transform(categorical).toarray(), columns = column_names, index = categorical.index)

In [205]:
X_all = pd.concat([num_scaled, encoded], axis=1)

In [206]:
rfc.score(X_all,y)

0.6050077558378401

In [207]:
X['predicted_donate']=rfc.predict(X_all)

In [208]:
full_data = pd.concat([X, target], axis=1)
full_data

Unnamed: 0,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM,TCODE,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MAXRAMNT,LASTGIFT,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,predicted_donate,TARGET_B,TARGET_D
0,IL,36,H,F,3,L,E,C,T,2,89,1,37,12,92,8,94,2,95,12,89,11,0,60.000000,5,9,0,0,39,34,18,10,2,1,5,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,0.0,177.0,682.0,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,27,74,6,14,240.0,31,14,5.0,12.0,10.0,4,7.741935,95515,0,4,39,1,0,0.0
1,CA,14,H,M,3,L,G,A,S,1,94,1,52,2,93,10,95,12,95,12,93,10,1,46.000000,6,9,16,0,15,55,11,6,2,1,9,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,4480.0,13.0,803.0,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,12,32,6,13,47.0,3,1,10.0,25.0,25.0,18,15.666667,148535,0,2,1,0,0,0.0
2,NC,43,U,M,3,L,E,C,R,2,90,1,0,2,91,11,92,7,95,12,90,1,1,61.611649,3,1,2,0,20,29,33,6,8,1,1,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,0.0,281.0,518.0,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,26,63,6,14,202.0,27,14,2.0,16.0,5.0,12,7.481481,15078,1,4,60,1,0,0.0
3,CA,44,U,F,3,L,E,C,R,2,87,1,28,1,87,11,94,11,95,12,87,2,0,70.000000,1,4,2,0,23,14,31,3,0,3,0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,9340.0,67.0,862.0,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,27,66,6,14,109.0,16,7,2.0,11.0,10.0,9,6.812500,172556,1,4,41,1,0,0.0
4,FL,16,H,F,3,L,F,A,S,2,86,1,20,1,93,10,96,1,96,1,79,3,0,78.000000,3,2,60,1,28,9,53,26,3,2,9,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,5000.0,127.0,528.0,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,43,113,10,25,254.0,37,8,3.0,15.0,15.0,14,6.864865,7112,1,2,26,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,other,27,H,M,3,L,G,C,C,2,96,1,0,2,96,2,96,2,96,2,96,2,1,61.611649,5,9,0,14,36,47,11,7,8,13,9,27380,7252,10037,99,0,0,50,50,78,10,6,4,5,0,0,0,1,1,0,0,3,1,0,2,28,35,38,29,38,41,30,45,37,18,16,31,25,15,8,3,1,20,18,31,18,13,7,3,5,20,32,48,28,10,4,58,15,3,24,195,271,54,38,8,32,24,14,0,0,0,988,1025,6,6,56,44,89,11,3,44,72,56,32,83,17,12,3,10,16,15,8,19,55,5,3,6,0,2,10,49,73,92,0,4,40,52,53,15,4,24,8,13,14,15,12,3,69,84,92,97,380.0,0.0,743.0,433,481,499,535,18807,11,13,13,21,22,13,4,2,2,9,11,11,21,24,16,4,2,2,9,6,70,6,63,37,27,76,15,2,2,0,0,0,5,2,1,2,18,20,2,69,81,89,73,83,69,69,57,61,94,7,15,16,5,10,21,0,3,11,1,11,2,3,3,1,4,6,4,7,3,3,17,7,5,3,1,9,8,7,14,7,8,13,6,59,7,0,136,2,7,28,33,8,15,8,3,26,2,19,8,8,15,2,20,35,5,48,15,11,25,1,5,1,9,0,0,4,1,1,1,0,0,1,1,0,4,26,92,3,2,4,95,60,19,3,14,0,7,32,78,91,9,6,5,86,1,12,0,0,1,93,7,98,99,98,16,4,4,3,6,14,5,12,25.0,1,0,25.0,25.0,25.0,9,25.000000,184568,0,1,12,0,0,0.0
95408,TX,24,H,M,3,L,F,A,C,1,96,1,50,1,96,3,96,3,96,3,96,3,1,48.000000,7,9,1,0,31,43,19,4,1,0,9,1254,322,361,96,0,4,51,49,91,3,0,2,6,1,0,1,0,0,0,0,5,0,0,1,30,40,40,28,41,43,39,33,42,25,9,19,43,17,7,4,2,10,16,35,23,16,9,2,7,10,20,70,52,25,6,73,4,2,20,307,346,89,88,1,1,0,0,0,0,0,1679,1723,3,3,88,12,97,3,0,63,89,85,60,96,4,2,1,1,7,5,1,28,58,5,2,2,0,18,71,88,91,97,5,1,77,82,75,20,4,1,0,10,7,1,0,5,16,26,44,79,3360.0,201.0,618.0,806,836,802,849,26538,8,9,7,6,11,29,13,2,15,10,0,8,2,13,35,16,3,13,8,5,61,7,83,17,36,80,4,4,4,0,0,0,6,5,3,3,25,32,10,61,73,88,56,87,52,48,43,99,0,0,18,31,0,13,17,0,1,2,4,6,0,3,5,1,8,8,9,3,7,9,13,9,6,0,0,4,7,13,3,4,1,0,4,78,12,0,160,1,6,12,24,7,36,14,9,35,5,32,7,0,0,0,21,31,8,43,5,19,15,1,12,1,14,0,0,4,0,0,1,0,0,0,1,0,2,51,94,3,0,2,99,84,29,4,7,2,55,90,94,94,6,0,0,82,2,16,0,0,0,69,31,67,99,97,18,5,3,2,4,10,3,8,20.0,1,0,20.0,20.0,20.0,9,20.000000,122706,1,1,2,0,0,0.0
95409,MI,30,H,M,3,L,E,B,C,3,95,1,38,1,96,3,95,1,96,10,94,10,1,60.000000,5,9,0,0,18,46,20,7,23,0,9,552,131,205,99,0,0,53,47,82,14,0,1,9,0,0,0,0,0,0,0,9,0,0,0,28,35,37,30,41,44,32,46,38,17,13,34,21,9,9,9,4,21,17,32,20,10,18,7,17,27,29,44,31,14,5,45,19,5,31,179,268,96,95,1,2,1,0,0,0,0,376,377,4,3,66,34,95,5,10,37,64,43,21,80,20,16,2,14,21,20,9,20,49,12,7,7,1,0,0,0,1,9,0,2,45,51,54,14,5,2,0,0,31,2,0,0,3,34,78,91,4040.0,61.0,551.0,263,264,319,345,12178,21,26,20,18,12,0,3,0,0,26,18,17,11,21,0,6,0,0,10,13,26,26,43,57,3,83,17,0,0,0,0,0,0,0,0,25,17,17,0,69,69,70,69,70,69,77,24,62,0,25,5,13,9,5,22,0,2,14,0,13,9,5,2,0,0,4,14,3,11,0,10,5,2,0,5,6,19,3,19,7,23,0,0,52,18,0,120,5,3,51,23,7,11,0,6,32,4,27,7,0,0,0,9,18,0,46,0,20,20,2,8,0,14,0,0,0,1,0,0,0,0,1,0,0,6,82,92,5,3,0,93,42,12,6,51,0,0,0,0,0,99,0,0,97,0,0,0,0,4,99,0,99,99,99,5,2,3,11,14,33,7,17,58.0,7,4,3.0,10.0,10.0,3,8.285714,189641,1,3,34,1,0,0.0
95410,CA,24,H,F,2,L,F,A,C,1,86,1,40,5,90,11,96,8,97,1,86,12,0,58.000000,7,9,0,0,28,35,20,9,1,1,7,1746,432,508,99,0,0,47,53,92,1,1,5,8,0,1,2,0,1,0,0,5,0,0,3,34,42,45,36,45,49,25,38,40,22,12,21,21,18,12,7,9,13,16,34,20,17,20,4,16,9,26,65,41,17,6,56,9,8,27,262,324,99,99,0,0,0,0,5,4,1,2421,2459,11,10,88,12,99,1,0,44,85,71,36,84,16,8,2,6,9,12,6,19,56,16,0,0,0,89,96,99,99,99,9,0,90,65,68,18,5,0,0,0,12,0,0,0,88,88,90,91,8735.0,13.0,803.0,552,544,568,556,15948,7,4,11,18,38,15,5,3,0,4,6,15,19,38,13,4,3,0,25,2,46,3,43,57,9,80,11,0,0,0,0,1,2,6,0,24,18,28,11,52,73,88,60,85,57,70,54,99,99,0,14,16,6,16,17,0,2,12,1,11,2,0,2,1,0,2,22,4,6,4,19,4,7,2,4,6,7,9,4,9,1,1,7,72,8,2,140,7,6,20,35,12,15,5,6,29,4,21,10,0,0,0,13,28,1,35,18,20,8,0,3,1,9,0,0,2,6,1,2,0,0,0,0,0,14,50,83,8,4,5,99,85,43,9,25,0,0,6,17,99,1,0,0,99,0,1,0,0,0,99,0,99,99,99,12,3,6,3,36,127,9,31,498.0,41,18,5.0,21.0,18.0,4,12.146341,4693,1,4,11,1,1,18.0


In [209]:
full_data.to_csv("full_data.csv")

Discuss the output and its impact in the business scenario. Is the cost of a false positive equals to the cost of the false negative? How would you change your algorithm or data in order to maximize the return of the business?

In [2]:
#The result is good but if we have more false positives means that we will wasted the resources on someone who is not likely to donate, and in the false negative we could lose a potential donor 