In [1]:
# Begin the Modeling here**
# - Look critically at the dtypes of numerical and categorical columns and make changes where appropriate.
# - Concatenate numerical and categorical back together again for your X dataframe.  Designate the TargetB as y.
#   - Split the data into a training set and a test set.
#   - Split further into train_num and train_cat.  Also test_num and test_cat.
#   - Scale the features either by using MinMax Scaler or a Standard Scaler. (train_num, test_num)
#   - Encode the categorical features using One-Hot Encoding or Ordinal Encoding.  (train_cat, test_cat)
#       - **fit** only on train data, transform both train and test
#       - again re-concatenate train_num and train_cat as X_train as well as test_num and test_cat as X_test
#   - Fit a logistic regression (classification) model on the training data.
#   - Check the accuracy on the test data.

# **Note**: So far we have not balanced the data.

# Import Libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import VarianceThreshold 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
pd.set_option('display.max_columns',None)

In [3]:
categorical = pd.read_csv('categorical.csv')
numerical = pd.read_csv('numerical.csv')
target = pd.read_csv('target.csv')

In [4]:
categorical.dtypes

STATE           object
CLUSTER          int64
HOMEOWNR        object
GENDER          object
DATASRCE         int64
RFA_2R          object
RFA_2A          object
GEOCODE2        object
DOMAIN_A        object
DOMAIN_B         int64
ODATEW_YR        int64
ODATEW_MM        int64
DOB_YR           int64
DOB_MM           int64
MINRDATE_YR      int64
MINRDATE_MM      int64
MAXRDATE_YR      int64
MAXRDATE_MM      int64
LASTDATE_YR      int64
LASTDATE_MM      int64
FIRSTDATE_YR     int64
FIRSTDATE_MM     int64
dtype: object

In [5]:
categorical

Unnamed: 0,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM
0,IL,36,H,F,3,L,E,C,T,2,89,1,37,12,92,8,94,2,95,12,89,11
1,CA,14,H,M,3,L,G,A,S,1,94,1,52,2,93,10,95,12,95,12,93,10
2,NC,43,U,M,3,L,E,C,R,2,90,1,0,2,91,11,92,7,95,12,90,1
3,CA,44,U,F,3,L,E,C,R,2,87,1,28,1,87,11,94,11,95,12,87,2
4,FL,16,H,F,3,L,F,A,S,2,86,1,20,1,93,10,96,1,96,1,79,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,other,27,H,M,3,L,G,C,C,2,96,1,0,2,96,2,96,2,96,2,96,2
95408,TX,24,H,M,3,L,F,A,C,1,96,1,50,1,96,3,96,3,96,3,96,3
95409,MI,30,H,M,3,L,E,B,C,3,95,1,38,1,96,3,95,1,96,10,94,10
95410,CA,24,H,F,2,L,F,A,C,1,86,1,40,5,90,11,96,8,97,1,86,12


In [6]:
# - Look critically at the dtypes of numerical and categorical columns and make changes where appropriate.
# By looking categorical, changed datatypes whereever it is necessary
categorical_columns = ['CLUSTER','DATASRCE','DOMAIN_B','ODATEW_YR','ODATEW_MM','DOB_YR','DOB_MM','MINRDATE_YR','MINRDATE_MM','MAXRDATE_YR','MAXRDATE_MM','LASTDATE_YR','LASTDATE_MM','FIRSTDATE_YR','FIRSTDATE_MM']
categorical[categorical_columns] = categorical[categorical_columns].astype('object')

In [7]:
categorical.dtypes

STATE           object
CLUSTER         object
HOMEOWNR        object
GENDER          object
DATASRCE        object
RFA_2R          object
RFA_2A          object
GEOCODE2        object
DOMAIN_A        object
DOMAIN_B        object
ODATEW_YR       object
ODATEW_MM       object
DOB_YR          object
DOB_MM          object
MINRDATE_YR     object
MINRDATE_MM     object
MAXRDATE_YR     object
MAXRDATE_MM     object
LASTDATE_YR     object
LASTDATE_MM     object
FIRSTDATE_YR    object
FIRSTDATE_MM    object
dtype: object

In [8]:
numerical.dtypes.values

array([dtype('int64'), dtype('float64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
    

In [9]:
# Scaling the numerical data before feature selection

In [10]:
scaler = MinMaxScaler()
numerical_scaled = scaler.fit_transform(numerical)

In [11]:
numerical_scaled = pd.DataFrame(numerical_scaled,columns = numerical.columns)

# Feature Selection (Variance Threshold)

In [12]:
var_threshold = 0.02
sel = VarianceThreshold(threshold=(var_threshold))

In [13]:
# 1) default is threshold = 0 :eliminate all coumns that are identical for all rows
# 2) in practice we would scale the columns first, and then apply threshold, or apply 
#    a different threshold for different columns

# This drops the columns that have a variance less than this threshold
sel = sel.fit(numerical_scaled)
new_numerical_scaled = sel.transform(numerical_scaled)
var_list = list(sel.get_support())
var_list
cols = [col[0] for col in zip(numerical.columns, var_list) if col[1] == True]
new_numerical_scaled = pd.DataFrame(new_numerical_scaled,columns = cols)

print(numerical_scaled.shape)
print(new_numerical_scaled.shape)

(95412, 315)
(95412, 79)


In [14]:
# concate both categorical and numerical 
# X-y Split
X= pd.concat([categorical,new_numerical_scaled],axis = 1)
X

Unnamed: 0,STATE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,ODATEW_YR,ODATEW_MM,DOB_YR,DOB_MM,MINRDATE_YR,MINRDATE_MM,MAXRDATE_YR,MAXRDATE_MM,LASTDATE_YR,LASTDATE_MM,FIRSTDATE_YR,FIRSTDATE_MM,AGE,INCOME,WEALTH1,VIETVETS,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,ETH2,HHN3,DW1,DW2,DW4,DW5,DW6,HV1,HV2,HV3,HV4,HU1,HU2,HU5,HHD2,HHD3,HHD5,ETHC2,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR2,HUPA2,HUPA3,HUPA6,RP1,RP2,RP3,RP4,MSA,ADI,IC6,HHAS3,MC1,MC2,PEC2,TPE13,LFC2,LFC4,LFC6,LFC7,LFC8,LFC9,VC1,VC3,POBC2,LSC1,VOC2,HC2,HC4,HC5,HC6,HC7,HC8,HC11,HC13,HC17,HC18,HC19,MHUC1,MHUC2,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,IL,36,H,F,3,L,E,C,T,2,89,1,37,12,92,8,94,2,95,12,89,11,0.608247,0.666667,1.000000,0.343434,0.181818,0.555556,0.000000,0.353535,0.656566,0.929293,0.010101,0.494949,0.979798,0.959596,0.020202,0.000000,0.000000,0.079833,0.105833,0.230769,0.153846,0.868687,0.141414,0.070707,0.808081,0.707071,0.848485,0.505051,0.020202,0.070707,0.131313,0.272727,0.474747,0.000000,0.616162,0.000000,0.000000,0.000000,0.020202,0.050505,0.171717,0.737374,0.000000,0.271889,0.131313,0.515152,0.464646,0.545455,0.131313,0.454545,0.646465,0.646465,0.626263,0.535354,1.000000,0.000000,0.343434,0.181818,0.747475,0.888889,0.777778,0.596154,0.141414,0.141414,0.313131,0.545455,0.464646,0.909091,0.101010,0.333333,0.656566,0.404040,0.285714,0.4,0.433333,0.498045,0.0,1.000000,0.622951
1,CA,14,H,M,3,L,G,A,S,1,94,1,52,2,93,10,95,12,95,12,93,10,0.463918,0.833333,1.000000,0.555556,0.111111,1.000000,1.000000,0.000000,0.000000,0.676768,0.000000,0.757576,1.000000,1.000000,0.000000,0.000000,0.000000,0.911333,0.869667,0.923077,0.769231,0.969697,0.040404,0.090909,0.949495,0.888889,0.959596,0.444444,0.979798,0.989899,0.989899,0.989899,1.000000,0.949495,0.838384,0.000000,0.000000,0.000000,0.919192,0.919192,0.919192,0.949495,0.478632,0.019969,0.020202,0.666667,0.565657,0.444444,0.020202,0.717172,0.838384,0.818182,0.646465,0.575758,1.000000,1.000000,0.555556,0.111111,0.393939,0.505051,0.929293,0.192308,0.262626,0.565657,0.979798,1.000000,0.000000,0.969697,0.040404,1.000000,0.000000,1.000000,0.952381,0.8,0.183333,0.774510,0.0,0.333333,0.000000
2,NC,43,U,M,3,L,E,C,R,2,90,1,0,2,91,11,92,7,95,12,90,1,0.624862,0.333333,0.111111,0.292929,0.333333,0.111111,0.000000,0.020202,0.989899,0.969697,0.020202,0.444444,0.696970,0.696970,0.060606,0.050505,0.030303,0.082833,0.091000,0.153846,0.076923,0.787879,0.222222,0.181818,0.767677,0.656566,0.868687,0.606061,0.000000,0.010101,0.060606,0.181818,0.505051,0.000000,0.363636,0.020202,0.242424,0.030303,0.000000,0.020202,0.090909,0.444444,0.000000,0.431644,0.323232,0.313131,0.383838,0.626263,0.212121,0.616162,0.737374,0.707071,0.787879,0.626263,0.828283,1.000000,0.282828,0.323232,0.848485,0.969697,0.656566,0.423077,0.121212,0.232323,0.505051,0.696970,0.313131,0.000000,0.353535,0.222222,0.777778,0.171717,0.428571,0.4,0.416667,0.078617,1.0,1.000000,0.967213
3,CA,44,U,F,3,L,E,C,R,2,87,1,28,1,87,11,94,11,95,12,87,2,0.711340,0.000000,0.444444,0.141414,0.313131,0.000000,0.000000,0.080808,0.929293,0.616162,0.000000,0.505051,0.858586,0.838384,0.040404,0.010101,0.000000,0.166667,0.210500,0.153846,0.076923,0.484848,0.525253,0.060606,0.737374,0.616162,0.848485,0.363636,0.101010,0.252525,0.505051,0.696970,0.929293,0.101010,0.424242,0.000000,0.090909,0.000000,0.010101,0.080808,0.171717,0.343434,0.997863,0.102919,0.272727,0.262626,0.464646,0.545455,0.171717,0.434343,0.818182,0.686869,0.333333,0.313131,0.000000,1.000000,0.141414,0.313131,0.676768,0.565657,0.434343,0.730769,0.101010,0.191919,0.393939,0.454545,0.555556,0.454545,0.171717,0.232323,0.777778,0.222222,0.761905,0.4,0.433333,0.899764,1.0,1.000000,0.655738
4,FL,16,H,F,3,L,F,A,S,2,86,1,20,1,93,10,96,1,96,1,79,3,0.793814,0.333333,0.222222,0.090909,0.535354,1.000000,1.000000,0.000000,0.000000,0.020202,0.989899,0.575758,1.000000,0.989899,0.000000,0.000000,0.000000,0.096000,0.099000,0.307692,0.230769,0.909091,0.101010,0.000000,0.828283,0.494949,0.929293,0.010101,0.000000,0.010101,0.020202,0.161616,0.676768,0.000000,0.454545,0.000000,0.000000,0.000000,0.252525,0.585859,0.747475,0.838384,0.534188,0.195084,0.242424,0.111111,0.202020,0.808081,0.010101,0.767677,0.737374,0.656566,0.808081,0.313131,0.818182,1.000000,0.090909,0.535354,0.656566,1.000000,0.454545,0.653846,0.010101,0.030303,0.060606,0.333333,0.676768,0.090909,0.727273,1.000000,0.010101,0.212121,0.285714,0.4,0.700000,0.037079,1.0,0.333333,0.409836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,other,27,H,M,3,L,G,C,C,2,96,1,0,2,96,2,96,2,96,2,96,2,0.624862,0.666667,1.000000,0.474747,0.111111,1.000000,1.000000,0.000000,0.000000,0.787879,0.101010,0.484848,0.545455,0.383838,0.323232,0.242424,0.141414,0.164667,0.170833,0.461538,0.461538,0.565657,0.444444,0.030303,0.727273,0.565657,0.838384,0.555556,0.020202,0.101010,0.494949,0.737374,0.929293,0.000000,0.404040,0.080808,0.131313,0.121212,0.696970,0.848485,0.929293,0.979798,0.040598,0.000000,0.111111,0.707071,0.636364,0.373737,0.020202,0.696970,0.898990,0.838384,0.696970,0.575758,0.616162,0.949495,0.484848,0.111111,0.262626,0.929293,0.606061,0.269231,0.070707,0.323232,0.787879,0.919192,0.090909,0.868687,0.121212,0.939394,0.070707,0.989899,0.761905,0.8,0.083333,0.962399,0.0,0.000000,0.180328
95408,TX,24,H,M,3,L,F,A,C,1,96,1,50,1,96,3,96,3,96,3,96,3,0.484536,1.000000,1.000000,0.434343,0.191919,1.000000,0.969697,0.000000,0.040404,0.919192,0.030303,0.707071,0.898990,0.888889,0.010101,0.000000,0.000000,0.279833,0.287167,0.230769,0.230769,0.888889,0.121212,0.000000,0.898990,0.858586,0.969697,0.585859,0.181818,0.717172,0.888889,0.919192,0.979798,0.050505,0.777778,0.000000,0.101010,0.000000,0.161616,0.262626,0.444444,0.797980,0.358974,0.308756,0.080808,0.616162,0.838384,0.171717,0.030303,0.616162,0.888889,0.878788,0.484848,0.434343,1.000000,0.000000,0.434343,0.191919,0.515152,0.949495,0.848485,0.134615,0.555556,0.909091,0.949495,0.949495,0.060606,0.828283,0.161616,0.696970,0.313131,0.676768,0.857143,1.0,0.050000,0.639828,1.0,0.000000,0.016393
95409,MI,30,H,M,3,L,E,B,C,3,95,1,38,1,96,3,95,1,96,10,94,10,0.608247,0.666667,1.000000,0.464646,0.202020,1.000000,1.000000,0.000000,0.000000,0.828283,0.141414,0.444444,0.969697,0.959596,0.020202,0.010101,0.000000,0.062667,0.062833,0.307692,0.230769,0.666667,0.343434,0.101010,0.646465,0.434343,0.808081,0.494949,0.000000,0.000000,0.000000,0.010101,0.090909,0.000000,0.454545,0.000000,0.000000,0.000000,0.030303,0.343434,0.787879,0.919192,0.431624,0.093702,0.212121,0.262626,0.434343,0.575758,0.252525,0.696970,0.707071,0.707071,0.777778,0.242424,0.626263,0.000000,0.464646,0.202020,0.828283,0.929293,0.424242,0.980769,0.000000,0.000000,0.000000,0.000000,1.000000,0.979798,0.000000,1.000000,0.000000,1.000000,0.238095,0.4,0.216667,0.988852,1.0,0.666667,0.540984
95410,CA,24,H,F,2,L,F,A,C,1,86,1,40,5,90,11,96,8,97,1,86,12,0.587629,1.000000,1.000000,0.353535,0.202020,0.777778,1.000000,0.000000,0.000000,0.929293,0.010101,0.656566,1.000000,1.000000,0.000000,0.000000,0.000000,0.403500,0.409833,0.846154,0.769231,0.888889,0.121212,0.000000,0.858586,0.717172,0.848485,0.565657,0.898990,0.969697,1.000000,1.000000,1.000000,0.090909,0.909091,0.000000,0.000000,0.000000,0.888889,0.888889,0.909091,0.919192,0.933226,0.019969,0.070707,0.464646,0.434343,0.575758,0.242424,0.525253,0.888889,0.858586,0.707071,0.545455,1.000000,1.000000,0.353535,0.202020,0.505051,0.838384,0.858586,0.480769,0.000000,0.060606,0.171717,1.000000,0.010101,1.000000,0.010101,1.000000,0.000000,1.000000,0.571429,0.6,0.583333,0.024466,1.0,1.000000,0.163934


In [15]:
y = target.drop('TARGET_D',axis=1)
y

Unnamed: 0,TARGET_B
0,0
1,0
2,0
3,0
4,0
...,...
95407,0
95408,0
95409,0
95410,1


# Train-Test-Split

In [16]:

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [17]:
train_num = X_train.select_dtypes(include = 'number')
test_num = X_test.select_dtypes(include = 'number')
train_cat = X_train.select_dtypes(object)
test_cat = X_test.select_dtypes(object)

# Scale numerical data (Min-Max Scaler)

In [18]:
scaler = MinMaxScaler()
transformer = scaler.fit(train_num)
train_num_scaled = transformer.transform(train_num)
test_num_scaled = transformer.transform(test_num)

In [19]:
train_num_scaled_df = pd.DataFrame(train_num_scaled, columns=train_num.columns, index=train_num.index)
test_num_scaled_df = pd.DataFrame(test_num_scaled, columns=test_num.columns, index=test_num.index)
train_num_scaled_df

Unnamed: 0,AGE,INCOME,WEALTH1,VIETVETS,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,ETH2,HHN3,DW1,DW2,DW4,DW5,DW6,HV1,HV2,HV3,HV4,HU1,HU2,HU5,HHD2,HHD3,HHD5,ETHC2,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR2,HUPA2,HUPA3,HUPA6,RP1,RP2,RP3,RP4,MSA,ADI,IC6,HHAS3,MC1,MC2,PEC2,TPE13,LFC2,LFC4,LFC6,LFC7,LFC8,LFC9,VC1,VC3,POBC2,LSC1,VOC2,HC2,HC4,HC5,HC6,HC7,HC8,HC11,HC13,HC17,HC18,HC19,MHUC1,MHUC2,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
24301,0.624862,0.000000,0.111111,0.191919,0.202020,0.666667,1.000000,0.0,0.000000,0.959596,0.010101,0.262626,0.222222,0.212121,0.696970,0.646465,0.575758,0.126167,0.134000,0.307692,0.307692,0.232323,0.777778,0.020202,0.353535,0.242424,0.424242,0.696970,0.010101,0.060606,0.151515,0.515152,0.878788,0.000000,0.161616,0.505051,0.080808,0.555556,0.161616,0.454545,0.707071,0.959596,0.091880,0.161290,0.383838,0.383838,0.868687,0.141414,0.090909,0.404040,0.747475,0.727273,0.555556,0.313131,0.696970,1.0,0.191919,0.202020,0.515152,0.909091,0.484848,0.250000,0.181818,0.363636,0.535354,0.636364,0.373737,0.090909,0.818182,1.000000,0.000000,0.959596,0.285714,0.4,0.283333,0.941484,1.0,0.000000,0.573770
2977,0.742268,0.666667,1.000000,0.505051,0.252525,0.888889,1.000000,0.0,0.000000,0.909091,0.030303,0.525253,0.757576,0.747475,0.252525,0.242424,0.202020,0.233667,0.246000,0.307692,0.384615,0.727273,0.282828,0.000000,0.828283,0.717172,0.909091,0.595960,0.101010,0.383838,0.939394,1.000000,1.000000,0.010101,0.737374,0.060606,0.000000,0.171717,0.333333,0.696970,0.979798,0.979798,0.811966,0.161290,0.060606,0.545455,0.444444,0.565657,0.121212,0.636364,0.818182,0.797980,0.737374,0.626263,0.949495,1.0,0.505051,0.252525,0.353535,0.898990,0.727273,0.326923,0.010101,0.101010,0.717172,1.000000,0.010101,0.707071,0.282828,1.000000,0.000000,1.000000,0.476190,0.6,0.533333,0.072955,1.0,0.666667,0.065574
7578,0.515464,0.666667,0.555556,0.000000,0.323232,0.555556,0.000000,1.0,0.000000,1.000000,0.000000,0.383838,0.585859,0.575758,0.404040,0.292929,0.101010,0.112500,0.112667,0.230769,0.230769,0.585859,0.424242,0.000000,0.646465,0.434343,0.767677,0.636364,0.000000,0.000000,0.050505,0.343434,0.818182,0.000000,0.393939,0.050505,0.010101,0.101010,0.030303,0.181818,0.494949,0.757576,0.230769,0.087558,0.383838,0.383838,0.424242,0.585859,0.050505,0.414141,0.898990,0.797980,0.868687,0.828283,0.000000,1.0,0.000000,0.323232,0.838384,0.979798,0.474747,0.884615,0.000000,0.000000,0.212121,0.373737,0.636364,0.818182,0.121212,0.959596,0.050505,1.000000,0.238095,0.6,0.350000,0.013255,1.0,0.666667,0.852459
89185,0.752577,0.000000,1.000000,0.161616,0.555556,1.000000,0.717172,0.0,0.292929,0.949495,0.020202,0.404040,0.909091,0.777778,0.101010,0.101010,0.090909,0.198667,0.199667,0.538462,0.538462,0.929293,0.080808,0.232323,0.828283,0.747475,0.878788,0.545455,0.010101,0.313131,0.585859,0.838384,1.000000,0.000000,0.515152,0.010101,0.000000,0.030303,0.898990,0.909091,0.919192,0.929293,0.957265,0.198157,0.101010,0.595960,0.858586,0.151515,0.101010,0.808081,0.696970,0.676768,0.606061,0.525253,1.000000,0.0,0.161616,0.555556,0.151515,0.797980,0.666667,0.076923,0.787879,0.898990,1.000000,1.000000,0.010101,0.000000,0.989899,1.000000,0.000000,1.000000,0.476190,0.6,0.400000,0.987491,0.0,0.000000,0.295082
7970,0.793814,0.500000,1.000000,0.363636,0.252525,1.000000,0.888889,0.0,0.121212,0.949495,0.040404,0.595960,0.989899,0.989899,0.000000,0.000000,0.000000,0.109500,0.114833,0.307692,0.307692,0.929293,0.080808,0.292929,0.888889,0.797980,0.949495,0.686869,0.000000,0.020202,0.060606,0.181818,0.939394,0.000000,0.535354,0.000000,0.010101,0.000000,0.131313,0.616162,0.878788,0.919192,0.162393,0.428571,0.101010,0.323232,0.545455,0.464646,0.242424,0.929293,0.828283,0.767677,0.767677,0.656566,1.000000,1.0,0.363636,0.252525,0.777778,1.000000,0.818182,0.365385,0.060606,0.090909,0.595960,0.959596,0.050505,0.151515,0.777778,0.838384,0.171717,0.000000,0.333333,0.4,0.166667,0.097698,0.0,0.333333,0.622951
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14197,0.567010,0.666667,1.000000,0.484848,0.232323,0.111111,1.000000,0.0,0.000000,0.767677,0.121212,0.494949,0.797980,0.787879,0.000000,0.000000,0.000000,0.057000,0.057000,0.230769,0.153846,0.626263,0.383838,0.020202,0.767677,0.555556,0.888889,0.444444,0.000000,0.000000,0.000000,0.000000,0.060606,0.000000,0.171717,0.000000,0.202020,0.000000,0.000000,0.010101,0.262626,0.767677,0.448718,0.622120,0.424242,0.171717,0.656566,0.353535,0.030303,0.444444,0.828283,0.707071,0.616162,0.474747,0.626263,0.0,0.484848,0.232323,0.565657,0.929293,0.545455,0.576923,0.030303,0.050505,0.151515,0.616162,0.393939,0.909091,0.080808,1.000000,0.000000,1.000000,0.190476,0.4,0.333333,0.592919,0.0,0.666667,0.803279
20723,0.742268,0.500000,0.888889,0.000000,0.161616,1.000000,1.000000,0.0,0.000000,1.000000,0.000000,0.484848,1.000000,1.000000,0.000000,0.000000,0.000000,0.126667,0.128667,0.461538,0.230769,0.989899,0.020202,0.000000,0.929293,0.797980,0.969697,0.626263,0.000000,0.000000,0.050505,0.525253,0.969697,0.000000,0.808081,0.000000,0.000000,0.000000,0.505051,0.505051,0.505051,0.505051,0.728632,0.253456,0.060606,0.707071,0.181818,0.828283,0.000000,0.191919,0.676768,0.676768,1.000000,0.909091,1.000000,0.0,0.000000,0.161616,0.666667,1.000000,0.888889,0.653846,0.000000,0.000000,0.000000,0.272727,0.737374,1.000000,0.000000,0.212121,0.797980,0.363636,0.285714,0.4,0.183333,0.441038,1.0,0.000000,0.163934
59421,0.587629,0.166667,0.777778,0.424242,0.080808,1.000000,0.979798,0.0,0.030303,0.868687,0.060606,0.565657,0.555556,0.313131,0.444444,0.444444,0.404040,0.150000,0.161833,0.461538,0.461538,0.575758,0.434343,0.020202,0.787879,0.616162,0.888889,0.585859,0.010101,0.070707,0.373737,0.696970,0.989899,0.000000,0.242424,0.313131,0.000000,0.303030,0.767677,0.969697,0.989899,1.000000,0.534188,0.195084,0.111111,0.303030,0.777778,0.232323,0.040404,0.747475,0.898990,0.848485,0.686869,0.515152,0.737374,1.0,0.424242,0.080808,0.242424,0.313131,0.636364,0.115385,0.474747,0.898990,1.000000,1.000000,0.000000,0.000000,0.989899,1.000000,0.000000,1.000000,0.476190,0.6,0.200000,0.201543,0.0,0.666667,0.311475
60804,0.505155,0.166667,1.000000,0.060606,0.717172,0.333333,0.000000,1.0,0.000000,0.989899,0.020202,0.383838,0.757576,0.727273,0.242424,0.141414,0.090909,0.112000,0.120667,0.307692,0.230769,0.707071,0.303030,0.000000,0.717172,0.646465,0.838384,0.555556,0.000000,0.010101,0.141414,0.353535,0.818182,0.000000,0.525253,0.000000,0.000000,0.080808,0.141414,0.373737,0.686869,0.919192,0.000000,0.118280,0.111111,0.575758,0.585859,0.424242,0.111111,0.262626,0.818182,0.808081,0.656566,0.535354,0.616162,0.0,0.060606,0.717172,0.828283,0.949495,0.646465,0.557692,0.101010,0.151515,0.282828,0.414141,0.595960,0.888889,0.111111,1.000000,0.000000,1.000000,0.285714,0.4,0.366667,0.513920,0.0,0.333333,0.590164


# Encode Categorical data (OneHotEncoder)

In [20]:
# Step 1: One-Hot Encode the categorical features
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_cat)  # Create the encoder object
# Fit the encoder on the training data and transform both training and test data
train_cat_encoded = encoder.transform(train_cat).toarray()
test_cat_encoded = encoder.transform(test_cat).toarray()

# Convert the encoded categorical features back to DataFrames with column names
train_cat_encoded_df = pd.DataFrame(train_cat_encoded,columns=encoder.get_feature_names_out())
test_cat_encoded_df = pd.DataFrame(test_cat_encoded,columns=encoder.get_feature_names_out())


# Concat both Scaled numerical data and encoded categorical data

In [21]:
# # Step 2: Concatenate numerical and encoded categorical features
X_train = pd.concat([train_num_scaled_df.reset_index(drop=True), train_cat_encoded_df.reset_index(drop=True)], axis=1)
X_test = pd.concat([test_num_scaled_df.reset_index(drop=True), test_cat_encoded_df.reset_index(drop=True)], axis=1)

# Apply Logistic Regression Model

In [22]:
# Step 1: Instantiate the logistic regression model
logistic_model = LogisticRegression()

# Step 2: Fit the model to the training data
logistic_model.fit(X_train, y_train)

# Step 3: Predict on the test data
y_pred = logistic_model.predict(X_test)

  y = column_or_1d(y, warn=True)


In [23]:
# Calculate score of the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.9513268771223745
Confusion Matrix:
[[22692     0]
 [ 1161     0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.98     22692
           1       0.00      0.00      0.00      1161

    accuracy                           0.95     23853
   macro avg       0.48      0.50      0.49     23853
weighted avg       0.91      0.95      0.93     23853



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Managing imbalance in the dataset

# - Check for the imbalance.
# - Use the resampling strategies used in class for upsampling and downsampling to create a balance between the two classes.
# - Each time fit the model and see how the accuracy of the model has changed.

In [25]:
# Imbalance in the data
y_train.value_counts()

TARGET_B
0           67877
1            3682
Name: count, dtype: int64

# Perform Downsampling

In [26]:
# for downsampling we need to temporarily concat X_train and y_train
# need to reset_index on y_train to make sure they line up
trainset = pd.concat([X_train, y_train.reset_index(drop=True)], axis=1)
# quicker way to downsample category 0:
category_0_downsampled = trainset[trainset['TARGET_B']==0].sample(len(trainset[trainset['TARGET_B']==1]))
print(category_0_downsampled.shape)

category_1 = trainset[trainset['TARGET_B']== 1 ]
print(category_1.shape)
trainset_new = pd.concat([category_0_downsampled, category_1], axis = 0)
trainset_new = trainset_new.sample(frac=1) #randomize the rows
X_train_treated_downsampled = trainset_new.drop(['TARGET_B'], axis=1)
y_train = trainset_new['TARGET_B']

print(X_train_treated_downsampled.shape)

(3682, 418)
(3682, 418)
(7364, 417)


# Fit model with downsampled data

In [27]:
# Step 2: Fit the model to the training data
logistic_model.fit(X_train_treated_downsampled, y_train)

# Step 3: Predict on the test data
y_pred = logistic_model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [28]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.5860898000251541
Confusion Matrix:
[[13318  9374]
 [  499   662]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.59      0.73     22692
           1       0.07      0.57      0.12      1161

    accuracy                           0.59     23853
   macro avg       0.51      0.58      0.42     23853
weighted avg       0.92      0.59      0.70     23853



# Perform Upsampling

In [29]:
# quicker way to downsample category 0:
category_1_upsampled = trainset[trainset['TARGET_B']==1].sample(len(trainset[trainset['TARGET_B']==0]),replace = True)
print(category_1_upsampled.shape)

category_0 = trainset[trainset['TARGET_B']== 0 ]
print(category_0.shape)
trainset_new = pd.concat([category_1_upsampled, category_0], axis = 0)
trainset_new = trainset_new.sample(frac=1) #randomize the rows
X_train_treated_upsampled = trainset_new.drop(['TARGET_B'], axis=1)
y_train = trainset_new['TARGET_B']

print(X_train_treated_upsampled.shape)

(67877, 418)
(67877, 418)
(135754, 417)


# Fit the model with upsampled data

In [30]:
logistic_model.fit(X_train_treated_upsampled, y_train)

# Step 3: Predict on the test data
y_pred = logistic_model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [31]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.6211797258206515
Confusion Matrix:
[[14188  8504]
 [  532   629]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.63      0.76     22692
           1       0.07      0.54      0.12      1161

    accuracy                           0.62     23853
   macro avg       0.52      0.58      0.44     23853
weighted avg       0.92      0.62      0.73     23853



# ---------------------------------------------------------------------------------------

# RandomForestClassifier

In [32]:
# Instructions
# Apply the Random Forests algorithm AFTER upscaling the data to deal with the imbalance.
# Use Feature Selections that you have learned in class to decide if you want to use all of the features (Variance Threshold, RFE, PCA, etc.)
# Re-run the Random Forest algorithm to determine if the Feature Selection has improved the results.
# Discuss the output and its impact in the business scenario. Is the cost of a false positive equals to the cost of the false negative? How would you change your algorithm or data in order to maximize the return of the business?

# Without Feature Selection (VarianceThreshold)

In [33]:
X = pd.concat([numerical,categorical],axis=1)

In [34]:
y = target.drop('TARGET_D',axis=1)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y)
train_num = X_train.select_dtypes(include = 'number')
test_num = X_test.select_dtypes(include = 'number')
train_cat = X_train.select_dtypes(object)
test_cat = X_test.select_dtypes(object)

In [36]:
scaler = MinMaxScaler()
transformer = scaler.fit(train_num)
train_num_scaled = transformer.transform(train_num)
test_num_scaled = transformer.transform(test_num)

In [37]:
train_num_scaled_df = pd.DataFrame(train_num_scaled, columns=train_num.columns, index=train_num.index)
test_num_scaled_df = pd.DataFrame(test_num_scaled, columns=test_num.columns, index=test_num.index)
train_num_scaled_df

Unnamed: 0,TCODE,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MAXRAMNT,LASTGIFT,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
29061,0.000014,0.624862,0.000000,1.000000,0.000000,0.000000,0.262626,0.222222,0.323232,0.040404,0.020202,0.011494,1.000000,0.036991,0.042834,0.040957,0.000000,0.000000,1.000000,0.494949,0.515152,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.511905,0.666667,0.678571,0.511905,0.654762,0.666667,0.320000,0.383838,0.393939,0.232323,0.050505,0.151515,0.151515,0.121212,0.151515,0.295082,0.191919,0.141414,0.171717,0.333333,0.222222,0.141414,0.434343,0.191919,0.424242,0.292929,0.383838,0.333333,0.212121,0.111111,0.030303,0.676768,0.030303,0.178082,0.171717,0.238462,0.342857,0.878788,0.878788,0.020202,0.070707,0.060606,0.030303,0.040404,0.040404,0.000000,0.047667,0.056167,0.076923,0.076923,0.797980,0.212121,0.909091,0.101010,0.060606,0.282828,0.707071,0.656566,0.262626,0.838384,0.171717,0.020202,0.00,0.020202,0.121212,0.212121,0.010101,0.266667,0.454545,0.343434,0.000000,0.000000,0.000000,0.000000,0.000000,0.010101,0.050505,0.212121,0.000000,0.020202,0.545455,0.682353,0.644444,0.213115,0.100,0.050505,0.020202,0.050505,0.131313,0.040404,0.030303,0.010101,0.000000,0.000000,0.010101,0.161616,0.000000,0.603101,0.822928,0.136667,0.174667,0.162000,0.191333,0.057121,0.353535,0.242424,0.212121,0.131313,0.060606,0.010101,0.00,0.000000,0.000000,0.232323,0.242424,0.272727,0.171717,0.080808,0.010101,0.02,0.000000,0.000000,0.404040,0.060606,0.606061,0.121212,0.333333,0.676768,0.070707,0.555556,0.080808,0.000000,0.000000,0.000000,0.000000,0.00,0.131313,0.242424,0.000000,0.090909,0.066667,0.144737,0.010101,0.212121,0.606061,0.696970,0.515152,0.696970,0.505051,0.868687,0.707071,1.000000,1.000000,0.030303,0.070707,0.070707,0.020202,0.111111,0.111111,0.000000,0.000000,0.171717,0.252525,0.070707,0.070707,0.057143,0.037975,0.262626,0.000000,0.040404,0.141414,0.010101,0.000000,0.060606,0.141414,0.050505,0.030303,0.020202,0.000000,0.101010,0.060606,0.070707,0.010101,0.040404,0.020202,0.010101,0.303030,0.474747,0.131313,0.020202,0.705882,0.262626,0.080808,0.292929,0.161616,0.243243,0.080808,0.030303,0.020619,0.202020,0.066667,0.250000,0.010101,0.000000,0.000000,0.000000,0.131313,0.262626,0.000000,0.222222,0.252525,0.323232,0.030303,0.014085,0.000000,0.000000,0.782609,0.000000,0.000000,0.010101,0.000000,0.036364,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.010101,0.878788,0.707071,0.000000,0.000000,0.303030,0.959596,0.686869,0.313131,0.354839,0.750000,0.010101,0.030303,0.090909,0.222222,0.363636,0.646465,0.000000,0.000000,0.000000,0.222222,0.161616,0.585859,0.000000,0.040404,0.848485,0.101010,0.636364,1.000000,0.979798,0.428571,0.4,0.080808,0.070707,0.066667,0.057895,0.210526,0.136364,0.000106,0.000000,0.000000,0.015,0.002002,0.015,0.008621,0.013732,0.448031,0.0,0.000000,0.918033
17829,0.000000,0.360825,1.000000,1.000000,0.000000,0.949495,0.030303,0.181818,0.000000,0.020202,0.010101,0.494253,1.000000,0.010142,0.011697,0.009208,0.000000,0.000000,1.000000,0.515152,0.494949,1.000000,0.000000,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.010417,0.0,0.0,0.000000,0.380952,0.464286,0.500000,0.392857,0.511905,0.535714,0.373333,0.393939,0.464646,0.161616,0.101010,0.252525,0.242424,0.171717,0.131313,0.098361,0.030303,0.161616,0.151515,0.393939,0.202020,0.101010,0.161616,0.020202,0.141414,0.111111,0.343434,0.555556,0.373737,0.171717,0.060606,0.676768,0.080808,0.041096,0.212121,0.352308,0.438571,0.939394,0.929293,0.030303,0.040404,0.020202,0.000000,0.000000,0.000000,0.000000,0.177667,0.185167,0.307692,0.230769,0.888889,0.121212,0.969697,0.040404,0.080808,0.444444,0.858586,0.757576,0.404040,0.929293,0.080808,0.040404,0.04,0.010101,0.131313,0.080808,0.040404,0.320000,0.646465,0.111111,0.000000,0.000000,0.000000,0.020202,0.131313,0.575758,0.878788,0.989899,0.000000,0.010101,0.737374,0.729412,0.700000,0.262295,0.125,0.040404,0.000000,0.020202,0.070707,0.040404,0.000000,0.010101,0.161616,0.565657,0.848485,0.888889,0.230769,0.088372,0.573212,0.334000,0.340000,0.338667,0.348667,0.087421,0.020202,0.131313,0.090909,0.262626,0.363636,0.101010,0.08,0.000000,0.000000,0.030303,0.101010,0.060606,0.272727,0.393939,0.111111,0.08,0.000000,0.000000,0.161616,0.030303,0.464646,0.030303,0.464646,0.545455,0.030303,0.828283,0.070707,0.000000,0.000000,0.000000,0.000000,0.00,0.070707,0.050505,0.000000,0.434343,0.266667,0.368421,0.040404,0.737374,0.767677,0.848485,0.676768,0.767677,0.636364,0.747475,0.707071,1.000000,1.000000,0.000000,0.090909,0.111111,0.060606,0.050505,0.222222,0.000000,0.036364,0.111111,0.060606,0.131313,0.111111,0.042857,0.025316,0.080808,0.000000,0.060606,0.191919,0.030303,0.015625,0.070707,0.141414,0.070707,0.060606,0.030303,0.000000,0.101010,0.020202,0.121212,0.020202,0.020202,0.000000,0.000000,0.080808,0.818182,0.080808,0.010101,0.705882,0.010101,0.141414,0.414141,0.232323,0.135135,0.090909,0.060606,0.051546,0.242424,0.100000,0.250000,0.080808,0.000000,0.000000,0.000000,0.212121,0.404040,0.100000,0.414141,0.181818,0.232323,0.101010,0.000000,0.040404,0.032258,0.043478,0.000000,0.071429,0.040404,0.054545,0.000000,0.040404,0.000000,0.019231,0.105263,0.000000,0.0,0.020202,0.868687,0.969697,0.010101,0.000000,0.030303,1.000000,0.939394,0.373737,0.225806,0.461538,0.020202,0.212121,0.212121,0.474747,0.595960,0.414141,0.000000,0.000000,0.393939,0.151515,0.030303,0.393939,0.000000,0.050505,0.000000,1.000000,0.000000,1.000000,0.979798,0.428571,0.8,0.161616,0.050505,0.150000,0.105263,0.315789,0.181818,0.005385,0.008475,0.073171,0.015,0.005005,0.020,0.002874,0.020407,0.294050,0.0,0.666667,0.229508
38419,0.000014,0.624862,0.666667,1.000000,0.000000,0.000000,0.313131,0.262626,0.343434,0.060606,0.030303,0.011494,1.000000,0.249724,0.272111,0.273593,0.717172,0.000000,0.292929,0.484848,0.525253,0.989899,0.010101,0.000000,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.380952,0.476190,0.535714,0.428571,0.535714,0.583333,0.320000,0.373737,0.424242,0.212121,0.161616,0.222222,0.191919,0.121212,0.121212,0.180328,0.090909,0.151515,0.151515,0.343434,0.202020,0.161616,0.262626,0.121212,0.242424,0.272727,0.323232,0.414141,0.242424,0.080808,0.020202,0.565657,0.121212,0.109589,0.252525,0.264615,0.355714,0.737374,0.727273,0.050505,0.242424,0.191919,0.151515,0.020202,0.020202,0.000000,0.082167,0.093833,0.153846,0.153846,0.666667,0.343434,0.929293,0.080808,0.141414,0.333333,0.676768,0.555556,0.262626,0.808081,0.202020,0.070707,0.04,0.060606,0.161616,0.232323,0.060606,0.266667,0.595960,0.191919,0.000000,0.010101,0.000000,0.000000,0.020202,0.080808,0.232323,0.494949,0.000000,0.070707,0.353535,0.588235,0.555556,0.229508,0.125,0.151515,0.090909,0.020202,0.111111,0.070707,0.151515,0.000000,0.040404,0.080808,0.252525,0.727273,0.260684,0.320930,0.736663,0.161333,0.209333,0.195333,0.238000,0.066593,0.303030,0.222222,0.161616,0.181818,0.111111,0.020202,0.02,0.000000,0.000000,0.151515,0.222222,0.181818,0.242424,0.151515,0.030303,0.02,0.000000,0.010101,0.292929,0.060606,0.424242,0.141414,0.434343,0.575758,0.060606,0.808081,0.131313,0.010101,0.010101,0.000000,0.000000,0.00,0.050505,0.010101,0.030303,0.161616,0.188889,0.263158,0.030303,0.616162,0.646465,0.737374,0.575758,0.676768,0.545455,0.707071,0.545455,0.757576,0.939394,0.050505,0.090909,0.090909,0.040404,0.101010,0.161616,0.000000,0.018182,0.141414,0.010101,0.141414,0.101010,0.071429,0.075949,0.010101,0.015385,0.070707,0.242424,0.040404,0.046875,0.040404,0.202020,0.050505,0.040404,0.030303,0.014925,0.090909,0.070707,0.040404,0.020202,0.050505,0.030303,0.010101,0.050505,0.787879,0.080808,0.000000,0.705882,0.131313,0.161616,0.414141,0.151515,0.135135,0.070707,0.040404,0.041237,0.232323,0.066667,0.250000,0.080808,0.000000,0.000000,0.000000,0.151515,0.313131,0.033333,0.272727,0.191919,0.333333,0.111111,0.000000,0.050505,0.032258,0.369565,0.000000,0.000000,0.050505,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.010101,0.787879,0.979798,0.010101,0.000000,0.020202,0.909091,0.575758,0.181818,0.225806,0.730769,0.020202,0.050505,0.090909,0.222222,0.343434,0.666667,0.000000,0.000000,0.707071,0.040404,0.222222,0.010101,0.000000,0.030303,0.818182,0.181818,0.727273,1.000000,0.929293,0.428571,0.4,0.070707,0.060606,0.200000,0.178947,0.210526,0.121212,0.002640,0.012712,0.048780,0.004,0.003003,0.020,0.001916,0.008475,0.334029,1.0,0.000000,0.852459
70558,0.000000,0.340206,1.000000,1.000000,0.000000,0.000000,0.353535,0.333333,0.171717,0.101010,0.050505,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.426357,0.854711,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.216667,0.152632,0.315789,0.181818,0.003801,0.021186,0.048780,0.005,0.002002,0.015,0.006705,0.007057,0.723914,0.0,0.666667,0.836066
19063,0.000000,0.624862,0.666667,1.000000,0.000000,0.000000,0.333333,0.424242,0.101010,0.000000,0.020202,0.068966,1.000000,0.014205,0.017756,0.012796,1.000000,0.000000,0.000000,0.505051,0.505051,0.868687,0.020202,0.000000,0.121212,0.010101,0.045455,0.027778,0.070707,0.014925,0.026316,0.021277,0.0,0.010417,0.0,0.0,0.000000,0.416667,0.488095,0.500000,0.380952,0.500000,0.523810,0.413333,0.363636,0.393939,0.242424,0.080808,0.181818,0.353535,0.232323,0.101010,0.065574,0.010101,0.161616,0.151515,0.333333,0.212121,0.151515,0.070707,0.010101,0.050505,0.050505,0.323232,0.636364,0.353535,0.111111,0.030303,0.767677,0.040404,0.013699,0.181818,0.380000,0.441429,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.437667,0.518167,0.846154,0.769231,0.949495,0.060606,1.000000,0.010101,0.000000,0.565657,0.939394,0.888889,0.525253,0.969697,0.040404,0.050505,0.02,0.040404,0.050505,0.030303,0.020202,0.266667,0.595960,0.060606,0.018182,0.010101,0.000000,0.808081,0.959596,1.000000,1.000000,1.000000,0.323232,0.000000,0.989899,0.952941,0.911111,0.278689,0.075,0.000000,0.000000,0.000000,0.060606,0.000000,0.000000,0.000000,0.909091,0.939394,0.939394,0.939394,0.811966,0.162791,0.929625,0.490667,0.496667,0.552000,0.565333,0.192995,0.030303,0.020202,0.020202,0.202020,0.252525,0.181818,0.22,0.065574,0.151515,0.010101,0.010101,0.020202,0.212121,0.252525,0.171717,0.24,0.050505,0.161616,0.161616,0.000000,0.747475,0.020202,0.646465,0.363636,0.282828,0.828283,0.101010,0.030303,0.030303,0.000000,0.000000,0.00,0.020202,0.020202,0.020202,0.010101,0.266667,0.342105,0.010101,0.888889,0.787879,0.878788,0.707071,0.878788,0.707071,0.777778,0.595960,1.000000,0.000000,0.000000,0.313131,0.252525,0.040404,0.171717,0.161616,0.000000,0.000000,0.010101,0.030303,0.040404,0.000000,0.014286,0.012658,0.040404,0.015385,0.060606,0.212121,0.030303,0.015625,0.080808,0.060606,0.151515,0.030303,0.030303,0.014925,0.090909,0.101010,0.090909,0.010101,0.020202,0.040404,0.020202,0.040404,0.808081,0.090909,0.000000,0.941176,0.000000,0.000000,0.070707,0.191919,0.216216,0.414141,0.262626,0.123711,0.232323,0.133333,0.319444,0.080808,0.000000,0.000000,0.000000,0.161616,0.333333,0.033333,0.444444,0.151515,0.242424,0.040404,0.014085,0.060606,0.032258,0.054348,0.000000,0.071429,0.020202,0.054545,0.000000,0.000000,0.000000,0.019231,0.052632,0.148148,0.0,0.111111,0.303030,0.898990,0.010101,0.040404,0.060606,1.000000,0.898990,0.303030,0.129032,0.250000,0.060606,0.272727,0.444444,0.878788,0.979798,0.030303,0.000000,0.000000,0.939394,0.000000,0.010101,0.050505,0.000000,0.000000,1.000000,0.000000,0.919192,1.000000,1.000000,0.666667,0.6,0.080808,0.040404,0.083333,0.057895,0.210526,0.136364,0.000634,0.000000,0.024390,0.020,0.003003,0.020,0.008621,0.018738,0.929731,0.0,0.000000,0.016393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66395,0.000014,0.624862,0.166667,1.000000,0.000000,0.000000,0.262626,0.151515,0.282828,0.020202,0.020202,0.011494,1.000000,0.015349,0.017925,0.015987,0.000000,0.444444,0.565657,0.484848,0.525253,0.878788,0.111111,0.010101,0.000000,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.392857,0.500000,0.535714,0.428571,0.535714,0.583333,0.320000,0.414141,0.393939,0.202020,0.121212,0.242424,0.181818,0.161616,0.111111,0.180328,0.080808,0.181818,0.161616,0.323232,0.181818,0.161616,0.272727,0.131313,0.252525,0.222222,0.323232,0.464646,0.252525,0.090909,0.030303,0.636364,0.090909,0.123288,0.191919,0.289231,0.375714,0.797980,0.787879,0.010101,0.050505,0.030303,0.030303,0.020202,0.020202,0.000000,0.086833,0.096167,0.230769,0.153846,0.808081,0.202020,0.959596,0.050505,0.040404,0.373737,0.757576,0.646465,0.313131,0.878788,0.131313,0.060606,0.02,0.050505,0.080808,0.212121,0.030303,0.240000,0.525253,0.171717,0.036364,0.080808,0.012346,0.010101,0.010101,0.050505,0.222222,0.525253,0.000000,0.000000,0.444444,0.600000,0.600000,0.229508,0.125,0.030303,0.020202,0.161616,0.141414,0.010101,0.030303,0.020202,0.010101,0.040404,0.161616,0.555556,0.337607,0.330233,0.643587,0.186000,0.208667,0.204000,0.238667,0.061413,0.262626,0.191919,0.222222,0.181818,0.090909,0.030303,0.02,0.000000,0.000000,0.121212,0.232323,0.242424,0.232323,0.121212,0.040404,0.04,0.000000,0.000000,0.232323,0.070707,0.171717,0.040404,0.373737,0.636364,0.070707,0.757576,0.202020,0.000000,0.000000,0.000000,0.000000,0.04,0.040404,0.000000,0.010101,0.454545,0.200000,0.263158,0.020202,0.616162,0.737374,0.808081,0.666667,0.767677,0.656566,0.787879,0.545455,0.868687,0.767677,0.060606,0.020202,0.101010,0.040404,0.111111,0.171717,0.000000,0.036364,0.080808,0.010101,0.161616,0.232323,0.000000,0.088608,0.010101,0.000000,0.111111,0.383838,0.020202,0.078125,0.030303,0.212121,0.040404,0.040404,0.020202,0.000000,0.050505,0.020202,0.020202,0.010101,0.020202,0.020202,0.010101,0.050505,0.858586,0.060606,0.000000,0.705882,0.202020,0.181818,0.373737,0.101010,0.216216,0.060606,0.020202,0.051546,0.212121,0.133333,0.222222,0.070707,0.000000,0.000000,0.000000,0.131313,0.262626,0.033333,0.151515,0.151515,0.282828,0.222222,0.014085,0.050505,0.000000,0.054348,0.042553,0.000000,0.050505,0.000000,0.000000,0.000000,0.000000,0.000000,0.052632,0.000000,0.0,0.020202,0.818182,0.949495,0.020202,0.000000,0.040404,0.858586,0.555556,0.161616,0.354839,0.346154,0.080808,0.181818,0.272727,0.616162,0.838384,0.171717,0.000000,0.000000,0.353535,0.030303,0.414141,0.161616,0.000000,0.050505,1.000000,0.000000,0.484848,1.000000,0.909091,0.238095,0.4,0.030303,0.020202,0.183333,0.136842,0.210526,0.121212,0.001795,0.008475,0.048780,0.005,0.002803,0.019,0.011494,0.009059,0.997737,0.0,0.000000,0.606557
20482,0.000000,0.608247,0.333333,1.000000,0.000000,0.030303,0.292929,0.404040,0.373737,0.090909,0.000000,0.000000,0.000000,0.004164,0.004586,0.004293,0.000000,0.050505,0.959596,0.505051,0.505051,0.939394,0.010101,0.030303,0.010101,0.080808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.062500,0.0,0.0,0.011628,0.428571,0.500000,0.511905,0.416667,0.535714,0.559524,0.360000,0.353535,0.444444,0.202020,0.080808,0.181818,0.303030,0.171717,0.121212,0.147541,0.060606,0.121212,0.161616,0.383838,0.212121,0.131313,0.202020,0.060606,0.181818,0.222222,0.373737,0.414141,0.252525,0.111111,0.040404,0.616162,0.141414,0.054795,0.202020,0.272308,0.372857,0.787879,0.767677,0.010101,0.040404,0.030303,0.020202,0.030303,0.010101,0.020202,0.205333,0.223333,0.307692,0.153846,0.707071,0.303030,0.858586,0.151515,0.484848,0.373737,0.727273,0.595960,0.272727,0.808081,0.202020,0.101010,0.06,0.070707,0.161616,0.141414,0.070707,0.280000,0.575758,0.151515,0.000000,0.000000,0.000000,0.121212,0.353535,0.656566,0.868687,0.989899,0.020202,0.121212,0.292929,0.552941,0.511111,0.229508,0.125,0.020202,0.020202,0.171717,0.212121,0.020202,0.010101,0.050505,0.181818,0.393939,0.616162,0.767677,0.000000,0.100775,0.916005,0.170667,0.193333,0.220667,0.244000,0.071309,0.242424,0.252525,0.171717,0.171717,0.090909,0.060606,0.04,0.016393,0.000000,0.181818,0.242424,0.181818,0.191919,0.111111,0.070707,0.06,0.010101,0.000000,0.252525,0.101010,0.434343,0.181818,0.535354,0.474747,0.070707,0.676768,0.151515,0.000000,0.000000,0.000000,0.000000,0.04,0.040404,0.121212,0.010101,0.020202,0.188889,0.315789,0.060606,0.595960,0.636364,0.727273,0.545455,0.666667,0.494949,0.606061,0.505051,0.424242,1.000000,0.131313,0.161616,0.111111,0.010101,0.121212,0.101010,0.023256,0.036364,0.111111,0.111111,0.121212,0.050505,0.071429,0.050633,0.101010,0.015385,0.070707,0.171717,0.030303,0.015625,0.030303,0.181818,0.040404,0.040404,0.040404,0.014925,0.090909,0.080808,0.080808,0.030303,0.090909,0.050505,0.010101,0.252525,0.494949,0.080808,0.020202,0.776471,0.040404,0.141414,0.242424,0.282828,0.270270,0.141414,0.070707,0.020619,0.272727,0.033333,0.250000,0.111111,0.000000,0.010101,0.000000,0.171717,0.333333,0.033333,0.292929,0.202020,0.383838,0.070707,0.014085,0.040404,0.000000,0.076087,0.000000,0.000000,0.070707,0.036364,0.000000,0.010101,0.023256,0.000000,0.052632,0.037037,0.0,0.050505,0.626263,0.929293,0.060606,0.000000,0.020202,0.979798,0.696970,0.333333,0.161290,0.326923,0.020202,0.131313,0.292929,0.686869,0.808081,0.202020,0.000000,0.000000,0.050505,0.191919,0.090909,0.050505,0.033333,0.606061,0.292929,0.515152,0.161616,0.939394,0.929293,0.095238,0.6,0.060606,0.060606,0.450000,0.347368,0.315789,0.181818,0.018161,0.055085,0.146341,0.005,0.004004,0.020,0.005747,0.012015,0.886191,0.0,0.333333,0.901639
48983,0.000014,0.624862,0.500000,0.111111,0.020747,0.777778,0.111111,0.252525,0.000000,0.000000,0.040404,0.287356,1.000000,0.038865,0.044728,0.044290,1.000000,0.000000,0.000000,0.505051,0.505051,0.868687,0.040404,0.010101,0.080808,0.040404,0.000000,0.013889,0.000000,0.074627,0.000000,0.000000,0.0,0.020833,0.0,0.0,0.023256,0.309524,0.369048,0.416667,0.333333,0.428571,0.476190,0.346667,0.515152,0.333333,0.161616,0.222222,0.353535,0.212121,0.090909,0.060606,0.081967,0.010101,0.222222,0.212121,0.292929,0.171717,0.111111,0.080808,0.020202,0.060606,0.242424,0.373737,0.393939,0.202020,0.070707,0.020202,0.656566,0.111111,0.027397,0.222222,0.261538,0.348571,0.373737,0.343434,0.020202,0.595960,0.575758,0.525253,0.000000,0.000000,0.000000,0.175500,0.190500,0.384615,0.384615,0.353535,0.656566,0.909091,0.101010,0.050505,0.373737,0.686869,0.585859,0.292929,0.818182,0.191919,0.070707,0.02,0.060606,0.212121,0.131313,0.080808,0.253333,0.606061,0.060606,0.018182,0.030303,0.000000,0.040404,0.151515,0.565657,0.888889,1.000000,0.010101,0.050505,0.323232,0.564706,0.544444,0.229508,0.100,0.212121,0.383838,0.050505,0.050505,0.060606,0.525253,0.020202,0.525253,0.929293,0.979798,1.000000,0.122863,0.162791,0.929625,0.233333,0.234000,0.250667,0.260667,0.090412,0.131313,0.181818,0.191919,0.282828,0.171717,0.040404,0.02,0.000000,0.010101,0.080808,0.161616,0.252525,0.272727,0.181818,0.040404,0.02,0.000000,0.010101,0.101010,0.050505,0.505051,0.050505,0.848485,0.161616,0.444444,0.777778,0.161616,0.030303,0.010101,0.000000,0.021277,0.00,0.030303,0.010101,0.010101,0.060606,0.200000,0.289474,0.060606,0.595960,0.797980,0.878788,0.696970,0.848485,0.656566,0.686869,0.565657,0.858586,0.474747,0.050505,0.212121,0.101010,0.070707,0.121212,0.171717,0.000000,0.018182,0.090909,0.000000,0.171717,0.010101,0.028571,0.025316,0.000000,0.000000,0.050505,0.151515,0.020202,0.015625,0.020202,0.242424,0.050505,0.040404,0.030303,0.014925,0.050505,0.090909,0.060606,0.181818,0.040404,0.030303,0.393939,0.040404,0.494949,0.020202,0.000000,0.823529,0.020202,0.060606,0.222222,0.363636,0.189189,0.212121,0.060606,0.030928,0.232323,0.033333,0.236111,0.080808,0.113402,0.191919,0.051282,0.262626,0.464646,0.166667,0.393939,0.070707,0.131313,0.383838,0.028169,0.050505,0.032258,0.086957,0.000000,0.000000,0.040404,0.018182,0.054545,0.010101,0.000000,0.000000,0.052632,0.037037,0.0,0.070707,0.282828,0.909091,0.020202,0.070707,0.010101,0.979798,0.616162,0.121212,0.032258,0.076923,0.242424,0.606061,0.707071,0.787879,0.888889,0.121212,0.011111,0.000000,0.252525,0.000000,0.656566,0.040404,0.000000,0.050505,0.939394,0.070707,0.848485,1.000000,1.000000,0.428571,0.6,0.040404,0.030303,0.133333,0.094737,0.315789,0.166667,0.001161,0.004237,0.048780,0.005,0.003003,0.020,0.002874,0.011229,0.907820,1.0,0.333333,0.327869
36232,0.000028,0.515464,0.666667,1.000000,0.000000,0.000000,0.242424,0.434343,0.131313,0.030303,0.010101,0.022989,0.777778,0.022978,0.023689,0.019405,1.000000,0.000000,0.000000,0.505051,0.505051,0.888889,0.030303,0.000000,0.060606,0.090909,0.045455,0.013889,0.010101,0.029851,0.026316,0.000000,0.0,0.052083,0.0,0.0,0.046512,0.345238,0.428571,0.452381,0.345238,0.452381,0.500000,0.400000,0.424242,0.404040,0.181818,0.161616,0.282828,0.262626,0.181818,0.080808,0.049180,0.010101,0.171717,0.171717,0.333333,0.181818,0.151515,0.070707,0.010101,0.040404,0.090909,0.252525,0.666667,0.454545,0.202020,0.060606,0.595960,0.111111,0.027397,0.282828,0.423077,0.470000,1.000000,0.848485,0.010101,0.010101,0.000000,0.000000,0.000000,0.000000,0.000000,0.365833,0.361167,0.692308,0.692308,0.696970,0.313131,0.989899,0.020202,0.000000,0.484848,0.828283,0.696970,0.404040,0.868687,0.141414,0.080808,0.04,0.060606,0.121212,0.101010,0.090909,0.306667,0.616162,0.050505,0.018182,0.020202,0.000000,0.737374,0.949495,1.000000,1.000000,1.000000,0.000000,0.000000,0.858586,0.741176,0.733333,0.295082,0.125,0.010101,0.000000,0.000000,0.303030,0.010101,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,0.616987,0.100775,0.916005,0.356667,0.347333,0.360667,0.349333,0.094429,0.070707,0.060606,0.121212,0.202020,0.373737,0.151515,0.08,0.000000,0.000000,0.060606,0.070707,0.121212,0.212121,0.373737,0.161616,0.02,0.000000,0.000000,0.101010,0.050505,0.484848,0.040404,0.535354,0.474747,0.111111,0.838384,0.080808,0.030303,0.020202,0.036364,0.000000,0.00,0.020202,0.030303,0.020202,0.313131,0.211111,0.381579,0.101010,0.555556,0.848485,0.888889,0.797980,0.848485,0.757576,0.696970,0.525253,0.727273,1.000000,0.060606,0.131313,0.171717,0.050505,0.151515,0.202020,0.000000,0.036364,0.121212,0.010101,0.090909,0.010101,0.028571,0.037975,0.010101,0.000000,0.050505,0.111111,0.030303,0.109375,0.070707,0.242424,0.111111,0.050505,0.050505,0.029851,0.050505,0.020202,0.080808,0.030303,0.030303,0.010101,0.020202,0.101010,0.777778,0.070707,0.000000,0.823529,0.020202,0.050505,0.252525,0.323232,0.243243,0.181818,0.090909,0.072165,0.262626,0.133333,0.319444,0.070707,0.000000,0.000000,0.000000,0.131313,0.242424,0.066667,0.434343,0.242424,0.131313,0.121212,0.000000,0.040404,0.000000,0.086957,0.021277,0.000000,0.060606,0.072727,0.018182,0.000000,0.069767,0.000000,0.105263,0.000000,0.0,0.101010,0.626263,0.878788,0.050505,0.040404,0.040404,1.000000,0.868687,0.313131,0.161290,0.423077,0.000000,0.000000,0.131313,0.353535,1.000000,0.000000,0.000000,0.000000,0.909091,0.000000,0.101010,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,1.000000,1.000000,0.571429,0.4,0.050505,0.030303,0.466667,0.368421,0.315789,0.166667,0.028402,0.105932,0.341463,0.003,0.002202,0.016,0.001916,0.009611,0.863044,0.0,0.000000,0.180328


In [38]:
# Step 1: One-Hot Encode the categorical features
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_cat)  # Create the encoder object
# Fit the encoder on the training data and transform both training and test data
train_cat_encoded = encoder.transform(train_cat).toarray()
test_cat_encoded = encoder.transform(test_cat).toarray()

# Convert the encoded categorical features back to DataFrames with column names
train_cat_encoded_df = pd.DataFrame(train_cat_encoded,columns=encoder.get_feature_names_out())
test_cat_encoded_df = pd.DataFrame(test_cat_encoded,columns=encoder.get_feature_names_out())

In [39]:
# # Step 2: Concatenate numerical and encoded categorical features
X_train = pd.concat([train_num_scaled_df.reset_index(drop=True), train_cat_encoded_df.reset_index(drop=True)], axis=1)
X_test = pd.concat([test_num_scaled_df.reset_index(drop=True), test_cat_encoded_df.reset_index(drop=True)], axis=1)

# Upsample data

In [40]:
# quicker way to upsample category 1:
trainset = pd.concat([X_train, y_train.reset_index(drop=True)], axis=1)
category_1_upsampled = trainset[trainset['TARGET_B']==1].sample(len(trainset[trainset['TARGET_B']==0]),replace = True)
print(category_1_upsampled.shape)

category_0 = trainset[trainset['TARGET_B']== 0 ]
print(category_0.shape)
trainset_new = pd.concat([category_1_upsampled, category_0], axis = 0)

(67915, 651)
(67915, 651)


In [41]:
trainset_new = trainset_new.sample(frac=1) #randomize the rows
X_train_treated_upsampled = trainset_new.drop(['TARGET_B'], axis=1)
y_train = trainset_new['TARGET_B']

print(X_train_treated_upsampled.shape)

(135830, 650)


# Apply RandomForestClassifier Model for the upsampled data

In [42]:
clf = RandomForestClassifier(max_depth=5, # max number of questions to ask
                             min_samples_split=20, # amount of rows still considered at every question
                             min_samples_leaf =20, # ultimate answer based on at least this many rows
                             max_samples=0.8, # fraction of X-train to use in each tree
                             random_state=42)
clf.fit(X_train_treated_upsampled, y_train)
print(clf.score(X_train_treated_upsampled, y_train))
print(clf.score(X_test, y_test))

y_pred = clf.predict(X_test)
display(y_test.value_counts())
display(confusion_matrix(y_test, y_pred))

0.6236692924979754
0.6149331321007839


TARGET_B
0           22654
1            1199
Name: count, dtype: int64

array([[14027,  8627],
       [  558,   641]], dtype=int64)

# With FeatureSelction(VarainceThreshold)

In [43]:
# new_numerical_scaled was taken from above 
X= pd.concat([categorical,new_numerical_scaled],axis = 1)

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y)
train_num = X_train.select_dtypes(include = 'number')
test_num = X_test.select_dtypes(include = 'number')
train_cat = X_train.select_dtypes(object)
test_cat = X_test.select_dtypes(object)

In [45]:
scaler = MinMaxScaler()
transformer = scaler.fit(train_num)
train_num_scaled = transformer.transform(train_num)
test_num_scaled = transformer.transform(test_num)

In [46]:
train_num_scaled_df = pd.DataFrame(train_num_scaled, columns=train_num.columns, index=train_num.index)
test_num_scaled_df = pd.DataFrame(test_num_scaled, columns=test_num.columns, index=test_num.index)
train_num_scaled_df

Unnamed: 0,AGE,INCOME,WEALTH1,VIETVETS,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,ETH2,HHN3,DW1,DW2,DW4,DW5,DW6,HV1,HV2,HV3,HV4,HU1,HU2,HU5,HHD2,HHD3,HHD5,ETHC2,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR2,HUPA2,HUPA3,HUPA6,RP1,RP2,RP3,RP4,MSA,ADI,IC6,HHAS3,MC1,MC2,PEC2,TPE13,LFC2,LFC4,LFC6,LFC7,LFC8,LFC9,VC1,VC3,POBC2,LSC1,VOC2,HC2,HC4,HC5,HC6,HC7,HC8,HC11,HC13,HC17,HC18,HC19,MHUC1,MHUC2,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
88445,0.443299,0.833333,1.000000,0.626263,0.050505,1.000000,1.000000,0.000000,0.000000,0.909091,0.040404,0.454545,0.424242,0.333333,0.575758,0.575758,0.565657,0.293667,0.301000,0.461538,0.461538,0.676768,0.333333,0.020202,0.696970,0.555556,0.848485,0.646465,0.181818,0.828283,0.959596,1.000000,1.000000,0.040404,0.404040,0.303030,0.000000,0.323232,0.939394,0.979798,0.989899,0.989899,0.170940,0.079070,0.040404,0.555556,0.515152,0.494949,0.353535,0.636364,0.979798,0.959596,0.626263,0.545455,0.676768,1.000000,0.626263,0.050505,0.545455,0.929293,0.656566,0.288462,0.070707,0.202020,1.000000,1.000000,0.010101,0.777778,0.050505,1.000000,0.010101,1.000000,0.666667,1.0,0.233333,0.483642,1.0,0.000000,0.032787
92848,0.793814,0.166667,1.000000,0.060606,0.838384,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,0.080808,0.242424,0.212121,0.747475,0.717172,0.676768,0.214667,0.276333,0.384615,0.461538,0.737374,0.272727,0.777778,0.525253,0.474747,0.696970,0.262626,0.232323,0.393939,0.707071,0.929293,0.989899,0.101010,0.161616,0.606061,0.000000,0.181818,0.505051,0.666667,0.858586,0.929293,0.802350,1.000000,0.242424,0.767677,0.444444,0.565657,0.040404,0.363636,0.282828,0.262626,0.747475,0.737374,0.737374,0.000000,0.060606,0.838384,0.030303,0.919192,0.191919,0.403846,0.010101,0.020202,0.525253,0.868687,0.141414,0.030303,0.929293,0.969697,0.040404,0.959596,0.523810,0.6,0.116667,0.016113,1.0,0.000000,0.196721
52844,0.711340,0.166667,0.555556,0.070707,0.747475,0.555556,0.000000,0.585859,0.424242,1.000000,0.000000,0.212121,0.909091,0.878788,0.060606,0.040404,0.020202,0.095667,0.102333,0.230769,0.230769,0.848485,0.161616,0.111111,0.767677,0.707071,0.878788,0.373737,0.000000,0.010101,0.070707,0.202020,0.676768,0.000000,0.373737,0.020202,0.020202,0.020202,0.000000,0.242424,0.565657,0.808081,0.000000,0.662016,0.252525,0.616162,0.414141,0.595960,0.040404,0.202020,0.393939,0.353535,0.929293,0.646465,0.000000,1.000000,0.070707,0.747475,0.272727,0.959596,0.555556,0.288462,0.111111,0.262626,0.838384,0.969697,0.040404,0.666667,0.282828,0.969697,0.040404,0.646465,0.238095,0.4,0.566667,0.584818,1.0,0.666667,0.196721
94523,0.494845,0.833333,1.000000,0.292929,0.383838,0.222222,0.000000,0.676768,0.333333,0.818182,0.010101,0.434343,0.747475,0.707071,0.151515,0.111111,0.080808,0.122500,0.142000,0.230769,0.153846,0.626263,0.383838,0.313131,0.737374,0.585859,0.858586,0.464646,0.030303,0.080808,0.252525,0.484848,0.848485,0.000000,0.383838,0.060606,0.101010,0.080808,0.030303,0.121212,0.383838,0.757576,0.000000,0.103876,0.242424,0.323232,0.474747,0.535354,0.111111,0.393939,0.747475,0.696970,0.525253,0.484848,0.494949,1.000000,0.282828,0.383838,0.626263,0.737374,0.585859,0.615385,0.090909,0.191919,0.363636,0.515152,0.494949,0.646465,0.202020,0.787879,0.222222,0.686869,0.428571,0.6,0.250000,0.899153,0.0,0.666667,0.901639
197,0.587629,1.000000,1.000000,0.313131,0.343434,1.000000,1.000000,0.000000,0.000000,0.050505,0.949495,0.575758,0.979798,0.959596,0.020202,0.010101,0.000000,0.033167,0.036500,0.230769,0.230769,0.535354,0.474747,0.000000,0.767677,0.272727,0.888889,0.020202,0.000000,0.000000,0.000000,0.010101,0.020202,0.000000,0.313131,0.000000,0.000000,0.000000,0.010101,0.070707,0.646465,1.000000,0.230769,0.088372,0.484848,0.070707,0.434343,0.575758,0.252525,0.828283,0.505051,0.414141,0.464646,0.222222,0.343434,1.000000,0.313131,0.343434,0.898990,1.000000,0.181818,0.730769,0.000000,0.000000,0.070707,0.141414,0.868687,0.949495,0.000000,1.000000,0.000000,1.000000,0.238095,0.2,0.316667,0.356447,0.0,0.666667,0.754098
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85593,0.742268,1.000000,1.000000,0.494949,0.252525,1.000000,1.000000,0.000000,0.000000,0.969697,0.020202,0.646465,1.000000,1.000000,0.000000,0.000000,0.000000,0.246000,0.253000,0.692308,0.769231,0.959596,0.050505,0.000000,0.939394,0.878788,0.969697,0.656566,0.070707,0.474747,0.979798,1.000000,1.000000,0.010101,0.969697,0.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,0.662393,0.426357,0.030303,0.555556,0.343434,0.666667,0.000000,0.808081,0.828283,0.818182,0.626263,0.555556,1.000000,1.000000,0.494949,0.252525,0.303030,0.989899,0.898990,0.307692,0.020202,0.020202,0.959596,1.000000,0.000000,0.484848,0.525253,1.000000,0.000000,1.000000,0.571429,0.8,0.266667,0.048708,1.0,0.000000,0.016393
18467,0.701031,0.500000,0.666667,0.464646,0.141414,1.000000,1.000000,0.000000,0.000000,0.151515,0.777778,0.717172,0.909091,0.727273,0.080808,0.000000,0.000000,0.096333,0.099000,0.307692,0.230769,0.828283,0.181818,0.000000,0.898990,0.575758,0.949495,0.101010,0.000000,0.010101,0.020202,0.141414,0.686869,0.000000,0.404040,0.000000,0.000000,0.000000,0.070707,0.434343,0.707071,0.757576,0.068376,0.314729,0.191919,0.212121,0.363636,0.646465,0.020202,0.696970,0.717172,0.666667,0.808081,0.454545,0.808081,0.000000,0.464646,0.141414,0.808081,0.909091,0.626263,0.211538,0.020202,0.575758,0.939394,0.939394,0.070707,0.959596,0.020202,1.000000,0.000000,1.000000,0.380952,0.8,0.200000,0.662711,0.0,0.666667,0.327869
24270,0.624862,0.500000,0.444444,0.313131,0.070707,0.555556,0.070707,0.000000,0.939394,0.969697,0.010101,0.555556,0.484848,0.474747,0.070707,0.020202,0.010101,0.155833,0.176167,0.307692,0.230769,0.818182,0.191919,0.000000,0.838384,0.727273,0.909091,0.606061,0.050505,0.131313,0.404040,0.787879,0.969697,0.010101,0.494949,0.010101,0.454545,0.010101,0.111111,0.272727,0.808081,0.919192,0.876068,0.162791,0.070707,0.393939,0.575758,0.434343,0.282828,0.737374,0.828283,0.818182,0.737374,0.555556,1.000000,0.858586,0.313131,0.070707,0.616162,0.979798,0.747475,0.230769,0.252525,0.464646,0.828283,0.959596,0.050505,0.000000,0.767677,0.252525,0.717172,0.060606,0.333333,0.4,0.350000,0.996538,0.0,0.000000,0.606557
87262,0.443299,0.666667,1.000000,0.343434,0.171717,0.666667,0.000000,0.979798,0.030303,0.787879,0.020202,0.505051,0.696970,0.696970,0.121212,0.111111,0.111111,0.082500,0.087333,0.230769,0.230769,0.777778,0.232323,0.020202,0.777778,0.606061,0.898990,0.505051,0.000000,0.000000,0.010101,0.101010,0.484848,0.000000,0.343434,0.101010,0.171717,0.111111,0.040404,0.151515,0.484848,0.838384,0.914530,0.417054,0.181818,0.282828,0.696970,0.313131,0.404040,0.525253,0.898990,0.848485,0.535354,0.484848,1.000000,1.000000,0.343434,0.171717,0.656566,0.979798,0.646465,0.173077,0.303030,0.646465,0.878788,0.919192,0.090909,0.737374,0.262626,1.000000,0.000000,0.939394,0.285714,0.4,0.350000,0.595518,0.0,1.000000,0.622951


In [47]:
# Step 1: One-Hot Encode the categorical features
encoder = OneHotEncoder(handle_unknown='ignore').fit(train_cat)  # Create the encoder object
# Fit the encoder on the training data and transform both training and test data
train_cat_encoded = encoder.transform(train_cat).toarray()
test_cat_encoded = encoder.transform(test_cat).toarray()

# Convert the encoded categorical features back to DataFrames with column names
train_cat_encoded_df = pd.DataFrame(train_cat_encoded,columns=encoder.get_feature_names_out())
test_cat_encoded_df = pd.DataFrame(test_cat_encoded,columns=encoder.get_feature_names_out())

In [48]:
# # Step 2: Concatenate numerical and encoded categorical features
X_train = pd.concat([train_num_scaled_df.reset_index(drop=True), train_cat_encoded_df.reset_index(drop=True)], axis=1)
X_test = pd.concat([test_num_scaled_df.reset_index(drop=True), test_cat_encoded_df.reset_index(drop=True)], axis=1)

In [49]:
# quicker way to upsample category 1:
trainset = pd.concat([X_train, y_train.reset_index(drop=True)], axis=1)
category_1_upsampled = trainset[trainset['TARGET_B']==1].sample(len(trainset[trainset['TARGET_B']==0]),replace = True)
print(category_1_upsampled.shape)

category_0 = trainset[trainset['TARGET_B']== 0 ]
print(category_0.shape)
trainset_new = pd.concat([category_1_upsampled, category_0], axis = 0)

(67927, 419)
(67927, 419)


In [50]:
trainset_new = trainset_new.sample(frac=1) #randomize the rows
X_train_treated_upsampled = trainset_new.drop(['TARGET_B'], axis=1)
y_train = trainset_new['TARGET_B']

print(X_train_treated_upsampled.shape)

(135854, 418)


# Apply RandomForestClassifier model 

In [51]:
clf = RandomForestClassifier(max_depth=5, # max number of questions to ask
                             min_samples_split=20, # amount of rows still considered at every question
                             min_samples_leaf =20, # ultimate answer based on at least this many rows
                             max_samples=0.8, # fraction of X-train to use in each tree
                             random_state=42)
clf.fit(X_train_treated_upsampled, y_train)
print(clf.score(X_train_treated_upsampled, y_train))
print(clf.score(X_test, y_test))

y_pred = clf.predict(X_test)
display(y_test.value_counts())
display(confusion_matrix(y_test, y_pred))

0.6257673679096677
0.6126273424726449


TARGET_B
0           22642
1            1211
Name: count, dtype: int64

array([[13968,  8674],
       [  566,   645]], dtype=int64)

In [52]:
# Discuss the output and its impact in the business scenario. Is the cost of a false positive equals to the cost of the false negative? How would you change your algorithm or data in order to maximize the return of the business?

# Adusting the threshold value or add new features or selecting different model may help improve the model

In [69]:
# Concatenate X_train and X_test and reset index
X = pd.concat([X_train_treated_upsampled, X_test], axis=0).reset_index(drop=True)
y_train = pd.DataFrame(y_train)
# Concatenate y_train and y_test and reset index
y = pd.concat([y_train, y_test], axis=0).reset_index(drop=True)

In [70]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X, y)

  return fit_method(estimator, *args, **kwargs)


In [71]:
# Make predictions on all data
y_pred_all = rf_classifier.predict(X)

# Optionally, you can append the predictions to your DataFrame
X['predicted_target'] = y_pred_all

# Print or inspect the predictions
print(X['predicted_target'])

0         1
1         1
2         1
3         1
4         1
         ..
159702    0
159703    0
159704    0
159705    0
159706    0
Name: predicted_target, Length: 159707, dtype: int64


In [72]:
X['actual_target'] = y
X

Unnamed: 0,AGE,INCOME,WEALTH1,VIETVETS,WWIIVETS,WEALTH2,POP90C1,POP90C2,POP90C3,ETH1,ETH2,HHN3,DW1,DW2,DW4,DW5,DW6,HV1,HV2,HV3,HV4,HU1,HU2,HU5,HHD2,HHD3,HHD5,ETHC2,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR2,HUPA2,HUPA3,HUPA6,RP1,RP2,RP3,RP4,MSA,ADI,IC6,HHAS3,MC1,MC2,PEC2,TPE13,LFC2,LFC4,LFC6,LFC7,LFC8,LFC9,VC1,VC3,POBC2,LSC1,VOC2,HC2,HC4,HC5,HC6,HC7,HC8,HC11,HC13,HC17,HC18,HC19,MHUC1,MHUC2,CARDPROM,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2,STATE_CA,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,CLUSTER_1,CLUSTER_2,CLUSTER_3,CLUSTER_4,CLUSTER_5,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,HOMEOWNR_H,HOMEOWNR_U,GENDER_F,GENDER_M,GENDER_other,DATASRCE_1,DATASRCE_2,DATASRCE_3,RFA_2R_L,RFA_2A_D,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_A,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_C,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_1,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,ODATEW_YR_83,ODATEW_YR_84,ODATEW_YR_85,ODATEW_YR_86,ODATEW_YR_87,ODATEW_YR_88,ODATEW_YR_89,ODATEW_YR_90,ODATEW_YR_91,ODATEW_YR_92,ODATEW_YR_93,ODATEW_YR_94,ODATEW_YR_95,ODATEW_YR_96,ODATEW_YR_97,ODATEW_MM_1,ODATEW_MM_2,ODATEW_MM_3,ODATEW_MM_4,ODATEW_MM_5,ODATEW_MM_6,ODATEW_MM_7,ODATEW_MM_9,ODATEW_MM_10,ODATEW_MM_11,ODATEW_MM_12,DOB_YR_0,DOB_YR_1,DOB_YR_2,DOB_YR_4,DOB_YR_5,DOB_YR_6,DOB_YR_7,DOB_YR_8,DOB_YR_9,DOB_YR_10,DOB_YR_11,DOB_YR_12,DOB_YR_13,DOB_YR_14,DOB_YR_15,DOB_YR_16,DOB_YR_17,DOB_YR_18,DOB_YR_19,DOB_YR_20,DOB_YR_21,DOB_YR_22,DOB_YR_23,DOB_YR_24,DOB_YR_25,DOB_YR_26,DOB_YR_27,DOB_YR_28,DOB_YR_29,DOB_YR_30,DOB_YR_31,DOB_YR_32,DOB_YR_33,DOB_YR_34,DOB_YR_35,DOB_YR_36,DOB_YR_37,DOB_YR_38,DOB_YR_39,DOB_YR_40,DOB_YR_41,DOB_YR_42,DOB_YR_43,DOB_YR_44,DOB_YR_45,DOB_YR_46,DOB_YR_47,DOB_YR_48,DOB_YR_49,DOB_YR_50,DOB_YR_51,DOB_YR_52,DOB_YR_53,DOB_YR_54,DOB_YR_55,DOB_YR_56,DOB_YR_57,DOB_YR_58,DOB_YR_59,DOB_YR_60,DOB_YR_61,DOB_YR_62,DOB_YR_63,DOB_YR_64,DOB_YR_65,DOB_YR_66,DOB_YR_67,DOB_YR_68,DOB_YR_69,DOB_YR_70,DOB_YR_71,DOB_YR_72,DOB_YR_73,DOB_YR_74,DOB_YR_75,DOB_YR_76,DOB_YR_77,DOB_YR_78,DOB_YR_79,DOB_YR_80,DOB_YR_81,DOB_YR_82,DOB_YR_84,DOB_YR_85,DOB_YR_86,DOB_YR_87,DOB_YR_88,DOB_YR_89,DOB_YR_90,DOB_YR_91,DOB_YR_92,DOB_YR_93,DOB_YR_95,DOB_YR_96,DOB_YR_97,DOB_MM_1,DOB_MM_2,DOB_MM_3,DOB_MM_4,DOB_MM_5,DOB_MM_6,DOB_MM_7,DOB_MM_8,DOB_MM_9,DOB_MM_10,DOB_MM_11,DOB_MM_12,MINRDATE_YR_75,MINRDATE_YR_77,MINRDATE_YR_78,MINRDATE_YR_80,MINRDATE_YR_82,MINRDATE_YR_83,MINRDATE_YR_84,MINRDATE_YR_85,MINRDATE_YR_86,MINRDATE_YR_87,MINRDATE_YR_88,MINRDATE_YR_89,MINRDATE_YR_90,MINRDATE_YR_91,MINRDATE_YR_92,MINRDATE_YR_93,MINRDATE_YR_94,MINRDATE_YR_95,MINRDATE_YR_96,MINRDATE_YR_97,MINRDATE_MM_1,MINRDATE_MM_2,MINRDATE_MM_3,MINRDATE_MM_4,MINRDATE_MM_5,MINRDATE_MM_6,MINRDATE_MM_7,MINRDATE_MM_8,MINRDATE_MM_9,MINRDATE_MM_10,MINRDATE_MM_11,MINRDATE_MM_12,MAXRDATE_YR_75,MAXRDATE_YR_79,MAXRDATE_YR_82,MAXRDATE_YR_83,MAXRDATE_YR_84,MAXRDATE_YR_85,MAXRDATE_YR_86,MAXRDATE_YR_87,MAXRDATE_YR_88,MAXRDATE_YR_89,MAXRDATE_YR_90,MAXRDATE_YR_91,MAXRDATE_YR_92,MAXRDATE_YR_93,MAXRDATE_YR_94,MAXRDATE_YR_95,MAXRDATE_YR_96,MAXRDATE_YR_97,MAXRDATE_MM_1,MAXRDATE_MM_2,MAXRDATE_MM_3,MAXRDATE_MM_4,MAXRDATE_MM_5,MAXRDATE_MM_6,MAXRDATE_MM_7,MAXRDATE_MM_8,MAXRDATE_MM_9,MAXRDATE_MM_10,MAXRDATE_MM_11,MAXRDATE_MM_12,LASTDATE_YR_95,LASTDATE_YR_96,LASTDATE_YR_97,LASTDATE_MM_1,LASTDATE_MM_2,LASTDATE_MM_3,LASTDATE_MM_4,LASTDATE_MM_5,LASTDATE_MM_6,LASTDATE_MM_7,LASTDATE_MM_8,LASTDATE_MM_9,LASTDATE_MM_10,LASTDATE_MM_11,LASTDATE_MM_12,FIRSTDATE_YR_0,FIRSTDATE_YR_49,FIRSTDATE_YR_72,FIRSTDATE_YR_74,FIRSTDATE_YR_75,FIRSTDATE_YR_76,FIRSTDATE_YR_77,FIRSTDATE_YR_78,FIRSTDATE_YR_79,FIRSTDATE_YR_80,FIRSTDATE_YR_81,FIRSTDATE_YR_82,FIRSTDATE_YR_83,FIRSTDATE_YR_84,FIRSTDATE_YR_85,FIRSTDATE_YR_86,FIRSTDATE_YR_87,FIRSTDATE_YR_88,FIRSTDATE_YR_89,FIRSTDATE_YR_90,FIRSTDATE_YR_91,FIRSTDATE_YR_92,FIRSTDATE_YR_93,FIRSTDATE_YR_94,FIRSTDATE_YR_95,FIRSTDATE_YR_96,FIRSTDATE_MM_1,FIRSTDATE_MM_2,FIRSTDATE_MM_3,FIRSTDATE_MM_4,FIRSTDATE_MM_5,FIRSTDATE_MM_6,FIRSTDATE_MM_7,FIRSTDATE_MM_8,FIRSTDATE_MM_9,FIRSTDATE_MM_10,FIRSTDATE_MM_11,FIRSTDATE_MM_12,predicted_target,actual_target
0,0.624862,0.166667,0.666667,0.262626,0.474747,0.666667,0.000000,1.000000,0.000000,0.939394,0.060606,0.242424,0.606061,0.595960,0.393939,0.252525,0.121212,0.096000,0.100833,0.153846,0.153846,0.525253,0.484848,0.161616,0.505051,0.414141,0.717172,0.565657,0.000000,0.010101,0.050505,0.272727,0.646465,0.000000,0.434343,0.020202,0.000000,0.121212,0.020202,0.070707,0.343434,0.676768,0.000000,0.277519,0.373737,0.323232,0.606061,0.404040,0.040404,0.131313,0.848485,0.848485,0.727273,0.303030,1.000000,0.000000,0.262626,0.474747,0.787879,1.000000,0.333333,0.865385,0.060606,0.060606,0.060606,0.101010,0.909091,0.888889,0.121212,1.000000,0.000000,1.000000,0.285714,0.4,0.366667,0.277021,1.0,0.333333,0.836066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
1,0.690722,0.833333,0.666667,0.181818,0.252525,0.666667,0.000000,0.000000,1.000000,0.979798,0.010101,0.505051,0.919192,0.909091,0.030303,0.000000,0.000000,0.094667,0.096000,0.230769,0.230769,0.878788,0.131313,0.151515,0.797980,0.676768,0.888889,0.636364,0.000000,0.000000,0.040404,0.171717,0.646465,0.000000,0.585859,0.000000,0.050505,0.000000,0.111111,0.232323,0.646465,0.888889,0.230769,0.088372,0.151515,0.505051,0.343434,0.666667,0.616162,0.666667,0.787879,0.737374,0.575758,0.494949,0.484848,1.000000,0.181818,0.252525,0.929293,0.979798,0.707071,0.615385,0.070707,0.151515,0.363636,0.515152,0.494949,0.666667,0.030303,0.030303,0.979798,0.040404,0.285714,0.6,0.516667,0.364368,1.0,0.000000,0.655738,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
2,0.762887,0.000000,1.000000,0.191919,0.747475,0.000000,1.000000,0.000000,0.000000,0.959596,0.010101,0.393939,1.000000,0.717172,0.000000,0.000000,0.000000,0.224167,0.226500,0.538462,0.538462,0.919192,0.090909,0.000000,0.787879,0.717172,0.898990,0.686869,0.030303,0.292929,0.838384,1.000000,1.000000,0.000000,0.727273,0.000000,0.000000,0.000000,0.888889,0.929293,0.929293,0.929293,0.021368,0.568992,0.030303,0.727273,0.474747,0.535354,0.020202,0.797980,0.747475,0.747475,0.303030,0.191919,0.000000,0.000000,0.323232,0.333333,0.181818,0.919192,0.818182,0.250000,0.080808,0.393939,1.000000,1.000000,0.000000,0.949495,0.030303,1.000000,0.000000,1.000000,0.476190,0.6,0.250000,0.688459,1.0,0.333333,0.049180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1
3,0.624862,0.666667,1.000000,0.272727,0.373737,1.000000,0.040404,0.010101,0.959596,0.909091,0.090909,0.444444,0.747475,0.737374,0.090909,0.050505,0.030303,0.079500,0.089667,0.153846,0.076923,0.757576,0.252525,0.090909,0.787879,0.646465,0.909091,0.565657,0.010101,0.010101,0.060606,0.181818,0.464646,0.000000,0.444444,0.010101,0.161616,0.020202,0.000000,0.020202,0.121212,0.313131,0.000000,0.305426,0.242424,0.313131,0.404040,0.606061,0.202020,0.737374,0.797980,0.737374,0.636364,0.535354,0.616162,0.535354,0.272727,0.363636,0.797980,0.989899,0.676768,0.423077,0.080808,0.181818,0.484848,0.666667,0.343434,0.272727,0.202020,0.797980,0.191919,0.282828,0.333333,0.6,0.133333,0.139058,0.0,1.000000,0.704918,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1,1
4,0.624862,0.000000,1.000000,0.333333,0.202020,0.222222,0.000000,0.000000,1.000000,0.101010,0.000000,0.646465,0.707071,0.636364,0.131313,0.050505,0.030303,0.065833,0.069167,0.076923,0.076923,0.474747,0.535354,0.292929,0.808081,0.464646,0.898990,0.070707,0.000000,0.000000,0.010101,0.050505,0.292929,0.000000,0.272727,0.020202,0.161616,0.030303,0.000000,0.010101,0.070707,0.222222,0.000000,0.463566,0.545455,0.171717,0.434343,0.575758,0.030303,0.292929,0.636364,0.414141,0.484848,0.333333,0.575758,0.808081,0.353535,0.232323,0.888889,0.909091,0.404040,0.326923,0.060606,0.191919,0.656566,0.777778,0.232323,0.565657,0.272727,0.848485,0.151515,0.828283,0.333333,0.4,0.616667,0.453774,1.0,0.000000,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159702,0.422680,0.666667,0.777778,0.595960,0.000000,1.000000,1.000000,0.000000,0.000000,0.969697,0.000000,0.707071,1.000000,1.000000,0.000000,0.000000,0.000000,0.111667,0.112333,0.461538,0.461538,0.969697,0.040404,0.040404,0.898990,0.828283,0.959596,0.606061,0.000000,0.000000,0.000000,0.191919,0.989899,0.000000,0.838384,0.000000,0.000000,0.000000,0.888889,0.888889,0.949495,0.949495,0.632479,0.466667,0.020202,0.474747,0.646465,0.363636,0.898990,0.676768,0.939394,0.939394,0.696970,0.626263,1.000000,1.000000,0.595960,0.000000,0.656566,0.979798,0.949495,0.192308,0.363636,0.545455,1.000000,1.000000,0.000000,0.949495,0.060606,1.000000,0.000000,1.000000,0.380952,0.0,0.233333,0.557791,1.0,0.000000,0.311475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
159703,0.329897,0.833333,0.777778,0.191919,0.565657,0.777778,1.000000,0.000000,0.000000,0.989899,0.000000,0.393939,1.000000,0.969697,0.000000,0.000000,0.000000,0.162833,0.203000,0.461538,0.384615,0.959596,0.050505,0.262626,0.858586,0.767677,0.929293,0.515152,0.070707,0.131313,0.464646,0.919192,1.000000,0.050505,0.888889,0.000000,0.000000,0.000000,0.777778,0.848485,0.898990,0.909091,0.215812,0.510078,0.101010,0.676768,0.414141,0.595960,0.070707,0.696970,0.595960,0.575758,0.707071,0.636364,0.606061,0.000000,0.191919,0.565657,0.252525,0.949495,0.676768,0.269231,0.090909,0.383838,0.949495,1.000000,0.010101,0.000000,0.818182,1.000000,0.000000,0.989899,0.380952,0.6,0.333333,0.169703,1.0,0.333333,0.196721,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0
159704,0.567010,0.500000,0.333333,0.343434,0.363636,1.000000,0.000000,0.828283,0.181818,0.989899,0.010101,0.383838,0.767677,0.757576,0.202020,0.161616,0.111111,0.080000,0.087667,0.153846,0.153846,0.666667,0.343434,0.030303,0.696970,0.616162,0.818182,0.555556,0.000000,0.010101,0.050505,0.161616,0.474747,0.000000,0.545455,0.090909,0.030303,0.111111,0.010101,0.070707,0.242424,0.606061,0.000000,0.513178,0.242424,0.515152,0.434343,0.575758,0.080808,0.171717,0.757576,0.737374,0.767677,0.666667,1.000000,1.000000,0.252525,0.414141,0.787879,0.969697,0.616162,0.557692,0.030303,0.121212,0.343434,0.484848,0.525253,0.636364,0.252525,0.848485,0.161616,0.848485,0.428571,0.4,0.150000,0.558897,1.0,0.000000,0.934426,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0
159705,0.690722,0.666667,1.000000,0.242424,0.575758,1.000000,1.000000,0.000000,0.000000,0.979798,0.000000,0.404040,1.000000,1.000000,0.000000,0.000000,0.000000,0.827333,0.808667,0.923077,0.923077,0.959596,0.050505,0.000000,0.797980,0.737374,0.868687,0.535354,0.989899,1.000000,1.000000,1.000000,1.000000,0.898990,0.888889,0.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,0.782051,0.023256,0.000000,0.828283,0.212121,0.797980,0.030303,0.767677,0.656566,0.656566,0.585859,0.373737,0.000000,0.000000,0.242424,0.575758,0.414141,0.959596,0.868687,0.634615,0.000000,0.020202,0.212121,0.393939,0.616162,0.979798,0.000000,1.000000,0.000000,1.000000,0.523810,0.4,0.066667,0.798823,0.0,0.000000,0.163934,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [73]:
num_correct_predictions = (X['predicted_target'] == X['actual_target']).sum()

# Calculate the total number of predictions
total_predictions = len(X)

# Calculate the accuracy
accuracy = num_correct_predictions / total_predictions
accuracy

1.0

In [74]:
X.to_csv('data_with_predictions.csv',index=False)

In [None]:
import pickle

with open('random_forest_model.pkl', 'wb') as file:
    pickle.dump(rf_classifier, file)