In [1]:
#import dependencies
import pandas as pd
import tensorflow as tf
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
#read in merged and cleaned data
df = pd.read_csv('../modified_data/pol_svi_sc_merged.csv')
df

Unnamed: 0,FIPS,3/31/21,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,...,Hopefulness,Income Per Capita,Neuroticism,Openness,Religiosity,Risk Taking,Selflessness,Tolerance,Work Ethic,dem_pct
0,1001,6589,594.443459,55200.0,23315.0,21115.0,8422.0,1065.0,29372.0,4204.0,...,91.163142,26168.0,77.925476,78.222354,91.106719,53.333333,82.142857,70.000000,60.380952,27.018365
1,1003,20505,1589.793007,208107.0,111945.0,78622.0,21653.0,4343.0,31203.0,14310.0,...,82.484017,28069.0,77.232120,80.086368,71.771566,67.272980,75.586018,66.983549,70.972246,22.409030
2,1005,2227,885.001636,25782.0,11937.0,9186.0,6597.0,918.0,18461.0,4901.0,...,61.927181,17249.0,80.375206,78.783778,73.657368,76.066481,78.753019,65.170377,68.704105,45.788173
3,1007,2542,622.461089,22527.0,9161.0,6840.0,2863.0,658.0,20199.0,2650.0,...,85.258871,18988.0,80.813736,77.837027,69.974652,75.136154,76.929754,69.859503,67.931677,20.698280
4,1009,6444,644.830460,57645.0,24222.0,20600.0,8220.0,909.0,22656.0,7861.0,...,79.492703,21033.0,78.764620,78.193105,92.045455,57.603815,79.307632,64.953288,76.000000,9.569378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3053,56037,4022,10426.975725,44117.0,19628.0,15871.0,5237.0,1213.0,32624.0,2549.0,...,82.403142,30945.0,79.384759,79.347081,68.147062,73.938691,76.390464,67.420658,70.956334,22.894957
3054,56039,3609,3996.844622,23059.0,13680.0,9158.0,1619.0,210.0,53703.0,958.0,...,84.036899,46499.0,71.547359,80.522872,65.399695,79.598153,79.698193,70.877600,70.938645,66.599040
3055,56041,2128,2081.719807,20609.0,8972.0,7735.0,2552.0,614.0,27009.0,934.0,...,84.089095,25636.0,78.771570,77.859042,67.603416,69.705859,73.332067,67.404487,69.299391,16.819960
3056,56043,891,2238.672972,8129.0,3868.0,3422.0,984.0,253.0,27556.0,590.0,...,87.485019,26325.0,76.249370,77.658224,67.412774,82.820701,78.925326,74.628788,70.050103,16.145833


In [3]:
#set index to FIPS
df = df.set_index(df['FIPS'])
df= df.drop('FIPS', axis=1)

In [4]:
#rename target column
df = df.rename(columns={'3/31/21':'first_year_cases'})

In [5]:
#create variable for case% for each counties population
df['case_pct'] = df['first_year_cases']/df['E_TOTPOP']*100
df['case_pct'].head()

FIPS
1001    11.936594
1003     9.853104
1005     8.637809
1007    11.284237
1009    11.178767
Name: case_pct, dtype: float64

In [6]:
df['case_pct'].describe()

count    3058.000000
mean        9.426600
std         3.045809
min         0.000000
25%         7.713422
50%         9.466675
75%        11.176131
max        38.010657
Name: case_pct, dtype: float64

In [7]:
# bin and cut the case_pct column into 2 classifications
# q = df['case_pct'].quantile(.75)
q = df['case_pct'].quantile(.9)
bins = [0, q , 40]
labels = ['low','high']
df['case_class'] = pd.cut(df['case_pct'], bins, labels = labels)
df['case_class']

FIPS
1001      low
1003      low
1005      low
1007      low
1009      low
         ... 
56037     low
56039    high
56041     low
56043     low
56045     low
Name: case_class, Length: 3058, dtype: category
Categories (2, object): ['low' < 'high']

In [8]:
df['case_class'].value_counts()

low     2728
high     306
Name: case_class, dtype: int64

In [9]:
#drop unneeded columns
df = df.drop('case_pct', axis =1)
df

Unnamed: 0_level_0,first_year_cases,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,...,Income Per Capita,Neuroticism,Openness,Religiosity,Risk Taking,Selflessness,Tolerance,Work Ethic,dem_pct,case_class
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,6589,594.443459,55200.0,23315.0,21115.0,8422.0,1065.0,29372.0,4204.0,8050.0,...,26168.0,77.925476,78.222354,91.106719,53.333333,82.142857,70.000000,60.380952,27.018365,low
1003,20505,1589.793007,208107.0,111945.0,78622.0,21653.0,4343.0,31203.0,14310.0,40665.0,...,28069.0,77.232120,80.086368,71.771566,67.272980,75.586018,66.983549,70.972246,22.409030,low
1005,2227,885.001636,25782.0,11937.0,9186.0,6597.0,918.0,18461.0,4901.0,4634.0,...,17249.0,80.375206,78.783778,73.657368,76.066481,78.753019,65.170377,68.704105,45.788173,low
1007,2542,622.461089,22527.0,9161.0,6840.0,2863.0,658.0,20199.0,2650.0,3661.0,...,18988.0,80.813736,77.837027,69.974652,75.136154,76.929754,69.859503,67.931677,20.698280,low
1009,6444,644.830460,57645.0,24222.0,20600.0,8220.0,909.0,22656.0,7861.0,10233.0,...,21033.0,78.764620,78.193105,92.045455,57.603815,79.307632,64.953288,76.000000,9.569378,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,4022,10426.975725,44117.0,19628.0,15871.0,5237.0,1213.0,32624.0,2549.0,4721.0,...,30945.0,79.384759,79.347081,68.147062,73.938691,76.390464,67.420658,70.956334,22.894957,low
56039,3609,3996.844622,23059.0,13680.0,9158.0,1619.0,210.0,53703.0,958.0,3135.0,...,46499.0,71.547359,80.522872,65.399695,79.598153,79.698193,70.877600,70.938645,66.599040,high
56041,2128,2081.719807,20609.0,8972.0,7735.0,2552.0,614.0,27009.0,934.0,2498.0,...,25636.0,78.771570,77.859042,67.603416,69.705859,73.332067,67.404487,69.299391,16.819960,low
56043,891,2238.672972,8129.0,3868.0,3422.0,984.0,253.0,27556.0,590.0,1686.0,...,26325.0,76.249370,77.658224,67.412774,82.820701,78.925326,74.628788,70.050103,16.145833,low


In [10]:
#turn case % classifications into binary 
df = pd.get_dummies(df, columns = ['case_class'])
df

Unnamed: 0_level_0,first_year_cases,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,...,Neuroticism,Openness,Religiosity,Risk Taking,Selflessness,Tolerance,Work Ethic,dem_pct,case_class_low,case_class_high
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,6589,594.443459,55200.0,23315.0,21115.0,8422.0,1065.0,29372.0,4204.0,8050.0,...,77.925476,78.222354,91.106719,53.333333,82.142857,70.000000,60.380952,27.018365,1,0
1003,20505,1589.793007,208107.0,111945.0,78622.0,21653.0,4343.0,31203.0,14310.0,40665.0,...,77.232120,80.086368,71.771566,67.272980,75.586018,66.983549,70.972246,22.409030,1,0
1005,2227,885.001636,25782.0,11937.0,9186.0,6597.0,918.0,18461.0,4901.0,4634.0,...,80.375206,78.783778,73.657368,76.066481,78.753019,65.170377,68.704105,45.788173,1,0
1007,2542,622.461089,22527.0,9161.0,6840.0,2863.0,658.0,20199.0,2650.0,3661.0,...,80.813736,77.837027,69.974652,75.136154,76.929754,69.859503,67.931677,20.698280,1,0
1009,6444,644.830460,57645.0,24222.0,20600.0,8220.0,909.0,22656.0,7861.0,10233.0,...,78.764620,78.193105,92.045455,57.603815,79.307632,64.953288,76.000000,9.569378,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,4022,10426.975725,44117.0,19628.0,15871.0,5237.0,1213.0,32624.0,2549.0,4721.0,...,79.384759,79.347081,68.147062,73.938691,76.390464,67.420658,70.956334,22.894957,1,0
56039,3609,3996.844622,23059.0,13680.0,9158.0,1619.0,210.0,53703.0,958.0,3135.0,...,71.547359,80.522872,65.399695,79.598153,79.698193,70.877600,70.938645,66.599040,0,1
56041,2128,2081.719807,20609.0,8972.0,7735.0,2552.0,614.0,27009.0,934.0,2498.0,...,78.771570,77.859042,67.603416,69.705859,73.332067,67.404487,69.299391,16.819960,1,0
56043,891,2238.672972,8129.0,3868.0,3422.0,984.0,253.0,27556.0,590.0,1686.0,...,76.249370,77.658224,67.412774,82.820701,78.925326,74.628788,70.050103,16.145833,1,0


In [11]:
X = df.drop(columns= ['case_class_low','case_class_high']).values
y= df['case_class_high'].values

In [12]:
# Splitting into Train and Test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y,train_size = .7, random_state=78)

In [13]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
rf_model = RandomForestClassifier(n_estimators=256, random_state=78) 

In [15]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [16]:
# Making predictions using the testing data.
predictions = rf_model.predict(X_test_scaled)

In [17]:
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [18]:
# Calculating the confusion matrix.
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,816,2
Actual 1,96,4


In [19]:
# Creating the decision tree classifier instance.
model = tree.DecisionTreeClassifier()
model = model.fit(X_train_scaled, y_train)

In [20]:
# Making predictions using the testing data.
predictions = model.predict(X_test_scaled)
predictions

array([0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [21]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,752,66
Actual 1,70,30


In [22]:
# We can sort the features by their importance.
X = df.drop(['case_class_low','case_class_high'], axis=1).columns
importance_df = pd.DataFrame(sorted(zip(model.feature_importances_, X), reverse=True))
importance_df

Unnamed: 0,0,1
0,0.065265,first_year_cases
1,0.049575,E_TOTPOP
2,0.043317,E_HH
3,0.042389,AREA_SQMI
4,0.040079,Agreeableness
...,...,...
98,0.000000,EP_LIMENG
99,0.000000,EP_AGE65
100,0.000000,EPL_PCI
101,0.000000,EPL_CROWD


In [23]:
impact = importance_df.loc[importance_df[0]>=.01]
impact_col = impact[1].to_list()
impact_col

['first_year_cases',
 'E_TOTPOP',
 'E_HH',
 'AREA_SQMI',
 'Agreeableness',
 'EP_GROUPQ',
 'RPL_THEME4',
 'Employment Rate',
 'Conflict Awareness',
 'EP_CROWD',
 'SPL_THEMES',
 'Tolerance',
 'Empathy',
 'RPL_THEME1',
 'EPL_GROUPQ',
 'Selflessness',
 'E_UNEMP',
 'Conscientiousness',
 'Collectivism',
 'SPL_THEME1',
 'E_CROWD',
 'Extraversion',
 'Belief In Science',
 'Hopefulness',
 'EPL_MUNIT',
 'RPL_THEME2',
 'EPL_DISABL',
 'E_LIMENG',
 'EPL_NOHSDP',
 'E_MINRTY',
 'EPL_SNGPNT',
 'Openness',
 'EP_AGE17',
 'Gender Equality',
 'Income Per Capita',
 'dem_pct',
 'Work Ethic',
 'Neuroticism',
 'EP_MINRTY']

In [24]:
impact_df = df[impact_col]
impact_df

Unnamed: 0_level_0,first_year_cases,E_TOTPOP,E_HH,AREA_SQMI,Agreeableness,EP_GROUPQ,RPL_THEME4,Employment Rate,Conflict Awareness,EP_CROWD,...,E_MINRTY,EPL_SNGPNT,Openness,EP_AGE17,Gender Equality,Income Per Capita,dem_pct,Work Ethic,Neuroticism,EP_MINRTY
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,6589,55200.0,21115.0,594.443459,86.279655,1.0,0.3741,94.7,63.444323,1.4,...,13788.0,0.3792,78.222354,24.2,77.063492,26168.0,27.018365,60.380952,77.925476,25.0
1003,20505,208107.0,78622.0,1589.793007,85.603337,1.4,0.3359,94.6,63.751017,1.3,...,35339.0,0.1391,80.086368,21.9,69.016011,28069.0,22.409030,70.972246,77.232120,17.0
1005,2227,25782.0,9186.0,885.001636,87.711609,11.2,0.9889,91.4,51.165707,3.4,...,13884.0,0.9468,78.783778,21.1,69.323158,17249.0,45.788173,68.704105,80.375206,53.9
1007,2542,22527.0,6840.0,622.461089,84.830261,9.3,0.7189,93.4,61.796095,0.8,...,5726.0,0.1706,77.837027,20.7,68.844059,18988.0,20.698280,67.931677,80.813736,25.4
1009,6444,57645.0,20600.0,644.830460,85.548096,0.9,0.1741,94.5,63.136502,1.6,...,7413.0,0.2961,78.193105,23.4,64.585114,21033.0,9.569378,76.000000,78.764620,12.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,4022,44117.0,15871.0,10426.975725,83.811791,1.3,0.4120,94.0,62.498854,1.9,...,9010.0,0.7679,79.347081,26.8,68.112161,30945.0,22.894957,70.956334,79.384759,20.4
56039,3609,23059.0,9158.0,3996.844622,82.886955,4.0,0.6266,96.6,61.166618,7.2,...,4246.0,0.2216,80.522872,19.2,70.618318,46499.0,66.599040,70.938645,71.547359,18.4
56041,2128,20609.0,7735.0,2081.719807,84.272810,1.3,0.6657,94.2,60.175516,2.8,...,2567.0,0.2665,77.859042,29.5,71.058477,25636.0,16.819960,69.299391,78.771570,12.5
56043,891,8129.0,3422.0,2238.672972,80.773973,2.0,0.2751,95.3,55.520155,1.8,...,1463.0,0.2961,77.658224,23.9,67.383553,26325.0,16.145833,70.050103,76.249370,18.0


In [25]:
X = impact_df.values
y= df['case_class_high'].values

In [26]:
# Splitting into Train and Test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y,train_size = .7, random_state=78)
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [27]:
from sklearn.ensemble import GradientBoostingClassifier
learning_rates = [0.05, 0.1, 0.25, 0.5, 0.75, 1]
for learning_rate in learning_rates:
    classifier = GradientBoostingClassifier(n_estimators=20,
    learning_rate=learning_rate,
    max_features=5,
    max_depth=3,
    random_state=0)
    classifier.fit(X_train_scaled, y_train)
    print("Learning rate: ", learning_rate)
    print("Accuracy score (training): {0:.3f}".format(
           classifier.score(
           X_train_scaled,
           y_train)))
    print("Accuracy score (validation): {0:.3f}".format(
           classifier.score(
           X_test_scaled,
           y_test)))

Learning rate:  0.05
Accuracy score (training): 0.905
Accuracy score (validation): 0.891
Learning rate:  0.1
Accuracy score (training): 0.909
Accuracy score (validation): 0.891
Learning rate:  0.25
Accuracy score (training): 0.929
Accuracy score (validation): 0.895
Learning rate:  0.5
Accuracy score (training): 0.946
Accuracy score (validation): 0.882
Learning rate:  0.75
Accuracy score (training): 0.951
Accuracy score (validation): 0.883
Learning rate:  1
Accuracy score (training): 0.957
Accuracy score (validation): 0.903


In [28]:
classifier = GradientBoostingClassifier(n_estimators=20,
   learning_rate=0.25, max_features=5, max_depth=3, random_state=0)

classifier.fit(X_train_scaled, y_train)
predictions = classifier.predict(X_test_scaled)

In [29]:
acc_score = accuracy_score(y_test, predictions)
print(f"Accuracy Score : {acc_score}")

Accuracy Score : 0.8954248366013072


In [30]:
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
   cm, index=["Actual 0", "Actual 1"],
   columns=["Predicted 0", "Predicted 1"]
)
display(cm_df)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,813,5
Actual 1,91,9


In [31]:
print("Classification Report")
print(classification_report(y_test, predictions))

Classification Report
              precision    recall  f1-score   support

           0       0.90      0.99      0.94       818
           1       0.64      0.09      0.16       100

    accuracy                           0.90       918
   macro avg       0.77      0.54      0.55       918
weighted avg       0.87      0.90      0.86       918

