In [1]:
import os

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import Image
%matplotlib inline
plt.style.use('ggplot')

try:
    # scikit-learn >= 0.18 exposes the splitting / CV helpers here.
    from sklearn.model_selection import train_test_split, cross_val_score
except ImportError:
    # Older scikit-learn releases, where the helpers lived in cross_validation.
    from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.datasets import make_classification;

# Notebook display settings: cap rendered rows/columns at 20 and keep HTML tables on.
for option_name, option_value in (
        ('display.max_rows', 20),
        ('display.max_columns', 20),
        ('display.notebook_repr_html', True)):
    pd.set_option(option_name, option_value)



In [2]:
pwd

u'/Users/juliewang/DS-SF-36/ML_Challenge'

In [3]:
# Default location of the training data; pass `path` to read_dataset() to
# load the file from somewhere else without editing this cell.
DATA_DIR = '/Users/juliewang/DS-SF-36/ML_Challenge'
DATASET_FILENAME = 'dataset-13-walget-train.csv'

def read_dataset(path=None):
    """Load the training CSV into a DataFrame.

    Parameters
    ----------
    path : str, optional
        CSV file to read. When omitted, DATASET_FILENAME inside DATA_DIR is
        used, which is the same file the notebook originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The parsed file, exactly as returned by ``pd.read_csv``.
    """
    if path is None:
        path = os.path.join(DATA_DIR, DATASET_FILENAME)
    return pd.read_csv(path)

# Load the training data once; the exploration and modelling cells below read from `df`.
df = read_dataset()

In [4]:
# Display the loaded frame (rendering is capped by the display.max_rows option set earlier).
df

Unnamed: 0,AccountHolderImpliedGender,AccountHolderAddress,RecentlyPurchasedPregnancyTest,RecentlyPurchasedBirthControl,RecentlyPurchasedFeminineHygieneProducts,RecentlyPurchasedFolicAcidSupplements,RecentlyPurchasedPrenatalVitamins,RecentlyPurchasedPrenatalYogaDVD,RecentlyPurchasedBodyPillow,RecentlyPurchasedGingerAle,RecentlyPurchasedSeaBands,PurchasedCigarettesRegularlyUntilRecentlyThenStopped,RecentlyPurchasedCigarettes,RecentlyPurchasedSmokingCessationProducts,PurchasedWineRegularlyUntilRecentlyThenStopped,RecentlyPurchasedWine,RecentlyPurchasedMaternityClothing,IsPregnant
0,Female,Home,False,False,,False,True,False,False,False,True,False,False,False,True,False,False,True
1,Female,PO Box,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
2,Male,Apartment,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,Male,Home,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,True
4,Female,PO Box,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,Female,Home,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False
6,Male,Apartment,False,True,True,False,True,False,False,False,False,False,False,False,True,False,False,False
7,Male,Apartment,False,False,True,False,True,False,False,False,False,False,False,False,False,,False,True
8,Female,Apartment,True,False,False,True,True,False,False,True,False,True,False,False,False,False,False,True
9,Female,Apartment,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True


In [5]:
# Number of rows in the dataset.
df.shape[0]

600

In [6]:
# Count of missing values in each column.
df.isnull().sum()

AccountHolderImpliedGender                              58
AccountHolderAddress                                     6
RecentlyPurchasedPregnancyTest                           6
RecentlyPurchasedBirthControl                            7
RecentlyPurchasedFeminineHygieneProducts                10
RecentlyPurchasedFolicAcidSupplements                    1
RecentlyPurchasedPrenatalVitamins                        6
RecentlyPurchasedPrenatalYogaDVD                         6
RecentlyPurchasedBodyPillow                              9
RecentlyPurchasedGingerAle                               6
RecentlyPurchasedSeaBands                               11
PurchasedCigarettesRegularlyUntilRecentlyThenStopped     5
RecentlyPurchasedCigarettes                              9
RecentlyPurchasedSmokingCessationProducts               12
PurchasedWineRegularlyUntilRecentlyThenStopped          12
RecentlyPurchasedWine                                    8
RecentlyPurchasedMaternityClothing                      

In [7]:
# Column dtypes as parsed by read_csv.
df.dtypes

AccountHolderImpliedGender                              object
AccountHolderAddress                                    object
RecentlyPurchasedPregnancyTest                          object
RecentlyPurchasedBirthControl                           object
RecentlyPurchasedFeminineHygieneProducts                object
RecentlyPurchasedFolicAcidSupplements                   object
RecentlyPurchasedPrenatalVitamins                       object
RecentlyPurchasedPrenatalYogaDVD                        object
RecentlyPurchasedBodyPillow                             object
RecentlyPurchasedGingerAle                              object
RecentlyPurchasedSeaBands                               object
PurchasedCigarettesRegularlyUntilRecentlyThenStopped    object
RecentlyPurchasedCigarettes                             object
RecentlyPurchasedSmokingCessationProducts               object
PurchasedWineRegularlyUntilRecentlyThenStopped          object
RecentlyPurchasedWine                                  

In [8]:
# 1 of 17
# Counts of IsPregnant by implied gender, with "All" margin totals.
# Parenthesised print gives the same output under Python 2 and Python 3.
print(pd.crosstab(df["IsPregnant"], df["AccountHolderImpliedGender"], margins=True))

def percConvert(ser):
    """Divide every entry of `ser` by its last entry.

    Used with ``DataFrame.apply(..., axis=1)`` on a crosstab built with
    ``margins=True``, where the last entry of each row is the "All" total,
    so the result is each cell's share of its row.

    Parameters
    ----------
    ser : pandas.Series
        A row (or column) whose final element is the total to divide by.

    Returns
    -------
    pandas.Series
        `ser` scaled so that its final element equals 1.0.
    """
    # .iloc is explicitly positional; plain ser[-1] is a label lookup that
    # only falls back to position on non-integer indexes.
    return ser / float(ser.iloc[-1])

# Same gender table with each row divided by its "All" total (row-wise shares).
# Parenthesised print gives the same output under Python 2 and Python 3.
print(pd.crosstab(df["IsPregnant"], df["AccountHolderImpliedGender"], margins=True).apply(percConvert, axis=1))

AccountHolderImpliedGender  Female  Male  All
IsPregnant                                   
False                          136   138  274
True                           165   103  268
All                            301   241  542
AccountHolderImpliedGender    Female      Male  All
IsPregnant                                         
False                       0.496350  0.503650  1.0
True                        0.615672  0.384328  1.0
All                         0.555351  0.444649  1.0


In [9]:
# 2 of 17
# Build the count table once, then print it raw and as row-wise shares.
address_counts = pd.crosstab(df["IsPregnant"], df["AccountHolderAddress"], margins=True)
print(address_counts)
print(address_counts.apply(percConvert, axis=1))

AccountHolderAddress  Apartment  Home  PO Box  All
IsPregnant                                        
False                       135   135      28  298
True                        108   160      28  296
All                         243   295      56  594
AccountHolderAddress  Apartment      Home    PO Box  All
IsPregnant                                              
False                  0.453020  0.453020  0.093960  1.0
True                   0.364865  0.540541  0.094595  1.0
All                    0.409091  0.496633  0.094276  1.0


In [10]:
# 3 of 17
# Build the count table once, then print it raw and as row-wise shares.
pregnancy_test_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedPregnancyTest"], margins=True)
print(pregnancy_test_counts)
print(pregnancy_test_counts.apply(percConvert, axis=1))

RecentlyPurchasedPregnancyTest  False  True  All
IsPregnant                                      
False                             292     4  296
True                              257    41  298
All                               549    45  594
RecentlyPurchasedPregnancyTest     False      True  All
IsPregnant                                             
False                           0.986486  0.013514  1.0
True                            0.862416  0.137584  1.0
All                             0.924242  0.075758  1.0


In [11]:
# 4 of 17
# Build the count table once, then print it raw and as row-wise shares.
birth_control_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedBirthControl"], margins=True)
print(birth_control_counts)
print(birth_control_counts.apply(percConvert, axis=1))

RecentlyPurchasedBirthControl  False  True  All
IsPregnant                                     
False                            226    70  296
True                             290     7  297
All                              516    77  593
RecentlyPurchasedBirthControl     False      True  All
IsPregnant                                            
False                          0.763514  0.236486  1.0
True                           0.976431  0.023569  1.0
All                            0.870152  0.129848  1.0


In [12]:
# 5 of 17
# Build the count table once, then print it raw and as row-wise shares.
feminine_hygiene_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedFeminineHygieneProducts"], margins=True)
print(feminine_hygiene_counts)
print(feminine_hygiene_counts.apply(percConvert, axis=1))

RecentlyPurchasedFeminineHygieneProducts  False  True  All
IsPregnant                                                
False                                       227    71  298
True                                        275    17  292
All                                         502    88  590
RecentlyPurchasedFeminineHygieneProducts     False      True  All
IsPregnant                                                       
False                                     0.761745  0.238255  1.0
True                                      0.941781  0.058219  1.0
All                                       0.850847  0.149153  1.0


In [13]:
# 6 of 17
# Build the count table once, then print it raw and as row-wise shares.
folic_acid_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedFolicAcidSupplements"], margins=True)
print(folic_acid_counts)
print(folic_acid_counts.apply(percConvert, axis=1))

RecentlyPurchasedFolicAcidSupplements  False  True  All
IsPregnant                                             
False                                    300     0  300
True                                     233    66  299
All                                      533    66  599
RecentlyPurchasedFolicAcidSupplements     False      True  All
IsPregnant                                                    
False                                  1.000000  0.000000  1.0
True                                   0.779264  0.220736  1.0
All                                    0.889816  0.110184  1.0


In [14]:
# 7 of 17
# Build the count table once, then print it raw and as row-wise shares.
prenatal_vitamins_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedPrenatalVitamins"], margins=True)
print(prenatal_vitamins_counts)
print(prenatal_vitamins_counts.apply(percConvert, axis=1))

RecentlyPurchasedPrenatalVitamins  False  True  All
IsPregnant                                         
False                                283    13  296
True                                 229    69  298
All                                  512    82  594
RecentlyPurchasedPrenatalVitamins     False      True  All
IsPregnant                                                
False                              0.956081  0.043919  1.0
True                               0.768456  0.231544  1.0
All                                0.861953  0.138047  1.0


In [15]:
# 8 of 17
# Build the count table once, then print it raw and as row-wise shares.
prenatal_yoga_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedPrenatalYogaDVD"], margins=True)
print(prenatal_yoga_counts)
print(prenatal_yoga_counts.apply(percConvert, axis=1))

RecentlyPurchasedPrenatalYogaDVD  False  True  All
IsPregnant                                        
False                               296     1  297
True                                286    11  297
All                                 582    12  594
RecentlyPurchasedPrenatalYogaDVD     False      True  All
IsPregnant                                               
False                             0.996633  0.003367  1.0
True                              0.962963  0.037037  1.0
All                               0.979798  0.020202  1.0


In [16]:
# 9 of 17
# Build the count table once, then print it raw and as row-wise shares.
body_pillow_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedBodyPillow"], margins=True)
print(body_pillow_counts)
print(body_pillow_counts.apply(percConvert, axis=1))

RecentlyPurchasedBodyPillow  False  True  All
IsPregnant                                   
False                          294     2  296
True                           288     7  295
All                            582     9  591
RecentlyPurchasedBodyPillow     False      True  All
IsPregnant                                          
False                        0.993243  0.006757  1.0
True                         0.976271  0.023729  1.0
All                          0.984772  0.015228  1.0


In [17]:
# 10 of 17
# Build the count table once, then print it raw and as row-wise shares.
ginger_ale_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedGingerAle"], margins=True)
print(ginger_ale_counts)
print(ginger_ale_counts.apply(percConvert, axis=1))

RecentlyPurchasedGingerAle  False  True  All
IsPregnant                                  
False                         290     6  296
True                          268    30  298
All                           558    36  594
RecentlyPurchasedGingerAle     False      True  All
IsPregnant                                         
False                       0.979730  0.020270  1.0
True                        0.899329  0.100671  1.0
All                         0.939394  0.060606  1.0


In [18]:
# 11 of 17
# Build the count table once, then print it raw and as row-wise shares.
sea_bands_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedSeaBands"], margins=True)
print(sea_bands_counts)
print(sea_bands_counts.apply(percConvert, axis=1))

RecentlyPurchasedSeaBands  False  True  All
IsPregnant                                 
False                        290     3  293
True                         279    17  296
All                          569    20  589
RecentlyPurchasedSeaBands     False      True  All
IsPregnant                                        
False                      0.989761  0.010239  1.0
True                       0.942568  0.057432  1.0
All                        0.966044  0.033956  1.0


In [19]:
# 12 of 17
# Build the count table once, then print it raw and as row-wise shares.
quit_cigarettes_counts = pd.crosstab(df["IsPregnant"], df["PurchasedCigarettesRegularlyUntilRecentlyThenStopped"], margins=True)
print(quit_cigarettes_counts)
print(quit_cigarettes_counts.apply(percConvert, axis=1))

PurchasedCigarettesRegularlyUntilRecentlyThenStopped  False  True  All
IsPregnant                                                            
False                                                   287    12  299
True                                                    251    45  296
All                                                     538    57  595
PurchasedCigarettesRegularlyUntilRecentlyThenStopped     False      True  All
IsPregnant                                                                   
False                                                 0.959866  0.040134  1.0
True                                                  0.847973  0.152027  1.0
All                                                   0.904202  0.095798  1.0


In [20]:
# 13 of 17
# Build the count table once, then print it raw and as row-wise shares.
cigarettes_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedCigarettes"], margins=True)
print(cigarettes_counts)
print(cigarettes_counts.apply(percConvert, axis=1))

RecentlyPurchasedCigarettes  False  True  All
IsPregnant                                   
False                          247    50  297
True                           278    16  294
All                            525    66  591
RecentlyPurchasedCigarettes     False      True  All
IsPregnant                                          
False                        0.831650  0.168350  1.0
True                         0.945578  0.054422  1.0
All                          0.888325  0.111675  1.0


In [21]:
# 14 of 17
# Build the count table once, then print it raw and as row-wise shares.
smoking_cessation_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedSmokingCessationProducts"], margins=True)
print(smoking_cessation_counts)
print(smoking_cessation_counts.apply(percConvert, axis=1))

RecentlyPurchasedSmokingCessationProducts  False  True  All
IsPregnant                                                 
False                                        289     3  292
True                                         261    35  296
All                                          550    38  588
RecentlyPurchasedSmokingCessationProducts     False      True  All
IsPregnant                                                        
False                                      0.989726  0.010274  1.0
True                                       0.881757  0.118243  1.0
All                                        0.935374  0.064626  1.0


In [22]:
# 15 of 17
# Build the count table once, then print it raw and as row-wise shares.
quit_wine_counts = pd.crosstab(df["IsPregnant"], df["PurchasedWineRegularlyUntilRecentlyThenStopped"], margins=True)
print(quit_wine_counts)
print(quit_wine_counts.apply(percConvert, axis=1))

PurchasedWineRegularlyUntilRecentlyThenStopped  False  True  All
IsPregnant                                                      
False                                             282    11  293
True                                              228    67  295
All                                               510    78  588
PurchasedWineRegularlyUntilRecentlyThenStopped     False      True  All
IsPregnant                                                             
False                                           0.962457  0.037543  1.0
True                                            0.772881  0.227119  1.0
All                                             0.867347  0.132653  1.0


In [23]:
# 16 of 17
# Build the count table once, then print it raw and as row-wise shares.
wine_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedWine"], margins=True)
print(wine_counts)
print(wine_counts.apply(percConvert, axis=1))

RecentlyPurchasedWine  False  True  All
IsPregnant                             
False                    240    56  296
True                     288     8  296
All                      528    64  592
RecentlyPurchasedWine     False      True  All
IsPregnant                                    
False                  0.810811  0.189189  1.0
True                   0.972973  0.027027  1.0
All                    0.891892  0.108108  1.0


In [24]:
# 17 of 17
# Build the count table once, then print it raw and as row-wise shares.
maternity_clothing_counts = pd.crosstab(df["IsPregnant"], df["RecentlyPurchasedMaternityClothing"], margins=True)
print(maternity_clothing_counts)
print(maternity_clothing_counts.apply(percConvert, axis=1))

RecentlyPurchasedMaternityClothing  False  True  All
IsPregnant                                          
False                                 287    12  299
True                                  232    66  298
All                                   519    78  597
RecentlyPurchasedMaternityClothing     False      True  All
IsPregnant                                                 
False                               0.959866  0.040134  1.0
True                                0.778523  0.221477  1.0
All                                 0.869347  0.130653  1.0


In [25]:
'''
NOTES: 
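Differential = absolute gap between the IsPregnant=True and IsPregnant=False row percentages in the crosstabs above.
Differential > 15% (N=7): 4, 5, 6, 7, 15, 16, 17
Differential > 10% (N=12): 1, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17

1 AccountHolderImpliedGender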
2 AccountHolderAddress                           
3 RecentlyPurchasedPregnancyTest              
4 RecentlyPurchasedBirthControl                            
5 RecentlyPurchasedFeminineHygieneProducts                
6 RecentlyPurchasedFolicAcidSupplements                  
7 RecentlyPurchasedPrenatalVitamins                   
8 RecentlyPurchasedPrenatalYogaDVD                        
9 RecentlyPurchasedBodyPillow                             
10 RecentlyPurchasedGingerAle                              
11 RecentlyPurchasedSeaBands                               
12 PurchasedCigarettesRegularlyUntilRecentlyThenStopped    
13 RecentlyPurchasedCigarettes                             
14 RecentlyPurchasedSmokingCessationProducts               
15 PurchasedWineRegularlyUntilRecentlyThenStopped        
16 RecentlyPurchasedWine                        
17 RecentlyPurchasedMaternityClothing                     

'''

'\nNOTES: Differential > approx. 15%\nN=7\n\n1 AccountHolderImpliedGender                         \n2 AccountHolderAddress                           \n3 RecentlyPurchasedPregnancyTest              \n4 RecentlyPurchasedBirthControl = X1                            \n5 RecentlyPurchasedFeminineHygieneProducts = X2                \n6 RecentlyPurchasedFolicAcidSupplements = X3                   \n7 RecentlyPurchasedPrenatalVitamins = X4                       \n8 RecentlyPurchasedPrenatalYogaDVD                        \n9 RecentlyPurchasedBodyPillow                             \n10 RecentlyPurchasedGingerAle                              \n11 RecentlyPurchasedSeaBands                               \n12 PurchasedCigarettesRegularlyUntilRecentlyThenStopped    \n13 RecentlyPurchasedCigarettes                             \n14 RecentlyPurchasedSmokingCessationProducts               \n15 PurchasedWineRegularlyUntilRecentlyThenStopped = X5         \n16 RecentlyPurchasedWine = X6                     

In [49]:
# Model inputs: the eleven purchase indicators from the notes whose differential
# exceeds 10% (items 3-7 and 12-17; gender, item 1, is left out), plus the target.
# An earlier variant (df2) kept only the seven indicators above 15%.
selected_columns = [
    'RecentlyPurchasedPregnancyTest',
    'RecentlyPurchasedBirthControl',
    'RecentlyPurchasedFeminineHygieneProducts',
    'RecentlyPurchasedFolicAcidSupplements',
    'RecentlyPurchasedPrenatalVitamins',
    'PurchasedCigarettesRegularlyUntilRecentlyThenStopped',
    'RecentlyPurchasedCigarettes',
    'RecentlyPurchasedSmokingCessationProducts',
    'PurchasedWineRegularlyUntilRecentlyThenStopped',
    'RecentlyPurchasedWine',
    'RecentlyPurchasedMaternityClothing',
    'IsPregnant',
]
df3 = df[selected_columns]
df3

Unnamed: 0,RecentlyPurchasedPregnancyTest,RecentlyPurchasedBirthControl,RecentlyPurchasedFeminineHygieneProducts,RecentlyPurchasedFolicAcidSupplements,RecentlyPurchasedPrenatalVitamins,PurchasedCigarettesRegularlyUntilRecentlyThenStopped,RecentlyPurchasedCigarettes,RecentlyPurchasedSmokingCessationProducts,PurchasedWineRegularlyUntilRecentlyThenStopped,RecentlyPurchasedWine,RecentlyPurchasedMaternityClothing,IsPregnant
0,False,False,,False,True,False,False,False,True,False,False,True
1,False,False,False,False,False,False,False,False,False,False,False,True
2,False,True,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,False,True
4,False,False,False,False,False,False,False,False,False,False,False,False
5,False,True,False,False,False,False,False,False,False,False,False,False
6,False,True,True,False,True,False,False,False,True,False,False,False
7,False,False,True,False,True,False,False,False,False,,False,True
8,True,False,False,True,True,True,False,False,False,False,False,True
9,False,True,False,False,False,False,False,False,False,False,False,True


In [50]:
# Keep only the rows that have a value in every selected column,
# then report how many rows are left.
df3 = df3.dropna(axis=0, how='any')
df3.shape[0]

528

In [44]:
# One indicator column per observed value of AccountHolderImpliedGender.
df_AccountHolderImpliedGender = pd.get_dummies(df['AccountHolderImpliedGender'])

# Join the dummy variables to the main dataframe.
# The result gets its own name: binding it to df3 would replace the NaN-free
# feature subset built above with the full frame (string columns and missing
# values included), and the model cells further down would then fail when the
# notebook is run top to bottom.
df_with_gender = pd.concat([df, df_AccountHolderImpliedGender], axis=1)
df_with_gender

Unnamed: 0,AccountHolderImpliedGender,AccountHolderAddress,RecentlyPurchasedPregnancyTest,RecentlyPurchasedBirthControl,RecentlyPurchasedFeminineHygieneProducts,RecentlyPurchasedFolicAcidSupplements,RecentlyPurchasedPrenatalVitamins,RecentlyPurchasedPrenatalYogaDVD,RecentlyPurchasedBodyPillow,RecentlyPurchasedGingerAle,RecentlyPurchasedSeaBands,PurchasedCigarettesRegularlyUntilRecentlyThenStopped,RecentlyPurchasedCigarettes,RecentlyPurchasedSmokingCessationProducts,PurchasedWineRegularlyUntilRecentlyThenStopped,RecentlyPurchasedWine,RecentlyPurchasedMaternityClothing,IsPregnant,Female,Male
0,Female,Home,False,False,,False,True,False,False,False,True,False,False,False,True,False,False,True,1,0
1,Female,PO Box,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,1,0
2,Male,Apartment,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,1
3,Male,Home,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,True,0,1
4,Female,PO Box,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,1,0
5,Female,Home,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,1,0
6,Male,Apartment,False,True,True,False,True,False,False,False,False,False,False,False,True,False,False,False,0,1
7,Male,Apartment,False,False,True,False,True,False,False,False,False,False,False,False,False,,False,True,0,1
8,Female,Apartment,True,False,False,True,True,False,False,True,False,True,False,False,False,False,False,True,1,0
9,Female,Apartment,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,1,0


In [51]:
# Encode every True/False value as 1/0. astype(int) yields real integer columns,
# whereas multiplying by 1 leaves the object dtype that read_csv assigned to the
# columns containing missing values.
df3 = df3.astype(int)
df3.head()

Unnamed: 0,RecentlyPurchasedPregnancyTest,RecentlyPurchasedBirthControl,RecentlyPurchasedFeminineHygieneProducts,RecentlyPurchasedFolicAcidSupplements,RecentlyPurchasedPrenatalVitamins,PurchasedCigarettesRegularlyUntilRecentlyThenStopped,RecentlyPurchasedCigarettes,RecentlyPurchasedSmokingCessationProducts,PurchasedWineRegularlyUntilRecentlyThenStopped,RecentlyPurchasedWine,RecentlyPurchasedMaternityClothing,IsPregnant
1,0,0,0,0,0,0,0,0,0,0,0,1
2,0,1,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0,0,0


In [53]:
# Split the modelling frame into the target vector (y) and the feature matrix (X).
target_name = "IsPregnant"
y = df3[target_name]
X = df3.drop(target_name, axis=1)

In [37]:
# Step 1: hold out 40% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42)

In [54]:
# Step 2: fit a logistic regression with library defaults on the training split
lr = LogisticRegression().fit(X_train, y_train)

# Step 3: predict labels for the held-out rows
preds = lr.predict(X_test)

# Step 4: share of held-out rows whose label was predicted correctly
testing_score = accuracy_score(y_test, preds)

accuracy_message = "The model accurately classified {:.2f} percent of the testing data"
print(accuracy_message.format(testing_score*100))

The model accurately classified 83.02 percent of the testing data


In [55]:
# Null accuracy of y_test: the class proportions among the held-out labels.
# The larger proportion is the accuracy of always predicting the majority class,
# i.e. the baseline the model's accuracy should be compared against.
y_test.value_counts(normalize=True)

0    0.518868
1    0.481132
Name: IsPregnant, dtype: float64

In [56]:
# Pass the predictions and y_test into a confusion matrix
# (scikit-learn convention: rows are the true classes, columns the predicted classes).
confusion_matrix(y_test, preds)

array([[101,   9],
       [ 27,  75]])

In [57]:
# Use cross_val_score method to generate the average accuracy score for 5 CVs
mean_cv_score = cross_val_score(LogisticRegression(), X, y, cv=5, scoring="accuracy").mean()

# .format() must be applied to the string inside the print call: written outside,
# it only works as a Python 2 print statement and fails under Python 3, where
# print() returns None.
print("The cross validated accuracy score is {:.2f} percent".format(mean_cv_score*100))

The cross validated accuracy score is 83.71 percent


In [58]:
# Precision and recall of the test-set predictions, as plain floats
ps, rs = (float(metric(y_test, preds)) for metric in (precision_score, recall_score))

summary_template = "The precision score is {:.2f} and the recall score is {:.2f}"
print(summary_template.format(ps*100, rs*100))

The precision score is 89.29 and the recall score is 73.53


In [None]:
# NOTE: still playing around with plots (to be continued...)