# Classification Individual Assignment

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Paolo Musone </strong><br>

In [1]:
# importing libraries
import pandas as pd 
import matplotlib.pyplot as plt                      # data visualization
import seaborn as sns                                # enhanced data viz
import numpy as np
from sklearn.linear_model import LogisticRegression  # logistic regression
import statsmodels.formula.api as smf                # logistic regression
from sklearn.model_selection import train_test_split # train/test split
from sklearn.metrics import confusion_matrix         # confusion matrix
from sklearn.metrics import roc_auc_score            # auc score
from sklearn.neighbors import KNeighborsClassifier   # KNN for classification
from sklearn.neighbors import KNeighborsRegressor    # KNN for regression
from sklearn.preprocessing import StandardScaler     # standard scaler
from sklearn.tree import DecisionTreeClassifier      # classification trees
from sklearn.tree import plot_tree                   # plot trees
from sklearn.ensemble import RandomForestClassifier     # random forest
from sklearn.ensemble import GradientBoostingClassifier # gbm
from sklearn.model_selection import RandomizedSearchCV     # hyperparameter tuning
from sklearn.metrics import make_scorer              # customizable scorer            

# reading the file into Python and naming the data set
df = pd.read_excel("C:/Users/paomu/Desktop/GOT_character_predictions.xlsx")
    
# checking the file
df.head(n = 5) # check just in case




Unnamed: 0,S.No,name,title,male,culture,dateOfBirth,mother,father,heir,house,...,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isAliveFather,isAliveMother,isAliveSpouse,isAliveHeir,age,popularity,isAlive
0,1,Erreg,King,0,Andal,,,,,,...,1,0,0,,,,,,0.016722,1
1,2,Hugor of the Hill,King of the Andals,1,Andals,,,,,,...,0,0,1,,,,,,0.050167,1
2,3,Azor Ahai,,0,Asshai,,,,,,...,1,1,1,,,,1.0,,0.117057,1
3,4,Melisandre,,0,Asshai,,,,,R'hllor,...,1,1,1,,,,,,0.745819,1
4,5,Kraznys mo Nakloz,,1,Astapor,,,,,Good Masters,...,1,0,1,,,,,64.0,0.076923,0


# Analyzing and Cleaning Data

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Descriptive statistics,Correlation,Replacing missing values </strong><br>

In [2]:
#showing the size of the data set
print(f"""
Observations: {df.shape[0]}
Features:     {df.shape[1]}
""")

#General desccriptive statistics of numerical variabels in the data set
df.describe(include = 'number').round(2)


Observations: 1836
Features:     23



Unnamed: 0,S.No,male,dateOfBirth,book1_A_Game_Of_Thrones,book2_A_Clash_Of_Kings,book3_A_Storm_Of_Swords,book4_A_Feast_For_Crows,book5_A_Dance_with_Dragons,isAliveFather,isAliveMother,isAliveSpouse,isAliveHeir,age,popularity,isAlive
count,1836.0,1836.0,391.0,1836.0,1836.0,1836.0,1836.0,1836.0,21.0,26.0,23.0,256.0,410.0,1836.0,1836.0
mean,918.5,0.61,1717.91,0.19,0.58,0.47,0.37,0.38,0.71,0.19,0.65,0.77,-1368.04,0.08,0.73
std,530.15,0.49,20587.05,0.39,0.49,0.5,0.48,0.48,0.46,0.4,0.49,0.42,20104.31,0.15,0.44
min,1.0,0.0,-28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-298001.0,0.0,0.0
25%,459.75,0.0,237.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,18.0,0.01,0.0
50%,918.5,1.0,267.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,27.0,0.03,1.0
75%,1377.25,1.0,285.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,49.75,0.08,1.0
max,1836.0,1.0,298299.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,100.0,1.0,1.0


In [3]:
#Looking for missing values 
df.isnull().sum()

S.No                             0
name                             0
title                          956
male                             0
culture                       1209
dateOfBirth                   1445
mother                        1815
father                        1810
heir                          1813
house                          414
spouse                        1580
book1_A_Game_Of_Thrones          0
book2_A_Clash_Of_Kings           0
book3_A_Storm_Of_Swords          0
book4_A_Feast_For_Crows          0
book5_A_Dance_with_Dragons       0
isAliveFather                 1815
isAliveMother                 1810
isAliveSpouse                 1813
isAliveHeir                   1580
age                           1426
popularity                       0
isAlive                          0
dtype: int64

In [4]:
#checking the variables skewness
print(df.skew(axis=0))


S.No                           0.000000
male                          -0.456215
dateOfBirth                   13.953491
book1_A_Game_Of_Thrones        1.576485
book2_A_Clash_Of_Kings        -0.324714
book3_A_Storm_Of_Swords        0.137692
book4_A_Feast_For_Crows        0.559188
book5_A_Dance_with_Dragons     0.513219
isAliveFather                 -1.023275
isAliveMother                  1.658711
isAliveSpouse                 -0.684484
isAliveHeir                   -1.261513
age                          -14.291223
popularity                     3.859161
isAlive                       -1.039222
dtype: float64


  print(df.skew(axis=0))


In [5]:
#Replacing categorical missing values

# replace na in title with 'no_title'
fill = 'unknown'
df['title'] = df['title'].fillna(value = fill)

#culture
fill = 'unknown'
df['culture'] = df['culture'].fillna(value = fill)


#mother
fill = 'unknown'
df['mother'] = df['mother'].fillna(value = fill)


#father
fill = 'unknown'
df['father'] = df['father'].fillna(value = fill)


#heir
fill = 'unknown'
df['heir'] = df['heir'].fillna(value = fill)

#house
fill = 'unknown'
df['house'] = df['house'].fillna(value = fill)

#spouse
fill = 'unknown'
df['spouse'] = df['spouse'].fillna(value = fill)



#isAliveFather
fill = 0
df['isAliveFather'] = df['isAliveFather'].fillna(value = fill)

#isAliveFather
fill = 0
df['isAliveMother'] = df['isAliveMother'].fillna(value = fill)

#isAliveFather
fill = 0
df['isAliveSpouse'] = df['isAliveSpouse'].fillna(value = fill)

#isAliveFather
fill = 0
df['isAliveHeir'] = df['isAliveHeir'].fillna(value = fill)





#fill missing values with median for age 
fill = df['age'].median()
df['age'] = df['age'].fillna(value = fill)

#fill missing values with median for dateOfBirth
fill = df['dateOfBirth'].median()
df['dateOfBirth'] = df['dateOfBirth'].fillna(value = fill)

In [6]:
#Looking for missing values 
df.isnull().sum()

S.No                          0
name                          0
title                         0
male                          0
culture                       0
dateOfBirth                   0
mother                        0
father                        0
heir                          0
house                         0
spouse                        0
book1_A_Game_Of_Thrones       0
book2_A_Clash_Of_Kings        0
book3_A_Storm_Of_Swords       0
book4_A_Feast_For_Crows       0
book5_A_Dance_with_Dragons    0
isAliveFather                 0
isAliveMother                 0
isAliveSpouse                 0
isAliveHeir                   0
age                           0
popularity                    0
isAlive                       0
dtype: int64

In [7]:
# creating a (Pearson) correlation matrix to get some insights on what are the most valuable variables
df_corr = df.corr().round(2) 


# printing (Pearson) correlations with bwght
print(df_corr.loc['isAlive'].sort_values(ascending = False))

isAlive                       1.00
book2_A_Clash_Of_Kings        0.26
age                           0.05
book5_A_Dance_with_Dragons    0.01
book3_A_Storm_Of_Swords      -0.01
isAliveHeir                  -0.02
isAliveMother                -0.04
dateOfBirth                  -0.05
book4_A_Feast_For_Crows      -0.08
isAliveSpouse                -0.08
S.No                         -0.12
isAliveFather                -0.12
male                         -0.16
book1_A_Game_Of_Thrones      -0.17
popularity                   -0.22
Name: isAlive, dtype: float64


# Feature Engineering

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Creating new variables based on the original data set </strong><br>

In [8]:
# grouping categorical variables for culture in a new column
df['m_culture'] = df['culture'].map({'unknown': 'No_Culture',
                                     'Northmen': 'Major_Culture',
                                     'Ironborn': 'Major_Culture',
                                     'Free Folk': 'Major_Culture',
                                     'Valyrian': 'Major_Culture',
                                     'Braavosi': 'Major_Culture',
                                     'Ghiscari': 'Major_Culture',
                                     'Dornish': 'Major_Culture',
                                     'Dothraki': 'Major_Culture',
                                     'Rivermen': 'Minor_Culture',
                                     'Valemen': 'Minor_Culture',
                                     'Reach': 'Minor_Culture',
                                     'Vale mountain clans': 'Minor_Culture',
                                     'Dornishmen': 'Minor_Culture',
                                     'Westeros': 'Minor_Culture',
                                     'Free Folk': 'Minor_Culture',
                                     'northmen': 'Lower_Culture',
                                     'Westerman': 'Lower_Culture',
                                     'Stormlands': 'Lower_Culture',
                                     'Tyroshi': 'Lower_Culture',
                                     'Qartheen': 'Lower_Culture',
                                     'Ironmen': 'Lower_Culture',
                                     'Summer Isles': 'Lower_Culture',
                                     'Northern mountain clans': 'Lower_Culture',
                                     'Lysene': 'Lower_Culture',
                                     'Westermen': 'Lower_Culture',
                                     'Crannogmen': 'Lower_Culture',
                                     'Astapori': 'Lower_Culture',
                                     'Pentoshi': 'Lower_Culture',                      
                                    'Meereenese': 'Lower_Culture',                 
                                    'First Men': 'Lower_Culture',                     
                                    'Lyseni': 'Lower_Culture',                        
                                    'Myrish': 'Lower_Culture',                        
                                    'Wildling': 'Lower_Culture',                      
                                    'Sistermen': 'Lower_Culture',                    
                                    'Dorne': 'Lower_Culture',                         
                                    'Lhazareen': 'Lower_Culture',                     
                                    'Westerlands': 'Lower_Culture',                   
                                    'Riverlands': 'Lower_Culture',                    
                                    'Wildlings': 'Lower_Culture',                     
                                    'Asshai': 'Lower_Culture',                        
                                    'Qohor': 'Lower_Culture',                         
                                    'westermen': 'Lower_Culture',                     
                                    'Stormlander': 'Lower_Culture',                   
                                    'Astapor': 'Lower_Culture',                       
                                    'Norvos': 'Lower_Culture',                        
                                    'Meereen': 'Lower_Culture',                       
                                    "Asshai'i": 'Lower_Culture',                      
                                    'Rhoynar': 'Lower_Culture',                       
                                    'Naathi': 'Lower_Culture',                        
                                    'Ibbenese': 'Lower_Culture',                      
                                    'Reachmen': 'Lower_Culture',                      
                                    'ironborn': 'Lower_Culture',                      
                                    'Summer Islands': 'Lower_Culture',                
                                    'Summer Islander': 'Lower_Culture',               
                                    'free folk': 'Lower_Culture',                     
                                    'Vale': 'Lower_Culture',                          
                                    'Andals': 'Lower_Culture',                        
                                    'Braavos': 'Lower_Culture',                       
                                    'Ghiscaricari': 'Lower_Culture',                  
                                    'Andal': 'Lower_Culture',                         
                                    'Norvoshi': 'Lower_Culture',                      
                                    'Qarth': 'Lower_Culture',                         
                                    'Lhazarene': 'Lower_Culture',                     
                                    'The Reach': 'Lower_Culture'})       


# grouping categorical variables for house in a new column
df['m_house'] = df['house'].map({'unknown': 'No_House',                                  
                                    "Night's Watch": 'Major_House',                              
                                    'House Frey': 'Major_House',                                  
                                    'House Stark': 'Major_House',                                 
                                    'House Targaryen': 'Major_House',                             
                                    'House Lannister': 'Major_House',                            
                                    'House Greyjoy': 'Major_House',                               
                                    'House Tyrell': 'Major_House',                                
                                    'House Martell': 'Major_House',                               
                                    'House Osgrey': 'Major_House',                                
                                    'Faith of the Seven': 'Minor_House',                          
                                    'House Arryn': 'Minor_House',                                 
                                    'House Hightower': 'Minor_House',                             
                                    'House Bracken': 'Minor_House',                               
                                    'House Botley': 'Minor_House',                                
                                    'House Bolton': 'Minor_House',                                
                                    'House Baratheon': 'Minor_House',                             
                                    'House Florent': 'Minor_House',                               
                                    'House Tully': 'Minor_House',                                 
                                    'Brave Companions': 'Minor_House',                            
                                    'House Whent': 'Minor_House',                                 
                                    'Brotherhood without banners': 'Minor_House',                 
                                    'House Velaryon': 'Minor_House',                              
                                    'House Crakehall': 'Minor_House',
                                    'House Westerling': 'Lower_House',                             
                                    'House Redwyne': 'Lower_House',                                
                                    'House Royce': 'Lower_House',                                  
                                    'Stone Crows': 'Lower_House',                                  
                                    'House Baratheon of Dragonstone': 'Lower_House',               
                                    'House Clegane': 'Lower_House',                                
                                    'House Waynwood': 'Lower_House',                               
                                    'House Seaworth': 'Lower_House',                               
                                    'House Brax': 'Lower_House',                                   
                                    'House Swyft': 'Lower_House',                                  
                                    'House Mormont': 'Lower_House',                                
                                    'House Wylde': 'Lower_House',                                  
                                    'House Drumm': 'Lower_House',                                  
                                    'House Karstark': 'Lower_House',                               
                                    'House Swann': 'Lower_House',                                  
                                    'House Paege': 'Lower_House',                                  
                                    'House of Loraq': 'Lower_House',                               
                                    'House Royce of the Gates of the Moon': 'Lower_House',         
                                    'House Baelish': 'Lower_House',                                
                                    'House Manderly': 'Lower_House',                               
                                    'House Plumm': 'Lower_House',                                  
                                    "Alchemists' Guild": 'Lower_House',                            
                                    'House Goodbrother': 'Lower_House',                            
                                    'House Webber': 'Lower_House',                                 
                                    'House Stokeworth': 'Lower_House',                             
                                    'House Umber': 'Lower_House',                                  
                                    'House Darry': 'Lower_House',                                  
                                    'House Beesbury': 'Lower_House',                               
                                    'Brotherhood Without Banners': 'Lower_House',                  
                                    'House Tallhart': 'Lower_House',                               
                                    'House Mallister': 'Lower_House',                              
                                    'House Redfort': 'Lower_House',                                
                                    'House Estermont': 'Lower_House',                              
                                    'House Dayne': 'Lower_House',                                  
                                    'House Haigh': 'Lower_House',                                  
                                    'House Ryswell': 'Lower_House',                                
                                    'House Harlaw': 'Lower_House',                                 
                                    "Chataya's brothel": 'Lower_House',                            
                                    'House Norcross': 'Lower_House',                               
                                    'House Glover': 'Lower_House',                                 
                                    'House Oakheart': 'Lower_House',                               
                                    'House Caron': 'Lower_House',                                  
                                    'House Vance of Atranta': 'Lower_House',                       
                                    'House Crane': 'Lower_House',                                  
                                    'Second Sons': 'Lower_House',                                  
                                    'House Caswell': 'Lower_House',                                
                                    'Kingsguard': 'Lower_House',                                   
                                    'House Crabb': 'Lower_House',                                  
                                    'House Blackwood': 'Lower_House',                              
                                    'House Blackfyre': 'Lower_House',                              
                                    'House Connington': 'Lower_House',                             
                                    'House Hollard': 'Lower_House',                                
                                    'Drowned men': 'Lower_House',                                  
                                    'House Ashford': 'Lower_House',                                
                                    'House Hornwood': 'Lower_House',                               
                                    'Blacks': 'Lower_House',                                       
                                    'House Darklyn': 'Lower_House',                                
                                    'House Manwoody': 'Lower_House',                               
                                    'House Tarth': 'Lower_House',                                  
                                    'House Hunter': 'Lower_House',                                 
                                    'House Corbray': 'Lower_House',                                
                                    'House Ironmaker': 'Lower_House',                              
                                    'House Vance': 'Lower_House',                                  
                                    "House Vance of Wayfarer's Rest": 'Lower_House',               
                                    'House Cassel': 'Lower_House',                                 
                                    'House Farring': 'Lower_House',                                
                                    'Happy Port': 'Lower_House',                                   
                                    'House Lefford': 'Lower_House',                                
                                    'House Strong': 'Lower_House',                                 
                                    'Kingswood Brotherhood': 'Lower_House',                        
                                    'House Lothston': 'Lower_House',                               
                                    'House Farwynd of the Lonely Light': 'Lower_House',            
                                    'House Goodbrook': 'Lower_House',                              
                                    "House Baratheon of King's Landing": 'Lower_House',            
                                    'House Fossoway of Cider Hall': 'Lower_House',                 
                                    'House Ambrose': 'Lower_House',                                
                                    'House Reed': 'Lower_House',                                   
                                    'House Yronwood': 'Lower_House',                               
                                    'House Qorgyle': 'Lower_House',                                
                                    'House Kettleblack': 'Lower_House',                            
                                    'House Piper': 'Lower_House',                                  
                                    'Moon Brothers': 'Lower_House',                                
                                    'House Sharp': 'Lower_House',                                  
                                    'House of Galare': 'Lower_House',                              
                                    "R'hllor": 'Lower_House',                                      
                                    'House Morrigen': 'Lower_House',                               
                                    'House Mooton': 'Lower_House',                                 
                                    'House Blackmont': 'Lower_House',                              
                                    'House Locke': 'Lower_House',                                  
                                    'House Penrose': 'Lower_House',                                
                                    'House Grafton': 'Lower_House',                                
                                    'House Allyrion': 'Lower_House',                               
                                    'House Hardyng': 'Lower_House',                                
                                    'House Marbrand': 'Lower_House',                               
                                    'House Vypren': 'Lower_House',                                 
                                    'House Frey of Riverrun': 'Lower_House',                       
                                    'House Fossoway of New Barrel': 'Lower_House',                 
                                    'House Tarly': 'Lower_House',                                  
                                    'House Santagar': 'Lower_House',                               
                                    'House Stackspear': 'Lower_House',                             
                                    'House Hewett': 'Lower_House',                                 
                                    'House Costayne': 'Lower_House',                               
                                    'Stormcrows': 'Lower_House',                                   
                                    'House Bulwer': 'Lower_House',                                 
                                    'House Smallwood': 'Lower_House',                              
                                    'House Payne': 'Lower_House',                                  
                                    'House Staunton': 'Lower_House',                              
                                    'House Uller': 'Lower_House',                                  
                                    'House Butterwell': 'Lower_House',                             
                                    'House Stout': 'Lower_House',                                  
                                    'House Slynt': 'Lower_House',                                  
                                    'House Spicer': 'Lower_House',                                 
                                    'Burned Men': 'Lower_House',                                   
                                    'House Shepherd': 'Lower_House',                               
                                    'House Poole': 'Lower_House',                                  
                                    'House Wynch': 'Lower_House',                                  
                                    'House Jordayne': 'Lower_House',                               
                                    'House Tollett': 'Lower_House',                                
                                    'House Belmore': 'Lower_House',                                
                                    'House Brune of Brownhollow': 'Lower_House',                   
                                    'House Wythers': 'Lower_House',                                
                                    'Golden Company': 'Lower_House',                               
                                    'House Kenning of Harlaw': 'Lower_House',                      
                                    'House Lydden': 'Lower_House',                                 
                                    'House Rosby': 'Lower_House',                                  
                                    'House Inchfield': 'Lower_House',                              
                                    'House Wull': 'Lower_House',                                   
                                    'House Flint': 'Lower_House',                                  
                                    'House Sunglass': 'Lower_House',                               
                                    'Good Masters': 'Lower_House',                                 
                                    'House Peake': 'Lower_House',                                  
                                    'House Wells': 'Lower_House',                                  
                                    'House Humble': 'Lower_House',                                 
                                    'House Toyne': 'Lower_House',                                  
                                    'House Greenfield': 'Lower_House',                             
                                    'House Kenning of Kayce': 'Lower_House',                       
                                    'House Cuy': 'Lower_House',                                    
                                    'House Prester': 'Lower_House',                                
                                    'House Fell': 'Lower_House',                                   
                                    'House of Pahl': 'Lower_House',                                
                                    "House Flint of Widow's Watch": 'Lower_House',                 
                                    'House Thorne': 'Lower_House',                                 
                                    'House Hetherspoon': 'Lower_House',                            
                                    'House Selmy': 'Lower_House',                                  
                                    'House Vaith': 'Lower_House',                                  
                                    'House Cerwyn': 'Lower_House',                                 
                                    'House Norrey': 'Lower_House',                                 
                                    'House Mullendore': 'Lower_House',                             
                                    'Iron Bank of Braavos': 'Lower_House',                         
                                    'House Dalt': 'Lower_House',                                   
                                    'House Penny': 'Lower_House',                                  
                                    'House Rowan': 'Lower_House',                                  
                                    'House Lonmouth': 'Lower_House',                               
                                    'House Hunt': 'Lower_House',                                   
                                    'House Blackberry': 'Lower_House',                             
                                    'House Clifton': 'Lower_House',                                
                                    'House Uffering': 'Lower_House',                               
                                    'Kingdom of the Three Daughters': 'Lower_House',               
                                    'House Charlton': 'Lower_House',                               
                                    'House Willum': 'Lower_House',                                 
                                    'House Codd': 'Lower_House',                                   
                                    'House Blacktyde': 'Lower_House',                              
                                    'House Merryweather': 'Lower_House',                           
                                    'Unsullied': 'Lower_House',                                   
                                    'House Nayland': 'Lower_House',                                
                                    'Black Ears': 'Lower_House',                                   
                                    'House Lorch': 'Lower_House',                                  
                                    'House Meadows': 'Lower_House',                                
                                    'House Heddle': 'Lower_House',                                 
                                    'House Lynderly': 'Lower_House',                               
                                    'House Serry': 'Lower_House',                                  
                                    'House Ryger': 'Lower_House',                                  
                                    'Faceless Men': 'Lower_House',                                 
                                    'House Massey': 'Lower_House',                                 
                                    'House Moreland': 'Lower_House',                               
                                    'House Horpe': 'Lower_House',                                  
                                    'House Toland': 'Lower_House',                                 
                                    'House Banefort': 'Lower_House',                               
                                    'House Hogg': 'Lower_House',                                   
                                    'brotherhood without banners': 'Lower_House',                  
                                    'House Longthorpe': 'Lower_House',                             
                                    'House Coldwater': 'Lower_House',                              
                                    'House Leek': 'Lower_House',                                   
                                    'House Farman': 'Lower_House',                                 
                                    'House Bywater': 'Lower_House',                                
                                    'Brotherhood without Banners': 'Lower_House',                  
                                    'House Longwaters': 'Lower_House',                             
                                    'Mance Rayder': 'Lower_House',                                 
                                    'House Ball': 'Lower_House',                                   
                                    'House Rykker': 'Lower_House',                                 
                                    'House Potter': 'Lower_House',                                 
                                    'House of Kandaq': 'Lower_House',                              
                                    'House of Reznak': 'Lower_House',                              
                                    'House Peckledon': 'Lower_House',                              
                                    'House Harlaw of Harlaw Hall': 'Lower_House',                  
                                    'House Foote': 'Lower_House',                                  
                                    'House Estren': 'Lower_House',                                 
                                    'House Rhysling': 'Lower_House',                               
                                    'House Chelsted': 'Lower_House',                               
                                    'House Ruttiger': 'Lower_House',                               
                                    'House Cox': 'Lower_House',                                    
                                    'House Dustin': 'Lower_House',                                 
                                    'House Nymeros Martell': 'Lower_House',                        
                                    'House Templeton': 'Lower_House',                              
                                    'House Stonehouse': 'Lower_House',                             
                                    'House Grell': 'Lower_House',                                  
                                    'Undying Ones': 'Lower_House',                                 
                                    'House Yarwyck': 'Lower_House',                                
                                    'Maesters': 'Lower_House',                                     
                                    'House Blanetree': 'Lower_House',                              
                                    'House Blount': 'Lower_House',                                 
                                    'House Trant': 'Lower_House',                                  
                                    'House Moore': 'Lower_House',                                  
                                    'Company of the Cat': 'Lower_House',                           
                                    'House Suggs': 'Lower_House',                                  
                                    'House Lannister of Casterly Rock': 'Lower_House',             
                                    'Khal': 'Lower_House',                                         
                                    'House Condon': 'Lower_House',                                 
                                    'Summer Islands': 'Lower_House',                               
                                    'House Harlaw of Grey Garden': 'Lower_House',                  
                                    'House Wayn': 'Lower_House',                                   
                                    'House Erenford': 'Lower_House',                               
                                    'House Egen': 'Lower_House',                                   
                                    'Thirteen': 'Lower_House',                                     
                                    'House Strickland': 'Lower_House',                             
                                    'House Dayne of High Hermitage': 'Lower_House',                
                                    'House Errol': 'Lower_House',                                  
                                    'House Drinkwater': 'Lower_House',                             
                                    'House Liddle': 'Lower_House',                                 
                                    'Graces': 'Lower_House',                                       
                                    'House Gargalen': 'Lower_House',                               
                                    'House Mudd': 'Lower_House',                                   
                                    'House Farwynd': 'Lower_House',                                
                                    'House Cole': 'Lower_House',                                   
                                    'House Sunderland': 'Lower_House',                             
                                    'House Goodbrother of Shatterstone': 'Lower_House',            
                                    'House Hasty': 'Lower_House',                                  
                                    'Thenn': 'Lower_House',                                        
                                    'House Bolton of the Dreadfort': 'Lower_House',                
                                    'House Tyrell of Brightwater Keep': 'Lower_House',             
                                    'House Pemford': 'Lower_House',                                
                                    'House Chester': 'Lower_House',                                
                                    'House Greenhill': 'Lower_House',                              
                                    'House Risley': 'Lower_House',                                 
                                    'Pureborn': 'Lower_House',                                     
                                    'House Gaunt': 'Lower_House',                                  
                                    'House Grandison': 'Lower_House',                              
                                    'House Sawyer': 'Lower_House',                                 
                                    'House Bolling': 'Lower_House',                                
                                    'House Cupps': 'Lower_House',                                  
                                    'House Tawney': 'Lower_House',                                 
                                    'Windblown': 'Lower_House',                                    
                                    'House Roote': 'Lower_House',                                  
                                    'House Hardy': 'Lower_House',                                 
                                    'Queensguard': 'Lower_House',                                  
                                    'House Dondarrion': 'Lower_House',                             
                                    'House Yew': 'Lower_House',                                    
                                    'House Mertyns': 'Lower_House',                                
                                    'House Boggs': 'Lower_House',                                  
                                    'House Woods': 'Lower_House',                                  
                                    'House Deddings': 'Lower_House',                               
                                    'House Conklyn': 'Lower_House',                                
                                    "City Watch of King's Landing": 'Lower_House',                 
                                    'House Buckler': 'Lower_House',                                
                                    'House Wode': 'Lower_House',                                   
                                    'wildling': 'Lower_House',                                     
                                    'House Farrow': 'Lower_House',                                 
                                    'House Reyne': 'Lower_House',                                  
                                    'House Weaver': 'Lower_House',                                 
                                    'House Harclay': 'Lower_House',                                
                                    'House Lannister of Lannisport': 'Lower_House',                
                                    'Antler Men': 'Lower_House',                                   
                                    'House Stonetree': 'Lower_House',                              
                                    'House Leygood': 'Lower_House',                                
                                    'House Sparr': 'Lower_House',                                  
                                    'House Varner': 'Lower_House',                                 
                                    'Peach': 'Lower_House',                                        
                                    'Sea watch': 'Lower_House',                                    
                                    'Band of Nine': 'Lower_House',                                
                                    'House Bushy': 'Lower_House',                                  
                                    'House Cafferen': 'Lower_House',                               
                                    'House Staedmon': 'Lower_House',                               
                                    'House Cockshaw': 'Lower_House',                               
                                    'House of Merreq': 'Lower_House',                              
                                    'House Mollen': 'Lower_House',                                 
                                    'House Rambton': 'Lower_House',                                
                                    'House Harlaw of the Tower of Glimmering': 'Lower_House',      
                                    'House Wagstaff': 'Lower_House',                               
                                    'House Vyrwel': 'Lower_House',                                 
                                    'House Bettley': 'Lower_House',                                
                                    'House Myre': 'Lower_House',                                   
                                    'House Turnberry': 'Lower_House',                              
                                    'House Blackbar': 'Lower_House',                               
                                    'House Woolfield': 'Lower_House',                              
                                    'House Fossoway': 'Lower_House',                               
                                    'House Mallery': 'Lower_House',                                
                                    'House Chyttering': 'Lower_House',                             
                                    'House Lychester': 'Lower_House',                              
                                    'House Vikary': 'Lower_House',                                 
                                    'House Volmark': 'Lower_House',                                
                                    'House Merlyn': 'Lower_House',                                 
                                    'House Sarsfield': 'Lower_House',                              
                                    'House Hoare': 'Lower_House',                                  
                                    'The Citadel': 'Lower_House',                                  
                                    'House Graceford': 'Lower_House',                              
                                    'House Grimm': 'Lower_House',                                  
                                    'House Jast': 'Lower_House',                                   
                                    'House Celtigar': 'Lower_House',                               
                                    'House of Ghazeen': 'Lower_House',                             
                                    'House Byrch': 'Lower_House',                                  
                                    'House Hawick': 'Lower_House',                                 
                                    'House Broom': 'Lower_House',                                  
                                    'House Harlaw of Harridan Hill': 'Lower_House',                
                                    'House Shett of Gull Tower': 'Lower_House',                    
                                    'House Bar Emmon': 'Lower_House',                              
                                    'House Norridge': 'Lower_House',                               
                                    'House Hayford': 'Lower_House',                                
                                    'House Brune of the Dyre Den': 'Lower_House',                  
                                    'House Fowler': 'Lower_House',                                 
                                    'House Gower': 'Lower_House',                                  
                                    'House Borrell': 'Lower_House',                                
                                    'Citadel': 'Lower_House',                                      
                                    'Wise Masters': 'Lower_House',                                 
                                    'Three-eyed crow': 'Lower_House'})  

df['m_title'] = df['title'].map({ 'unknown' : 'No_Title',                                                       
                                    'Ser' : 'Common_Title',                                                            
                                    'Maester' : 'Common_Title',                                                        
                                    'Archmaester' : 'Common_Title',                                                      
                                    'Lord' : 'Common_Title',                                                            
                                    'Septon' : 'Common_Title',                                                           
                                    'Winterfell' : 'Common_Title',                                                       
                                    'Princess' : 'Common_Title',                                                         
                                    'Lady' : 'Common_Title',                                                             
                                    'Septa' : 'Common_Title',                                                            
                                    'King in the North' : 'NotCommon_Title',                                                 
                                    'Prince' : 'NotCommon_Title',                                                            
                                    "Lord Commander of the Night's Watch" : 'NotCommon_Title',                              
                                    'Grand Maester' : 'NotCommon_Title',                                                     
                                    'Cupbearer' : 'NotCommon_Title',                                                         
                                    'Khal' : 'NotCommon_Title',                                                              
                                    'Bloodrider' : 'NotCommon_Title',                                                        
                                    'Casterly Rock' : 'NotCommon_Title',                                                     
                                    'King-Beyond-the-Wall' : 'NotCommon_Title',                                              
                                    'Knight' : 'NotCommon_Title',                                                             
                                    'Eyrie' : 'NotCommon_Title',                                                              
                                    'Wisdom' : 'NotCommon_Title',                                                               
                                    'Prince of Dragonstone' : 'NotCommon_Title',                                                
                                    'Harrenhal' : 'NotCommon_Title',                                                            
                                    'Hand of the King' : 'NotCommon_Title',                                                     
                                    'Lord of the Tides' : 'NotCommon_Title',                                                    
                                    'Last Hearth' : 'NotCommon_Title',                                                          
                                    'Lord Paramount of the Mander' : 'NotCommon_Title',                                         
                                    'Bitterbridge' : 'NotCommon_Title',                                                         
                                    'PrincessQueen' : 'NotCommon_Title',                                                     
                                    'Starpike' : 'NotCommon_Title',                                                          
                                    'Karhold' : 'NotCommon_Title',                                                           
                                    'Coldmoat' : 'NotCommon_Title',                                                          
                                    'Lord of Harrenhal' : 'NotCommon_Title',                                                 
                                    'Queen' : 'NotCommon_Title',                                                             
                                    'Riverrun' : 'NotCommon_Title',                                                          
                                    'Duskendale' : 'NotCommon_Title',                                                          
                                    'Brother' : 'NotCommon_Title',                                                             
                                    'First Ranger' : 'NotCommon_Title',                                                        
                                    'Highgarden' : 'NotCommon_Title',                                                          
                                    'Castellan' : 'NotCommon_Title',                                                           
                                    'Yronwood' : 'NotCommon_Title',                                                            
                                    'Blackcrown' : 'NotCommon_Title',                                                          
                                    'Golden Tooth' : 'NotCommon_Title',                                                       
                                    'Nightsong' : 'NotCommon_Title',                                                          
                                    'Runestone' : 'NotCommon_Title',                                                          
                                    'Stonehelm' : 'NotCommon_Title',                                                          
                                    'Brightwater' : 'NotCommon_Title',                                                        
                                    'Master of Coin' : 'NotCommon_Title',                                                     
                                    'Lord of Southshield' : 'NotCommon_Title',                                                
                                    'Andals' : 'NotCommon_Title',                                                             
                                    'Goldengrove' : 'NotCommon_Title',                                                        
                                    'Crag' : 'NotCommon_Title',                                                               
                                    'Hightower' : 'NotCommon_Title',                                                          
                                    'KhalKo (formerly)' : 'NotCommon_Title',                                                  
                                    'Lord Reaper of Pyke' : 'NotCommon_Title',                                                
                                    'Cerwyn' : 'NotCommon_Title',                                                              
                                    'Lord Paramount of the Trident' : 'NotCommon_Title',                                        
                                    'Sunspear' : 'NotCommon_Title',                                                             
                                    "Storm's End" : 'NotCommon_Title',                                                          
                                    'Barrowton' : 'NotCommon_Title',                                                            
                                    'Acorn Hall' : 'NotCommon_Title',                                                           
                                    'King' : 'NotCommon_Title',                                                                 
                                    'Seneschal' : 'NotCommon_Title',                                                           
                                    'Stokeworth' : 'NotCommon_Title',                                                        
                                    'Captain of the guard' : 'NotCommon_Title',                                              
                                    'Master of coin' : 'NotCommon_Title',                                                    
                                    'LadyQueen' : 'NotCommon_Title',                                                         
                                    'Hornwood' : 'NotCommon_Title',                                                          
                                    'Steward' : 'NotCommon_Title',                                                           
                                    'Lordsport' : 'NotCommon_Title',                                                         
                                    'Khalakka' : 'NotCommon_Title',                                                           
                                    'Good Master' : 'NotCommon_Title',                                                       
                                    'Goodwife' : 'NotCommon_Title',                                                           
                                    'Ashford' : 'NotCommon_Title',                                                            
                                    'Prince of Dorne' : 'NotCommon_Title',                                                    
                                    'Seagard' : 'NotCommon_Title',                                                            
                                    'Rain House' : 'Unique',                                                         
                                    'Keeper of the Gates of the Moon' : 'Unique',                                      
                                    'Arbor' : 'Unique',                                                                
                                    'Lord of Oakenshield' : 'Unique',                                                  
                                    'Castellan of Harrenhal' : 'Unique',                                               
                                    'First Builder' : 'Unique',                                                        
                                    'Warlock' : 'Unique',                                                              
                                    'Sandship' : 'Unique',                                                                
                                    'Red Jester' : 'Unique',                                                           
                                    'master of ships' : 'Unique',                                                      
                                    'Red Flower Vale' : 'Unique',                                                      
                                    'Lord of Hellholt' : 'Unique',                                                     
                                    'Lord of the Tor' : 'Unique',                                                      
                                    'Salt Shore' : 'Unique',                                                           
                                    'Sealskin Point' : 'Unique',                                                     
                                    'Three Sisters' : 'Unique',                                                      
                                    'Grey Glen' : 'Unique',                                                          
                                    'Dragonstone' : 'Unique',                                                        
                                    'Lord of Iron Holt' : 'Unique',                                                  
                                    'Lord of Honeyholt' : 'Unique',                                                  
                                    'Maidenpool' : 'Unique',                                                         
                                    'Protector of the Realm' : 'Unique',                                               
                                    'Lady of Darry' : 'Unique',                                                        
                                    'Horn Hill' : 'Unique',                                                            
                                    'Commander of the Second Sons' : 'Unique',                                         
                                    'Prince of Winterfell' : 'Unique',                                                 
                                    'Light of the West' : 'Unique',                                                    
                                    'King of Astapor' : 'Unique',                                                      
                                    'Eastwatch-by-the-Sea' : 'Unique',                                               
                                    'Foamdrinker' : 'Unique',                                                        
                                    'Lord of Kingsgrave' : 'Unique',                                                 
                                    'Lord of Starfall' : 'Unique',                                                   
                                    'Master of Deepwood Motte' : 'Unique',                                           
                                    'Lord of the Hornwood' : 'Unique',                                               
                                    'Greywater Watch' : 'Unique',                                                    
                                    'Darry' : 'Unique',                                                               
                                    'Lady of Bear Island' : 'Unique',                                                 
                                    'Dreadfort' : 'Unique',                                                           
                                    'Lord Paramount of the Stormlands' : 'Unique',                                    
                                    'green lands' : 'Unique',                                                             
                                    'Harlaw' : 'Unique',                                                                  
                                    'PrincessQueenDowager Queen' : 'Unique',                                              
                                    'Lord of White Harbor' : 'Unique',                                                     
                                    'Raventree Hall' : 'Unique',                                                           
                                    "Lord of Griffin's Roost" : 'Unique',                                                  
                                    'QueenDowager Queen' : 'Unique',                                                       
                                    'Lord of the Seven Kingdoms' : 'Unique',                                               
                                    'Master of Whisperers' : 'Unique',                                                     
                                    'Prince of the Narrow Sea' : 'Unique',                                                 
                                    'Lord Captain of the Iron Fleet' : 'Unique',                                           
                                    'Ruddy Hall' : 'Unique',                                                               
                                    'SerCastellan of Casterly Rock' : 'Unique',                                            
                                    'Longtable' : 'Unique',                                                               
                                    'Bear Island' : 'Unique',                                                             
                                    'Master of whisperers' : 'Unique',                                                    
                                    'Prince of WinterfellHeir to Winterfell' : 'Unique',                                  
                                    'Haystack Hall' : 'Unique',                                                         
                                    'Old Wyk' : 'Unique',                                                               
                                    'the Crossing' : 'Unique',                                                          
                                    'the Dreadfort' : 'Unique',                                                         
                                    'Godswife' : 'Unique',                                                              
                                    'Lady of the Vale' : 'Unique',                                                      
                                    'Magnar of Thenn' : 'Unique',                                                       
                                    'Twins' : 'Unique',                                                                  
                                    'Golden Storm' : 'Unique',                                                           
                                    'Iron Islands' : 'Unique',                                                           
                                    'Old Oak' : 'Unique',                                                                
                                    'Black Wind' : 'Unique',                                                             
                                    'Blacktyde' : 'Unique',                                                              
                                    'Lord of Blackhaven' : 'Unique',                                                     
                                    'The NorreyLord Norrey' : 'Unique',                                                
                                    'Lord Steward' : 'Unique',                                                         
                                    'King of Winter' : 'Unique',                                                       
                                    'Vaith' : 'Unique',                                                                
                                    'Commander of the City Watch' : 'Unique',                                          
                                    'Green Grace' : 'Unique',                                                          
                                    'Lord of Hammerhorn' : 'Unique',                                                   
                                    '[1]' : 'Unique',                                                                  
                                    'Lonely Light' : 'Unique',                                                         
                                    'LordWisdom' : 'Unique',                                                           
                                    'Captain-General' : 'Unique',                                                      
                                    'High Septon' : 'Unique',                                                          
                                    'Three Towers' : 'Unique',                                                         
                                    'Oakenshield' : 'Unique',                                                          
                                    'Magister' : 'Unique',                                                              
                                    'Stone Hedge' : 'Unique',                                                           
                                    'Lady of the Leaves' : 'Unique',                                                    
                                    'The LiddleLord Liddle' : 'Unique',                                                 
                                    'Lord of Oldcastle' : 'Unique',                                                     
                                    'Ten Towers' : 'Unique',                                                            
                                    'Redfort' : 'Unique',                                                               
                                    'Lord of Sunflower Hall' : 'Unique',                                                
                                    'Lord of the Marches' : 'Unique',                                                   
                                    'Pinkmaiden' : 'Unique',                                                            
                                    'Lord of the Red Dunes' : 'Unique',                                                 
                                    'Godsgrace' : 'Unique',                                                             
                                    'Sharp Point' : 'Unique',                                                           
                                    "Lady of Torrhen's Square" : 'Unique',                                              
                                    'Grassy Vale' : 'Unique',                                                        
                                    'Longbow Hall' : 'Unique',                                                       
                                    'Hayford' : 'Unique',                                                            
                                    'Dyre Den' : 'Unique',                                                           
                                    'Sealord' : 'Unique',                                                            
                                    'Skyreach' : 'Unique',                                                           
                                    'Feastfires': 'Unique',                                                           
                                    'Lord Seneschal': 'Unique',                                                         
                                    'Gulltown': 'Unique',                                                               
                                    'Sweetsister': 'Unique',                                                            
                                    'Sweetport Sound': 'Unique',                                                        
                                    'Lord of Greyshield': 'Unique',                                                     
                                    'High Steward of Highgarden': 'Unique',                                             
                                    'Felwood': 'Unique',                                                                
                                    'Harridan Hill': 'Unique',                                                         
                                    'Strongsong': 'Unique',                                                            
                                    'Ironoaks': 'Unique',                                                              
                                    'Lord Steward of the Iron Islands': 'Unique',                                      
                                    'Greenstone': 'Unique',                                                            
                                    'red hand' : 'Unique',                                                            
                                    'Seven Kingdoms' : 'Unique',                                                      
                                    'Lord of the Iron Islands' : 'Unique',                                            
                                    'Lord of Coldmoat' : 'Unique',                                                    
                                    'PrincessSepta' : 'Unique',                                                       
                                    'LadyQueenDowager Queen' : 'Unique',                                              
                                    'Lord of Dragonstone' : 'Unique',                                                 
                                    'Goodman' : 'Unique',                                                             
                                    'Blue Grace' : 'Unique',                                                          
                                    'Claw Isle' : 'Unique',                                                          
                                    'Red Priest' : 'Unique',                                                         
                                    'QueenBlack Bride' : 'Unique',                                                   
                                    'Uplands' : 'Unique',                                                             
                                    'BrotherProctor' : 'Unique',                                                     
                                    'Wind Witch' : 'Unique',                                                         
                                    'Broad Arch' : 'Unique',                                                           
                                    'Lady Marya' : 'Unique',                                                           
                                    'Whitewalls' : 'Unique',                                                          
                                    'Hornvale' : 'Unique',                                                             
                                    'Goldgrass' : 'Unique',                                                            
                                    'Tower of Glimmering' : 'Unique',                                                  
                                    'Lord of the Ten TowersLord Harlaw of HarlawHarlaw of Harlaw' : 'Unique',          
                                    'Big BucketThe Wull' : 'Unique',                                                 
                                    'Banefort' : 'Unique',                                                           
                                    'Sandstone' : 'Unique',                                                          
                                    'Serthe Knight of Saltpans' : 'Unique',                                         
                                    'CastellanCommander' : 'Unique',                                                
                                    'Wyndhall' : 'Unique',                                                          
                                    'Chief Undergaoler' : 'Unique',                                                 
                                    "Lord of Flint's Finger" : 'Unique',                                            
                                    'Rills'  : 'Unique',                                                                 
                                    'Longsister'  : 'Unique',                                                            
                                    "Knight of Griffin's Roost" : 'Unique',                                             
                                    'Cobblecat' : 'Unique',                                                             
                                    'Coldwater Burn' : 'Unique',                                                        
                                    'Undergaoler' : 'Unique',                                                           
                                    'Fair Isle' : 'Unique',                                                             
                                    'Master of Harlaw Hall' : 'Unique',                                                 
                                    "Rook's Rest" : 'Unique',                                                         
                                    'Crakehall' : 'Unique',                                                           
                                    'King of the Iron Islands' : 'Unique',                                            
                                    'Deepwood Motte' : 'Unique',                                                      
                                    'Captain' : 'Unique',                                                               
                                    'Kayce' : 'Unique',                                                                 
                                    'First Sword of Braavos' : 'Unique',                                                
                                    'Magister of Pentos' : 'Unique',                                                    
                                    'Lord of the Crossing' : 'Unique',                                                  
                                    'Castle Lychester' : 'Unique',                                                      
                                    'King of the Andals' : 'Unique',                                                    
                                    'Lord of the Snakewood' : 'Unique',                                                 
                                    "Karl's Hold" : 'Unique',                                                           
                                    'Blackmont': 'Unique',                                                              
                                    'Master-at-Arms': 'Unique',                                                         
                                    'Lord of Crows Nest': 'Unique',                                                     
                                    'Lord of the Deep Den': 'Unique',                                                 
                                    "Widow's Watch": 'Unique',                                                        
                                    'Lord of Darry': 'Unique',                                                        
                                    'Wraith': 'Unique',                                                               
                                    'Ghost Hill': 'Unique',                                                           
                                    "Heart's Home": 'Unique',                                                         
                                    'Oarmaster' : 'Unique',                                                         
                                    'Volmark' : 'Unique',                                                           
                                    'Lord of Pebbleton' : 'Unique',                                                 
                                    'Tradesman-Captain' : 'Unique',                                                 
                                    'Greenshield' : 'Unique',                                                       
                                    'Mistress of whisperers' : 'Unique',                                            
                                    'Shatterstone' : 'Unique',                                                      
                                    'Lord of Atranta' : 'Unique',                                                   
                                    "Slave of R'hllor" : 'Unique'})      
                                   
#save filled value as new df
df_full = df

In [9]:
#create dummy variables for house
df_full = pd.get_dummies(df_full, columns = ['m_house'], prefix = 'p_', drop_first = True)

#create dummies variables for culture
df_full = pd.get_dummies(df_full, columns = ['m_culture'], prefix = 'p_', drop_first = True)

#create dummies variables for title
df_full = pd.get_dummies(df_full, columns = ['m_title'], prefix = 'p_', drop_first = True)

#check first rows
df_full.head()

Unnamed: 0,S.No,name,title,male,culture,dateOfBirth,mother,father,heir,house,...,isAlive,p__Major_House,p__Minor_House,p__No_House,p__Major_Culture,p__Minor_Culture,p__No_Culture,p__No_Title,p__NotCommon_Title,p__Unique
0,1,Erreg,King,0,Andal,267.0,unknown,unknown,unknown,unknown,...,1,0,0,1,0,0,0,0,1,0
1,2,Hugor of the Hill,King of the Andals,1,Andals,267.0,unknown,unknown,unknown,unknown,...,1,0,0,1,0,0,0,0,0,1
2,3,Azor Ahai,unknown,0,Asshai,267.0,unknown,unknown,unknown,unknown,...,1,0,0,1,0,0,0,1,0,0
3,4,Melisandre,unknown,0,Asshai,267.0,unknown,unknown,unknown,R'hllor,...,1,0,0,0,0,0,0,1,0,0
4,5,Kraznys mo Nakloz,unknown,1,Astapor,267.0,unknown,unknown,unknown,Good Masters,...,0,0,0,0,0,0,0,1,0,0


In [10]:
# creating a (Pearson) correlation matrix to get some insights on what are the most valuable variables
df_corr = df_full.corr().round(2) 


# printing (Pearson) correlations with bwght
print(df_corr.loc['isAlive'].sort_values(ascending = False))

isAlive                       1.00
book2_A_Clash_Of_Kings        0.26
age                           0.05
p__No_Title                   0.05
p__No_Culture                 0.05
p__No_House                   0.05
p__Unique                     0.04
book5_A_Dance_with_Dragons    0.01
p__Major_Culture             -0.01
book3_A_Storm_Of_Swords      -0.01
isAliveHeir                  -0.02
isAliveMother                -0.04
p__Minor_Culture             -0.04
dateOfBirth                  -0.05
isAliveSpouse                -0.08
book4_A_Feast_For_Crows      -0.08
p__Minor_House               -0.08
p__NotCommon_Title           -0.10
p__Major_House               -0.10
isAliveFather                -0.12
S.No                         -0.12
male                         -0.16
book1_A_Game_Of_Thrones      -0.17
popularity                   -0.22
Name: isAlive, dtype: float64


In [11]:
#create dictionary for explanatory variable sets

candidate_dict = {

 # full model
 'var_full'   : [ 'dateOfBirth','male',
             'book2_A_Clash_Of_Kings','book3_A_Storm_Of_Swords',
             'book4_A_Feast_For_Crows','book5_A_Dance_with_Dragons',
             'age','numDeadRelations','popularity',
             'isAliveFather ',
             'isAliveMother ','isAliveSpouse ','isAliveHeir ','p__Major_House',
             'p__Minor_House','p__No_House','p__Major_Culture', 'p__Minor_Culture','p__No_Culture',
             'p__NotCommon_Title', 'p__No_Title', 'p__Unique'],
 

    # significant variables 
  'var_sig2'  : ['book1_A_Game_Of_Thrones','book2_A_Clash_Of_Kings','book3_A_Storm_Of_Swords',
                 'book4_A_Feast_For_Crows','male','popularity', 'isAliveFather',
                 'p__Major_House','p__Minor_House', 'p__NotCommon_Title'],

}

# Logistic Regression Model

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Preparing the training and testing set.Running the Logistic Regression Model and testing it. Confusion Matrix. </strong><br>

In [12]:
#Preparing Explanatory and Response Data
# declaring explanatory variables
df_data = df_full.drop('isAlive', axis = 1)


# declaring response variable
df_target = df_full.loc[:,'isAlive']

# prepar train-test split with stratification
x_train, x_test, y_train, y_test = train_test_split(
            df_data,
            df_target,
            test_size    = 0.1,
            random_state = 219,
            stratify     = df_target) # preserving balance


# merging training data for statsmodels
df_train = pd.concat([x_train, y_train], axis = 1)
# instantiating a logistic regression model object
logistic_full = smf.logit(formula = """isAlive~
male +
book1_A_Game_Of_Thrones +
book2_A_Clash_Of_Kings +
book3_A_Storm_Of_Swords +
book4_A_Feast_For_Crows +
isAliveFather +
popularity +
p__Major_House +
p__Minor_House +
p__NotCommon_Title  
""", data = df_train)


# fitting the model object
results_full = logistic_full.fit()


# checking the results SUMMARY
results_full.summary2()

Optimization terminated successfully.
         Current function value: 0.495654
         Iterations 6


0,1,2,3
Model:,Logit,Pseudo R-squared:,0.149
Dependent Variable:,isAlive,AIC:,1659.6406
Date:,2022-03-27 20:37,BIC:,1719.1478
No. Observations:,1652,Log-Likelihood:,-818.82
Df Model:,10,LL-Null:,-962.51
Df Residuals:,1641,LLR p-value:,7.2288000000000005e-56
Converged:,1.0000,Scale:,1.0
No. Iterations:,6.0000,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,1.4049,0.1381,10.1707,0.0000,1.1341,1.6756
male,-0.6174,0.1319,-4.6790,0.0000,-0.8760,-0.3588
book1_A_Game_Of_Thrones,-0.5650,0.1593,-3.5476,0.0004,-0.8772,-0.2529
book2_A_Clash_Of_Kings,1.5980,0.1446,11.0474,0.0000,1.3145,1.8815
book3_A_Storm_Of_Swords,-0.3625,0.1463,-2.4777,0.0132,-0.6493,-0.0758
book4_A_Feast_For_Crows,-0.2262,0.1439,-1.5718,0.1160,-0.5082,0.0559
isAliveFather,-1.1890,0.8434,-1.4098,0.1586,-2.8421,0.4640
popularity,-2.0372,0.4421,-4.6081,0.0000,-2.9036,-1.1707
p__Major_House,-0.3876,0.1392,-2.7843,0.0054,-0.6605,-0.1148


In [13]:
# train/test split with the full model
df_data_dict   =  df_full.loc[ : ,candidate_dict['var_sig2']]
df_target_dict =  df_full.loc[ : , 'isAlive']


# This is the exact code we were using before
x_train, x_test, y_train, y_test = train_test_split(
            df_data_dict,
            df_target_dict,
            test_size    = 0.1,
            random_state = 219,
            stratify     = df_target_dict)


# INSTANTIATING a logistic regression model
logreg = LogisticRegression(solver = 'lbfgs',
                            C = 1,
                            random_state = 219)


# FITTING the training data
logreg_fit = logreg.fit(x_train, y_train)


# PREDICTING based on the testing set
logreg_pred = logreg_fit.predict(x_test)

# saving scoring data for future use
logreg_train_score = logreg_fit.score(x_train, y_train).round(4) # accuracy
logreg_test_score  = logreg_fit.score(x_test, y_test).round(4) # accuracy
gap_logreg         = abs(logreg_train_score-logreg_test_score).round(4)
# saving AUC score
logreg_auc_score   = roc_auc_score(y_true  = y_test,
                                 y_score = logreg_pred).round(4)

# SCORING the results
print('Training ACCURACY:', logreg_fit.score(x_train, y_train).round(4))
print('Testing  ACCURACY:', logreg_fit.score(x_test, y_test).round(4))
print('GAP              :',abs(logreg_train_score-logreg_test_score).round(4))
print('AUC Score        :', roc_auc_score(y_true  = y_test,
                                          y_score = logreg_pred).round(4))



Training ACCURACY: 0.7476
Testing  ACCURACY: 0.7663
GAP              : 0.0187
AUC Score        : 0.6452


In [14]:
# creating a confusion matrix
print(confusion_matrix(y_true = y_test,
                       y_pred = logreg_pred))

# unpacking the confusion matrix
logreg_tn, \
logreg_fp, \
logreg_fn, \
logreg_tp = confusion_matrix(y_true = y_test, y_pred = logreg_pred).ravel()


# printing each result one-by-one
print(f"""
True Negatives : {logreg_tn}
False Positives: {logreg_fp}
False Negatives: {logreg_fn}
True Positives : {logreg_tp}
""")

[[ 19  31]
 [ 12 122]]

True Negatives : 19
False Positives: 31
False Negatives: 12
True Positives : 122



# Classification Tree Model

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Running the Classification Tree Model and testing it. Confusion Matrix.  </strong><br>

In [15]:

# INSTANTIATING a classification tree object
full_tree = DecisionTreeClassifier(max_depth = 8)


# FITTING the training data
full_tree_fit = full_tree.fit(x_train, y_train)


# PREDICTING on new data
full_tree_pred = full_tree_fit.predict(x_test)

# saving scoring data for future use
full_tree_train_score = full_tree_fit.score(x_train, y_train).round(4) # accuracy
full_tree_test_score  = full_tree_fit.score(x_test, y_test).round(4)   # accuracy
gap_tree              = abs(full_tree_train_score-full_tree_test_score).round(4)

# saving AUC
full_tree_auc_score   = roc_auc_score(y_true  = y_test,
                                      y_score = full_tree_pred).round(4) # auc


# SCORING the model
print('Full Tree Training ACCURACY:', full_tree_fit.score(x_train,
                                                     y_train).round(4))

print('Full Tree Testing ACCURACY :', full_tree_fit.score(x_test,
                                                     y_test).round(4))
print('GAP                        :',abs(full_tree_train_score-full_tree_test_score).round(4))

print('Full Tree AUC Score        :', roc_auc_score(y_true  = y_test,
                                            y_score = full_tree_pred).round(4))




Full Tree Training ACCURACY: 0.8529
Full Tree Testing ACCURACY : 0.7554
GAP                        : 0.0975
Full Tree AUC Score        : 0.6315


In [16]:
# unpacking the confusion matrix
full_tree_tn, \
full_tree_fp, \
full_tree_fn, \
full_tree_tp = confusion_matrix(y_true = y_test, y_pred = full_tree_pred).ravel()


# printing each result one-by-one
print(f"""
True Negatives : {full_tree_tn}
False Positives: {full_tree_fp}
False Negatives: {full_tree_fn}
True Positives : {full_tree_tp}
""")


True Negatives : 18
False Positives: 32
False Negatives: 13
True Positives : 121



# GBM Model

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Running the GBM Model with the 'Best Estimators' and testing it. Confusion Matrix  </strong><br>

In [17]:
# INSTANTIATING with best_estimators
gbm_tuned = GradientBoostingClassifier(learning_rate = 1.5,
                                       max_depth     = 6,
                                       n_estimators  = 1001,
                                       warm_start    = False,
                                       random_state  = 219,
                                       loss          = 'exponential',
                                       criterion     = 'friedman_mse')


# FITTING to the FULL DATASET (due to cross-validation)
gbm_tuned_fit = gbm_tuned.fit(df_data_dict, df_target_dict)


# PREDICTING based on the testing set
gbm_tuned_pred = gbm_tuned_fit.predict(x_test)

#saving the results for future use
gbm_train_acc = gbm_tuned_fit.score(x_train, y_train).round(4)
gbm_test_acc  = gbm_tuned_fit.score(x_test, y_test).round(4)
gbm_auc       = roc_auc_score(y_true  = y_test,
                              y_score = gbm_tuned_pred).round(4)
gap_gbm       = abs(gbm_train_acc-gbm_test_acc).round(4)
# SCORING the results
print('Training ACCURACY:', gbm_tuned_fit.score(x_train, y_train).round(4))
print('Testing  ACCURACY:', gbm_tuned_fit.score(x_test, y_test).round(4))
print('GAP              :',abs(gbm_train_acc-gbm_test_acc).round(4))
print('AUC Score        :', roc_auc_score(y_true  = y_test, y_score = gbm_tuned_pred).round(4))



Training ACCURACY: 0.9237
Testing  ACCURACY: 0.9348
GAP              : 0.0111
AUC Score        : 0.8988


In [18]:
# unpacking the confusion matrix
gbm_tuned_tn, \
gbm_tuned_fp, \
gbm_tuned_fn, \
gbm_tuned_tp = confusion_matrix(y_true = y_test, y_pred = gbm_tuned_pred).ravel()


# printing each result one-by-one
print(f"""
True Negatives : {gbm_tuned_tn}
False Positives: {gbm_tuned_fp}
False Negatives: {gbm_tuned_fn}
True Positives : {gbm_tuned_tp}
""")


True Negatives : 41
False Positives: 9
False Negatives: 3
True Positives : 131



# Final Result

<hr style="height:.9px;border:none;color:#333;background-color:#333;" /><br>
<strong>Showing and comparing the Model scores   </strong><br>

In [19]:
#CREATE EXCEL FILE TO COMPARE RESULTS
# comparing results
print(f"""
Model         AUC Score     Trainig Score     Testing score     GAP     TN, FP, FN, TP
-----         ---------     -------------     -------------     ---     --------------
Logistic      {logreg_auc_score}           {logreg_train_score}             {logreg_test_score}       {gap_logreg}   {logreg_tn, logreg_fp, logreg_fn, logreg_tp}
Full Tree     {full_tree_auc_score}           {full_tree_train_score}             {full_tree_test_score}       {gap_tree}    {full_tree_tn, full_tree_fp, full_tree_fn, full_tree_tp}
GBM*          {gbm_auc }           {gbm_train_acc}             {gbm_test_acc}       {gap_gbm}    {gbm_tuned_tn, gbm_tuned_fp,gbm_tuned_fn,gbm_tuned_tp}

*Final Model""")






Model         AUC Score     Trainig Score     Testing score     GAP     TN, FP, FN, TP
-----         ---------     -------------     -------------     ---     --------------
Logistic      0.6452           0.7476             0.7663       0.0187   (19, 31, 12, 122)
Full Tree     0.6315           0.8529             0.7554       0.0975    (18, 32, 13, 121)
GBM*          0.8988           0.9237             0.9348       0.0111    (41, 9, 3, 131)

*Final Model
