In [4]:
# Importing necessary packages and libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split as ttsplit
from sklearn.tree import DecisionTreeClassifier as dtree_cl
from sklearn.metrics import classification_report as classrep
from sklearn.metrics import confusion_matrix as confm
from sklearn.metrics import accuracy_score as accscore

from sklearn import tree
from sklearn.tree import export_graphviz

import pydotplus
import matplotlib.pyplot as plt
import matplotlib.image as img

#loading the dataframe

work_file = pd.read_csv("InfantMortalityRateTest4.csv",encoding='latin-1')

#designating the target and feature variables

TV = work_file[['Infant Mortality']]
FV = work_file[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV, TV, test_size = 0.25)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print(TV_test)

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

    Infant Mortality
63     Below Average
99     Above Average
97     Above Average
108    Below Average
36     Above Average
26     Above Average
126    Below Average
116    Below Average
6      Below Average
157    Below Average
58     Above Average
135    Below Average
141    Below Average
144    Above Average
54     Below Average
12     Below Average
79     Below Average
174    Below Average
56     Below Average
145    Above Average
161    Below Average
28     Below Average
172    Above Average
88     Below Average
27     Above Average
76     Below Average
151    Above Average
153    Below Average
140    Above Average
48     Below Average
24     Above Average
175    Below Average
131    Below Average
147    Below Average
18     Above Average
22     Below Average
117    Below Average
11     Above Average
16     Above Average
110    Below Average
5      Below Average
112    Below Average
170    Below Average
171    Below Average
155    Above Average


Model Accuracy
0.911111111111111

In [5]:
#decision tree visualization
TREE_MODEL2 = tree_model.fit(FV_train, TV_train)
cols = list(FV_train.columns.values)
tree_graph = export_graphviz(TREE_MODEL2, filled=True, rounded=True, special_characters=True,feature_names = cols)
print(tree_graph)

digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label=<Life Expectancy at Birth &le; 71.0<br/>gini = 0.468<br/>samples = 134<br/>value = [50, 84]>, fillcolor="#afd7f4"] ;
1 [label=<Adolescent fertility rate &le; 19.0<br/>gini = 0.042<br/>samples = 47<br/>value = [46, 1]>, fillcolor="#e6843d"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label=<Adolescent fertility rate &le; 17.5<br/>gini = 0.444<br/>samples = 3<br/>value = [2, 1]>, fillcolor="#f2c09c"] ;
1 -> 2 ;
3 [label=<gini = 0.0<br/>samples = 2<br/>value = [2, 0]>, fillcolor="#e58139"] ;
2 -> 3 ;
4 [label=<gini = 0.0<br/>samples = 1<br/>value = [0, 1]>, fillcolor="#399de5"] ;
2 -> 4 ;
5 [label=<gini = 0.0<br/>samples = 44<br/>value = [44, 0]>, fillcolor="#e58139"] ;
1 -> 5 ;
6 [label=<Life Expectancy at Birth &le; 74.5<br/>gini = 0.088<br/>samples = 87<br/>value = [4, 83]>, fillcolor="#43a2e6"] ;
0 -> 6 [labeldistance=2.5, labela

In [6]:
dec_tree_image = pydotplus.graph_from_dot_data(tree_graph)
dec_tree_image.write_png('DT_Health_Global.png')

InvocationException: GraphViz's executables not found

In [24]:
tree_model.feature_importances_

array([0.89842364, 0.00285784, 0.05162052, 0.01582802, 0.03126999])

In [26]:
#Continents

c1 = 'North America'
c2 = 'Europe'
c3 = 'Asia'
c4 = 'Africa'
c5 = 'Oceania'
c6 = 'South America'

In [30]:
# Continent America
work_file_America = work_file[work_file['Continent'] == c1]

#designating the target and feature variables

TV_America = work_file_America[['Infant Mortality']]
FV_America = work_file_America[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_America, TV_America, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual  Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)

print(tree_model.feature_importances_)

tree_graph_America = export_graphviz(TREE_MODEL2)
print(tree_graph)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_America)
dec_tree_image.write_png('DT_Health_Global_America.png')

Test...     Infant Mortality
63     Below Average
101    Below Average
170    Below Average
12     Below Average
69     Below Average
68     Above Average
PRed... ['Below Average' 'Below Average' 'Below Average' 'Below Average'
 'Below Average' 'Below Average']


This is the comparision table for predicted and actual  Infant Mortality
........ 





Model Accuracy
0.8333333333333334




Confusion Matrix
[[0 1]
 [0 5]]




Classification Report
               precision    recall  f1-score   support

Above Average       0.00      0.00      0.00         1
Below Average       0.83      1.00      0.91         5

     accuracy                           0.83         6
    macro avg       0.42      0.50      0.45         6
 weighted avg       0.69      0.83      0.76         6



[0. 0. 0. 0. 0.]
digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label=<Life Expectancy at Birth &le; 72.5<br/>gini = 0.471<br/>samples 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


True

In [31]:
# Continent Europe
work_file_Europe = work_file[work_file['Continent'] == c2]

#designating the target and feature variables

TV_Europe = work_file_Europe[['Infant Mortality']]
FV_Europe = work_file_Europe[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_Europe, TV_Europe, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)
tree_graph_Europe = export_graphviz(TREE_MODEL2)
print(tree_graph)

print(tree_model.feature_importances_)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_Europe)
dec_tree_image.write_png('DT_Health_Global_Europe.png')


Test...     Infant Mortality
19     Below Average
129    Below Average
130    Below Average
147    Below Average
152    Below Average
115    Below Average
55     Below Average
98     Below Average
56     Below Average
125    Below Average
PRed... ['Below Average' 'Below Average' 'Below Average' 'Below Average'
 'Below Average' 'Below Average' 'Below Average' 'Below Average'
 'Below Average' 'Below Average']


This is the comparision table for predicted and actual Infant Mortality
........ 





Model Accuracy
1.0




Confusion Matrix
[[10]]




Classification Report
               precision    recall  f1-score   support

Below Average       1.00      1.00      1.00        10

     accuracy                           1.00        10
    macro avg       1.00      1.00      1.00        10
 weighted avg       1.00      1.00      1.00        10



digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label=<Life Expect

True

In [32]:
# Continent Asia
work_file_Asia = work_file[work_file['Continent'] == c3]

#designating the target and feature variables

TV_Asia = work_file_Asia[['Infant Mortality']]
FV_Asia = work_file_Asia[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_Asia, TV_Asia, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)
tree_graph_Asia = export_graphviz(TREE_MODEL2)
print(tree_graph)

print(tree_model.feature_importances_)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_Asia)
dec_tree_image.write_png('DT_Health_Global_Asia.png')


Test...     Infant Mortality
117    Below Average
131    Below Average
123    Below Average
11     Above Average
84     Below Average
73     Below Average
164    Above Average
127    Below Average
95     Below Average
72     Above Average
116    Below Average
128    Below Average
PRed... ['Below Average' 'Below Average' 'Above Average' 'Above Average'
 'Below Average' 'Above Average' 'Above Average' 'Below Average'
 'Below Average' 'Above Average' 'Below Average' 'Below Average']


This is the comparision table for predicted and actual Infant Mortality
........ 





Model Accuracy
0.8333333333333334




Confusion Matrix
[[3 0]
 [2 7]]




Classification Report
               precision    recall  f1-score   support

Above Average       0.60      1.00      0.75         3
Below Average       1.00      0.78      0.88         9

     accuracy                           0.83        12
    macro avg       0.80      0.89      0.81        12
 weighted avg       0.90      0.83      0.84        1

True

In [33]:
# Continent Africa
work_file_Africa = work_file[work_file['Continent'] == c4]

#designating the target and feature variables

TV_Africa = work_file_Africa[['Infant Mortality']]
FV_Africa = work_file_Africa[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_Africa, TV_Africa, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)
tree_graph_Africa = export_graphviz(TREE_MODEL2)
print(tree_graph)

print(tree_model.feature_importances_)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_Africa)
dec_tree_image.write_png('DT_Health_Global_Africa.png')


Test...     Infant Mortality
97     Above Average
36     Above Average
35     Above Average
65     Above Average
16     Above Average
94     Above Average
159    Above Average
105    Below Average
90     Above Average
24     Above Average
106    Above Average
145    Above Average
93     Above Average
PRed... ['Above Average' 'Above Average' 'Above Average' 'Above Average'
 'Above Average' 'Above Average' 'Above Average' 'Below Average'
 'Above Average' 'Above Average' 'Above Average' 'Above Average'
 'Above Average']


This is the comparision table for predicted and actual Infant Mortality
........ 





Model Accuracy
1.0




Confusion Matrix
[[12  0]
 [ 0  1]]




Classification Report
               precision    recall  f1-score   support

Above Average       1.00      1.00      1.00        12
Below Average       1.00      1.00      1.00         1

     accuracy                           1.00        13
    macro avg       1.00      1.00      1.00        13
 weighted avg       1.00  

True

In [34]:
# Continent Oceania
work_file_Oceania = work_file[work_file['Continent'] == c5]

#designating the target and feature variables

TV_Oceania = work_file_Oceania[['Infant Mortality']]
FV_Oceania = work_file_Oceania[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_Oceania, TV_Oceania, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)
tree_graph_Oceania = export_graphviz(TREE_MODEL2)
print(tree_graph)

print(tree_model.feature_importances_)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_Oceania)
dec_tree_image.write_png('DT_Health_Oceania.png')


Test...     Infant Mortality
160    Below Average
102    Below Average
54     Below Average
PRed... ['Below Average' 'Above Average' 'Below Average']


This is the comparision table for predicted and actual Infant Mortality
........ 





Model Accuracy
0.6666666666666666




Confusion Matrix
[[0 0]
 [1 2]]




Classification Report
               precision    recall  f1-score   support

Above Average       0.00      0.00      0.00         0
Below Average       1.00      0.67      0.80         3

     accuracy                           0.67         3
    macro avg       0.50      0.33      0.40         3
 weighted avg       1.00      0.67      0.80         3



digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label=<Life Expectancy at Birth &le; 72.5<br/>gini = 0.471<br/>samples = 134<br/>value = [51, 83]>, fillcolor="#b3d9f5"] ;
1 [label=<Life Expectancy at Birth &le; 71.5<br/>gini = 0.075<br/>samples = 51

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


True

In [35]:
# Continent South America
work_file_SouthAM = work_file[work_file['Continent'] == c6]

#designating the target and feature variables

TV_SouthAM = work_file_SouthAM[['Infant Mortality']]
FV_SouthAM = work_file_SouthAM[['Life Expectancy at Birth', 'Life Expectancy at 60 years', 'Maternal mortality ratio', 'Adolescent fertility rate', 'Total fertility rate']]

#train-test split

FV_train, FV_test, TV_train, TV_test = ttsplit(FV_SouthAM, TV_SouthAM, test_size = 0.25, random_state= 0)

#designate and fit the decision tree model 

tree_model = dtree_cl()
tree_model.fit(FV_train, TV_train)

#prediction
TV_pred = tree_model.predict(FV_test)

print("Test...", TV_test)
print("PRed...", TV_pred)

#comp = pd.DataFrame({
#   'Real GDP': TV_test,
#   'Predicted GDP': TV_pred
#})

print('\n')
print('This is the comparision table for predicted and actual Infant Mortality')
print('........', '\n')
#print(comparative_table)
print('\n')

#diagnostics

print('\n')
print("Model Accuracy")
print(accscore(TV_test,TV_pred))
print('\n')

print('\n')
print('Confusion Matrix')
print(confm(TV_test,TV_pred))
print('\n')

print('\n')
print('Classification Report')
print(classrep (TV_test,TV_pred))
print('\n')

#decision tree visualization

TREE_MODEL3 = tree_model.fit(FV_train, TV_train)
tree_graph_SouthAM = export_graphviz(TREE_MODEL2)
print(tree_graph)

print(tree_model.feature_importances_)

dec_tree_image = pydotplus.graph_from_dot_data(tree_graph_SouthAM)
dec_tree_image.write_png('DT_Health_Global_SouthAM.png')


Test...     Infant Mortality
67     Above Average
174    Below Average
34     Below Average
PRed... ['Below Average' 'Below Average' 'Below Average']


This is the comparision table for predicted and actual Infant Mortality
........ 





Model Accuracy
0.6666666666666666




Confusion Matrix
[[0 1]
 [0 2]]




Classification Report
               precision    recall  f1-score   support

Above Average       0.00      0.00      0.00         1
Below Average       0.67      1.00      0.80         2

     accuracy                           0.67         3
    macro avg       0.33      0.50      0.40         3
 weighted avg       0.44      0.67      0.53         3



digraph Tree {
node [shape=box, style="filled, rounded", color="black", fontname="helvetica"] ;
edge [fontname="helvetica"] ;
0 [label=<Life Expectancy at Birth &le; 72.5<br/>gini = 0.471<br/>samples = 134<br/>value = [51, 83]>, fillcolor="#b3d9f5"] ;
1 [label=<Life Expectancy at Birth &le; 71.5<br/>gini = 0.075<br/>samples = 51

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


True