In this notebook, we predict whether the health of a fetus is classified as normal, suspect, or pathological based on cardiotocography (CTG) data. We will practice several classification algorithms and compare them, aiming for the lowest prediction error.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the CTG measurements; the `fetal_health` column is used as the target below.
csv_path = "../input/fetal-health-classification/fetal_health.csv"
train = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to get a feel for the columns.
train.head(n=5)

In [None]:
# Summary statistics for the columns of the frame.
train.describe()

In [None]:
# Column dtypes and non-null counts.
train.info()

# Checking for null values

In [None]:
# Number of missing values in each column.
train.isna().sum()

# Analyze the target column

In [None]:
# Bar chart of how many rows fall into each `fetal_health` class.
sns.countplot(data=train, x="fetal_health")

The plot above shows that the target classes are imbalanced.

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(20, 10))
correlations = train.corr()
sns.heatmap(correlations, annot=True, cmap="viridis")

Remove the highly correlated features `histogram_median`, `histogram_mean`, and `histogram_mode`.

In [None]:
# Drop the histogram summary columns that are treated as highly correlated.
# `columns=` replaces the positional `axis` argument, which pandas 2.x no longer
# accepts. Reassigning (instead of `inplace=True`) together with
# `errors="ignore"` keeps this cell safe to re-run once the columns are gone.
train = train.drop(
    columns=['histogram_median', 'histogram_mean', 'histogram_mode'],
    errors="ignore",
)

In [None]:
# Feature matrix: every column except the target.
# `columns=` is used because pandas 2.x rejects a positional `axis` argument.
X = train.drop(columns=['fetal_health'])

In [None]:
# Target vector: the `fetal_health` column.
y = train.loc[:, 'fetal_health']

In [None]:
# Sanity check: shapes of the feature matrix and the target, one per line.
print(X.shape, y.shape, sep="\n")

In [None]:
# Importing train_test_split
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The split is stratified on the target
# and uses a fixed seed so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
from sklearn import preprocessing

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = preprocessing.StandardScaler()
scaler = scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,VotingClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [None]:
# Names of the classifier classes to benchmark; the training loop below
# turns each name into a class and instantiates it.
classification_models = [
    'DecisionTreeClassifier',
    'RandomForestClassifier',
    'XGBClassifier',
]

In [None]:
# Accumulators filled by the training cells: confusion matrices, accuracies,
# model names, and (name, estimator) pairs for the voting ensemble.
cm, acc, models, estimators = [], [], [], []

In [None]:
# Train each listed classifier, score it on the held-out split, and record the
# results in `models`, `estimators`, `cm` and `acc`.

# Empty the accumulators first so that re-running this cell does not append
# duplicate rows; duplicate estimator names would also be rejected later by
# VotingClassifier.
for results in (cm, acc, models, estimators):
    results.clear()

# Recent xgboost releases require class labels numbered 0..n_classes-1, while
# the labels fitted here come straight from the `fetal_health` column.
# Encode the labels for fitting and decode the predictions afterwards, so that
# the metrics are still computed against the original label values in y_test.
label_encoder = preprocessing.LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)

for model_name in classification_models:
    # Resolve the class by name with a plain lookup rather than eval(), which
    # would execute any expression placed in the list.
    model_class = globals()[model_name]

    # Fixed seed (same value as the train/test split) so results are repeatable.
    model = model_class(random_state=42)

    model.fit(X_train, y_train_encoded)
    y_pred = label_encoder.inverse_transform(model.predict(X_test))

    models.append(type(model).__name__)
    estimators.append((type(model).__name__, model))
    cm.append(confusion_matrix(y_test, y_pred))
    acc.append(accuracy_score(y_test, y_pred))
    

In [None]:
# Combine the individually trained classifiers into one voting ensemble
# and fit it on the training split.
vc = VotingClassifier(estimators=estimators)
vc.fit(X=X_train, y=y_train)

In [None]:
# Score the voting ensemble on the held-out split and record it alongside
# the individual models.
y_pred = vc.predict(X_test)
models.append(vc.__class__.__name__)
cm.append(confusion_matrix(y_test, y_pred))
acc.append(accuracy_score(y_test, y_pred))


In [None]:
# Collect the per-model results under the column names used in the summary table.
model_dict = dict(Models=models, CM=cm, Accuracy=acc)

In [None]:
# Build the frame with one row per result key, then display it transposed so
# that each model is a row and each metric a column.
model_df = pd.DataFrame.from_dict(model_dict, orient="index")
model_df.T