# FETAL HEALTH CLASSIFICATION

This dataset contains 2126 records of features extracted from Cardiotocogram exams, which were then classified by three expert obstetricians into 3 classes:

1. Normal
2. Suspect
3. Pathological

In [None]:
import pandas as pd
import numpy as np

from joblib import dump, load
import pickle


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  MinMaxScaler

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.gaussian_process import GaussianProcessClassifier 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

import matplotlib.pyplot as plt
import seaborn as sns


import warnings
%matplotlib inline

In [None]:
# Load the fetal-health CSV into a DataFrame; every later cell reads from `data`.
data = pd.read_csv('../input/fetal-health-classification/fetal_health.csv')

## Data summary

In [None]:
# Preview the first rows of the table.
data.head()

In [None]:
# Print the column names, non-null counts and dtypes.
data.info()

In [None]:
# Summary statistics for the numeric columns.
data.describe()

### Checking for null values

In [None]:
# Count the missing values in each column.
print(data.isnull().sum())

### Data scaling

In [None]:
def scale(data):
    """Return *data* after min-max scaling.

    A new ``MinMaxScaler`` is created and fitted on ``data`` itself on every
    call, and the output of its ``fit_transform`` is returned unchanged.
    """
    return MinMaxScaler().fit_transform(data)

In [None]:
# Columns left out of the feature matrix: the target itself plus the
# histogram-derived descriptors. The demo cell reuses this list to build its
# input row, so it must match the features the models are trained on.
drop_col = ['fetal_health', 'histogram_width', 'histogram_min', 'histogram_max',
            'histogram_number_of_peaks', 'histogram_number_of_zeroes', 'histogram_mode','histogram_median', 
            'histogram_mean', 'histogram_variance', 'histogram_tendency'  ]
X = data.drop(drop_col, axis=1)

# Keep the target as a 1-D integer Series. scikit-learn estimators expect y
# with shape (n_samples,); a one-column DataFrame triggers a
# DataConversionWarning on every fit.
y = data['fetal_health'].astype('int')

# Display name for each integer class label.
predictions = {1: "Normal", 2 : "Suspect", 3: "Pathological"}


# Optional min-max scaling of the features (currently disabled):
# scaled_data = scale(X)
# scaled_data = pd.DataFrame(scaled_data, columns = X.columns)
# scaled_data

### Splitting data

In [None]:
# Hold out 20% of the rows for evaluation.
# - random_state fixes the shuffle so the reported scores are reproducible.
# - stratify keeps the class proportions equal in both partitions, so every
#   class that is present in y also appears in the test set used by the reports.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Training Models

In [None]:
# Candidate models, keyed by the display name used in the training log.
# Insertion order matters: accuracies are later matched to names by position.
classifier_dict = {
                   # probability=True is required so that the one-vs-rest wrapper
                   # exposes predict_proba, which the demo cell calls on the
                   # selected model.
                   'OnevsRest':OneVsRestClassifier(SVC(probability=True)), 
                   'MLP': MLPClassifier(), 
                   'KNN': KNeighborsClassifier(),
                   'Gaussian_process': GaussianProcessClassifier(),
                   'Decison_tree': DecisionTreeClassifier(),
                   'Random_forest': RandomForestClassifier(),
                   'ADA_boost': AdaBoostClassifier(),
                   'Gaussian_NB': GaussianNB(),
                   'Quadratic_Discriminant_Analysis': QuadraticDiscriminantAnalysis()
                  }

In [None]:
# Class labels and their display names, in a fixed order. Passing them
# explicitly keeps every report and confusion matrix the same shape even if a
# class happens to be absent from the test split or from the predictions.
class_labels = list(predictions)
target_names = [predictions[label] for label in class_labels]

# One confusion-matrix panel per classifier, laid out side by side.
f = plt.figure(figsize=(80,8))
acc_list = []  # test accuracy per classifier, in classifier_dict order

for index, (key, estimator) in enumerate(classifier_dict.items()):

    print(f'Training {key} ........')
    # fit() trains the estimator in place and returns it, so the entry stored
    # in classifier_dict is the fitted model afterwards.
    clf = estimator.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Classification report
    print(classification_report(y_test, y_pred, labels=class_labels,
                                target_names=target_names))

    # Model Accuracy
    acc = accuracy_score(y_test, y_pred)
    # Built-in round() works for both NumPy scalars and plain Python floats.
    print(f'Accuracy of {key} is {round(acc, 3)}' )

    # Confusion matrix plot: size the grid from the number of classifiers and
    # title each panel so the heatmaps can be told apart.
    ax = f.add_subplot(1, len(classifier_dict), index + 1)
    con_mat = confusion_matrix(y_test, y_pred, labels=class_labels)
    sns.heatmap(con_mat, annot=True, fmt='d', cmap='PuBu',
                xticklabels=target_names, yticklabels=target_names, ax=ax)
    ax.set_title(key)

    acc_list.append(acc)

### Choosing the most accurate model

In [None]:
# Pair each classifier name with its test accuracy (both are in the same
# order) and pick the best one. max() returns the first maximum, so ties go
# to the earliest classifier in classifier_dict.
best_name, best_acc = max(zip(classifier_dict, acc_list), key=lambda pair: pair[1])
print(f'{best_name} classifier has the maximum accuracy  = {best_acc:.1%}')

# Reuse the estimator that was already fitted and scored in the training loop.
# Refitting here would produce a different model for the randomised
# classifiers, one that the reported accuracy no longer describes.
model = classifier_dict[best_name]
    

### DEMO

In [None]:
# Use the first record of the dataset as a single-row demo input.
demo = data.iloc[:1]
demo_pred = demo.drop(drop_col, axis=1)
# True label of that record, read by column name rather than by position so
# it does not depend on the column order or on the row index labels.
actual = int(demo['fetal_health'].iloc[0])

pred = model.predict(demo_pred)[0]

# Translate the predicted class label into its display name; fall back to an
# empty string when the label is not one of the known classes.
output = predictions.get(pred, '')

In [None]:
# Confidence of the demo prediction: the largest class probability that the
# model assigns to the demo row. The input must be the same feature frame that
# was passed to predict() (demo_pred); no scaled variant is ever created.
probability = max(model.predict_proba(demo_pred)[0])
print(f'The Probaility of the fetus being {output} is {probability*100}%')
print(f'The actual value was {predictions[actual]}' )


### Saving the model

In [None]:
# pickle.dump(model, open('model.pkl', 'wb'))

In [None]:
# models = pickle.load(open('model.pkl','rb'))