**The task is to classify fetal health. I use Random Forest and XGBoost classifiers to predict one of three fetal health classes: Normal, Suspect, or Pathological.**

In [1]:
#import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [2]:
#Load the fetal health dataset from its CSV file into a DataFrame
DATA_PATH = '../input/fetal-health-classification/fetal_health.csv'
fetal_data = pd.read_csv(DATA_PATH)

In [3]:
#Statistical descriptions: show the summary statistics produced by
#DataFrame.describe() for fetal_data as the cell output
fetal_data.describe()

In [4]:
#Preview the first 5 rows of the dataset
fetal_data.head(n = 5)

In [5]:
#Info: print the DataFrame summary produced by DataFrame.info()
#for fetal_data
fetal_data.info()

In [6]:
#Count the missing values in every column (isna is the same check as isnull)
fetal_data.isna().sum()

In [7]:
#Class distribution of the target column.
#seaborn is imported with the other libraries in the first cell, so every
#dependency of the notebook is visible in one place.
fig, ax = plt.subplots()
sns.countplot(data = fetal_data, x = "fetal_health", ax = ax)
#Label the figure so it can be understood on its own
ax.set(title = "Number of records per fetal_health class", xlabel = "fetal_health", ylabel = "count")
plt.show()

Here, 1.0, 2.0, and 3.0 represent Normal, Suspect, and Pathological, respectively. Because I noticed a class imbalance problem in this dataset, I chose the Random Forest and XGBoost algorithms.

In [8]:
#Feature selection: separate the target column from the predictors
TARGET_COLUMN = "fetal_health"
y = fetal_data[TARGET_COLUMN]                    #only fetal_health as the target
X = fetal_data.drop(columns = [TARGET_COLUMN])   #every other column is used as a feature

In [9]:
#Split the dataset into training (80%) and test (20%) sets.
#stratify = y keeps the class proportions approximately the same in both
#splits, so the rarer classes stay represented in the test set despite the
#class imbalance in fetal_health. random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42, stratify = y)

In [10]:
#Report the shape of the full dataset, then the (features, target) shapes
#of the training split followed by the test split
print(fetal_data.shape)
for features, target in ((X_train, y_train), (X_test, y_test)):
    print((features.shape, target.shape))

# Random Forest Classifier

In [11]:
#Build the Random Forest (fixed seed) and train it on the training split;
#fit returns the estimator itself, so construction and fitting are chained
rf = RandomForestClassifier(random_state = 42).fit(X_train, y_train)
#Show the fitted estimator as the cell output
rf

In [12]:
#Evaluate on test data: predict the held-out samples and report the accuracy
rf_pred = rf.predict(X_test)
test_acc = accuracy_score(y_true = y_test, y_pred = rf_pred)
print('Accuracy {:.3f}%'.format(100 * test_acc))

In [13]:
#Compute the Random Forest confusion matrix on the test set and plot it,
#labelling the axes with the classes the forest was trained on
rf_cm = confusion_matrix(y_test, rf_pred)
rf_cm_display = ConfusionMatrixDisplay(confusion_matrix = rf_cm, display_labels = rf.classes_)
rf_cm_display.plot()
plt.show()

In [14]:
#Performance Evaluation
#For single-label multiclass data, micro-averaged precision, recall and F1 are
#all equal to the accuracy, so they add no information. Macro averaging is used
#instead: each class contributes equally, which exposes weak performance on the
#less frequent classes.
print('Precision: {:.3f}%'.format(precision_score(y_test, rf_pred,average = 'macro')*100))
print('Recall: {:.3f}%'.format(recall_score(y_test, rf_pred,average = 'macro')*100))
print('F1-score: {:.3f}%\n'.format(f1_score(y_test, rf_pred,average = 'macro')*100))

#Per-class precision/recall/F1 plus the averaged rows
print('\nClassification Report:\n')
print(classification_report(y_test, rf_pred, target_names=['1.0', '2.0', '3.0']))

# XGBoost Classifier

In [15]:
#Fitting XGBoost Classifier to the Training set
#Current XGBoost releases only accept class labels 0..n_classes-1, while the
#target holds 1.0/2.0/3.0, so the labels are encoded as indices before fitting.
class_labels = np.sort(y_train.unique())
label_to_index = {label: index for index, label in enumerate(class_labels)}
xgb = XGBClassifier(random_state = 42)   #same seed as the Random Forest cell
xgb.fit(X_train, y_train.map(label_to_index))

#Predicting the Test set results
#The model predicts encoded indices; map them back to the original labels so
#they are comparable with y_test. Note that xgb.classes_ is expressed in the
#encoded index space, not in the original label values.
xgb_pred = class_labels[np.asarray(xgb.predict(X_test)).astype(int)]

#Accuracy on test data
print('Accuracy: {:.3f}%'.format(accuracy_score(y_test, xgb_pred)*100))

In [16]:
#Making the Confusion Matrix
#Take the labels from the data rather than from xgb.classes_: depending on the
#XGBoost version, classes_ may hold encoded indices (0, 1, 2) instead of the
#dataset's own label values. Passing the same label array to both calls
#guarantees that the tick labels match the matrix rows and columns.
xgb_labels = np.union1d(y_test, xgb_pred)
xgb_cm = confusion_matrix(y_test, xgb_pred, labels = xgb_labels)
ConfusionMatrixDisplay(confusion_matrix = xgb_cm, display_labels = xgb_labels).plot()
plt.show()

In [17]:
#Performance Evaluation
#For single-label multiclass data, micro-averaged precision, recall and F1 are
#all equal to the accuracy, so they add no information. Macro averaging is used
#instead: each class contributes equally, which exposes weak performance on the
#less frequent classes.
print('Precision: {:.3f}%'.format(precision_score(y_test, xgb_pred,average = 'macro')*100))
print('Recall: {:.3f}%'.format(recall_score(y_test, xgb_pred,average = 'macro')*100))
print('F1-score: {:.3f}%\n'.format(f1_score(y_test, xgb_pred,average = 'macro')*100))

#Per-class precision/recall/F1 plus the averaged rows
print('\nClassification Report:\n')
print(classification_report(y_test, xgb_pred, target_names=['1.0', '2.0', '3.0']))