In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the fetal-health CSV (path is relative to the notebook) into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='../input/fetal-health-classification/fetal_health.csv'
)

### Setting option to display max columns

In [None]:
# None removes the column cap so wide frames are displayed without truncation.
pd.options.display.max_columns = None

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

### Finding any null values

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
data.isna().sum()

### Checking datatypes of all columns

In [None]:
# Show the dtype pandas assigned to each column of the loaded frame.
data.dtypes

### Splitting data into independent and target variables

In [None]:
# Separate predictors from the label by column name rather than by position,
# so the split stays correct even if the column order of the CSV changes.
# 'fetal_health' is the same target column whose class balance is inspected
# further down in this notebook.
X = data.drop(columns='fetal_health')
y = data['fetal_health']

### Finding the correlation between all independent variables

In [None]:
# Heatmap of the pairwise correlation matrix of the feature columns.
sns.heatmap(data=X.corr())

### Checking the distribution of all the continuous features

In [None]:
# Draw one histogram per feature column.
# sns.distplot is deprecated; sns.histplot is its replacement for plain
# histograms (kde=False keeps the density curve off, as before).
for column in X.columns:
    fig, ax = plt.subplots()
    sns.histplot(X[column], kde=False, ax=ax)
    plt.show()
    # Release the figure explicitly so the loop does not accumulate open figures.
    plt.close(fig)

### Performing feature scaling (Standard Scaler)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise every feature, then wrap the resulting array back into a
# DataFrame carrying the original feature names.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later in the notebook.
scale = StandardScaler()
X = pd.DataFrame(scale.fit_transform(X), columns=data.iloc[:, :-1].columns)

### Checking if the dataset is balanced or not

In [None]:
# Number of rows per target class; uneven counts indicate class imbalance.
data['fetal_health'].value_counts()

### Using oversampling technique for imbalanced dataset

In [None]:
from imblearn.over_sampling import RandomOverSampler as ROS

In [None]:
# Random oversampler (imblearn's RandomOverSampler, imported as ROS) with a
# fixed seed so the resampling is repeatable.
ros = ROS(random_state=42)

In [None]:
# Oversample the scaled features and their labels; returns the resampled pair.
X_res, y_res = ros.fit_resample(X,y)

In [None]:
from collections import Counter

# Tally the labels after oversampling and report the per-class counts.
resampled_counts = Counter(y_res)
print('Resampled dataset shape %s' % resampled_counts)

### Performing train test split

In [None]:
from sklearn.model_selection import train_test_split

# Split the ORIGINAL (not yet oversampled) rows first. Splitting after random
# oversampling lets exact copies of one row land in both partitions, which
# leaks training rows into the test set and inflates every test metric.
# stratify keeps the class proportions equal in both partitions and
# random_state makes the split repeatable across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Balance only the training partition; the test partition keeps the natural
# class distribution so the reported scores reflect unseen data.
X_train, y_train = ros.fit_resample(X_train, y_train)

### SVM with GridSearchCV

In [None]:
from sklearn.svm import SVC

In [None]:
# Base SVC estimator handed to the grid search below; kernel, C and gamma are
# not set here and are supplied by the parameter grid.
model = SVC(decision_function_shape='ovo')

In [None]:
from sklearn.model_selection import GridSearchCV

# Two sub-grids: a linear kernel tuned over C only, and an RBF kernel tuned
# over both C and gamma.
parameters = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    dict(C=[1, 10, 100, 1000], kernel=['rbf'],
         gamma=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
]

# 10-fold cross-validated search scored on accuracy, using all CPU cores.
# fit() returns the fitted search object itself, so it can be chained.
grid_search = GridSearchCV(
    model,
    parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).fit(X_train, y_train)

In [None]:
# Mean cross-validated accuracy of the best parameter combination found by
# the grid search.
accuracy = grid_search.best_score_
accuracy

In [None]:
# Parameter combination that achieved the best cross-validated score.
grid_search.best_params_

In [None]:
# Build the final SVC from the parameters the grid search actually selected
# instead of hard-coding the winner of one particular run, so this cell stays
# in sync with the search whenever the data or the split changes.
# probability=True is needed for predict_proba (used for the AUC later);
# random_state fixes the randomness of the internal probability calibration.
model = SVC(
    **grid_search.best_params_,
    decision_function_shape='ovo',
    probability=True,
    random_state=42,
)

In [None]:
# Train the configured classifier on the training partition.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test rows.
pred = model.predict(X_test)

### Finding accuracy score, AUC score, F1 score for kernel SVM

In [None]:
from sklearn.metrics import confusion_matrix as cm

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes.
cm(y_true=y_test, y_pred=pred)

In [None]:
from sklearn.metrics import accuracy_score as acc

In [None]:
# Fraction of test rows whose predicted label matches the true label.
acc(y_true=y_test, y_pred=pred)

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, auc

In [None]:
# Macro-averaged F1: the unweighted mean of the per-class F1 scores.
# Micro averaging was replaced because, for single-label multiclass
# predictions, micro-F1 is numerically identical to the accuracy already
# reported above and therefore adds no information.
f1_score(y_test, pred, average='macro')

In [None]:
# Per-class probability estimates for the test rows; fed to roc_auc_score below.
probs = model.predict_proba(X_test)

In [None]:
# Multiclass ROC AUC computed with the one-vs-one scheme from the class
# probabilities.
roc_auc_score(y_true=y_test, y_score=probs, multi_class='ovo')

### XGBoost with GridSearchCV

In [None]:
from xgboost import XGBClassifier

In [None]:
# Default-configured XGBoost classifier; serves as the base estimator for the
# grid search below. Note that this rebinds `model`, replacing the SVC.
model = XGBClassifier()

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

# Single grid covering both learning rates; it enumerates exactly the same
# candidates as two separate per-learning-rate grids would.
parameters = {'max_depth': [3, 6, 9, 12],
              'learning_rate': [0.1, 0.01],
              'n_estimators': [50, 100, 200]}

# 10-fold cross-validated search scored on accuracy, using all CPU cores.
grid_search = GridSearchCV(estimator=model,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10,
                           n_jobs=-1)

# Recent XGBoost releases reject class labels that are not encoded as
# 0..n_classes-1, so the labels are encoded for the search. The encoding is a
# relabelling only: best_params_ and best_score_ are unaffected by it.
grid_search = grid_search.fit(X_train, LabelEncoder().fit_transform(y_train))

In [None]:
# Parameter combination that achieved the best cross-validated score.
grid_search.best_params_

In [None]:
# Mean cross-validated accuracy of the best parameter combination found by
# the grid search.
accuracy = grid_search.best_score_
accuracy

In [None]:
from sklearn.preprocessing import LabelEncoder

# Build the final model from the parameters the grid search selected. The
# previous hard-coded max_depth/n_estimators pair omitted learning_rate, so
# the fitted model did not correspond to any candidate the search evaluated.
model = XGBClassifier(**grid_search.best_params_)

# Recent XGBoost releases require class labels encoded as 0..n_classes-1.
# Encode for training, then map predictions back to the original label
# values so they can be compared directly against y_test.
label_encoder = LabelEncoder()
model.fit(X_train, label_encoder.fit_transform(y_train))
pred = label_encoder.inverse_transform(model.predict(X_test))

### Calculating accuracy, F1 score and AUC score for the XGBoost ensemble technique

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, auc, confusion_matrix as cm, accuracy_score as acc

# Confusion matrix: rows are true classes, columns are predicted classes.
print('confusion\n',cm(y_test,pred))
print('\n')
print('accuracy',acc(y_test,pred))
print('\n')
# Macro-averaged F1 (unweighted mean of per-class F1). Micro averaging was
# replaced because, for single-label multiclass predictions, micro-F1 is
# numerically identical to the accuracy printed just above.
print('f1',f1_score(y_test,pred,average='macro'))
print('\n')
# Class probabilities are required for the one-vs-one multiclass ROC AUC.
probs = model.predict_proba(X_test)
print('auc',roc_auc_score(y_test,probs,multi_class='ovo'))
print('\n')