In [248]:
import os
# Print the full path of every file found beneath the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [249]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import KFold, cross_validate, train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import time

In [250]:
# Seed NumPy's global RNG, then read the fetal-health CSV and preview its first rows.
np.random.seed(0)
csv_path = "../input/fetal-health-classification/fetal_health.csv"
data = pd.read_csv(csv_path)
data.head(5)

In [251]:
# (rows, columns) of the loaded frame.
data.shape

In [252]:
# Summary statistics for each numeric column.
data.describe()

In [253]:
# Column dtypes and non-null counts.
data.info()

In [254]:
# Number of missing values in each column.
data.isnull().sum()

In [255]:
# Class balance of the target: a bar chart of `fetal_health` followed by the exact per-class counts.
sns.set_theme(style="darkgrid")
sns.countplot(data=data, x="fetal_health")
data["fetal_health"].value_counts()

In [256]:
# Pairwise correlations between all columns, drawn as an annotated heatmap centred on zero.
corrs = data.corr()
plt.figure(figsize=(20, 20))
sns.heatmap(
    corrs,
    center=0,
    annot=True,
    fmt='0.1f',
)

In [257]:
# Separate the target from the predictors.
# The target column is dropped by name rather than by position, so the target can
# never leak into X (and no real feature is lost) if the column order ever changes.
y = data['fetal_health']
X = data.drop(columns='fetal_health')

In [258]:
# Preview the first rows of the feature matrix.
X.head()

In [259]:
# Preview the first values of the target column.
y.head()

In [260]:
# Number of samples in each target class.
y.value_counts()

In [261]:
# Hold out 30% of the samples for testing (fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100
)
# Keep references to the unscaled splits before the names are rebound below.
X_trainc, X_testc, y_trainc, y_testc = X_train, X_test, y_train, y_test

# Standardise features: statistics are learned from the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

for label, part in (
    ("size of X_train is: ", X_train),
    ("size of X_test is: ", X_test),
    ("size of y_train is: ", y_train),
    ("size of y_test is: ", y_test),
):
    print(label, part.shape)

In [262]:
# Linear-kernel SVM with C=0.01 (Model_c_01): fit on the scaled training split
# and report how well it reproduces the training labels.
Model_c_01 = svm.SVC(C=0.01, kernel='linear', random_state=42).fit(X_train, y_train)
y_train_pred = Model_c_01.predict(X_train)
for label, metric in (
    ('Training Data :classification_report: \n', classification_report),
    ('Training Data :confusion_matrix: \n', confusion_matrix),
    ('\n Training Data :accuracy_score: ', accuracy_score),
):
    print(label, metric(y_train, y_train_pred))

In [263]:
# fig = plt.figure(1, figsize=(20,20))
# plt.cla()
# pca = decomposition.PCA(n_components=2)
# pca.fit(X_test)
# X_PLT=pca.transform(X_test)

# Y_PLT = y_test
# plt.scatter(X_PLT[:, 0], X_PLT[:, 1], c=Y_PLT, cmap=plt.cm.nipy_spectral,marker='o',linewidths=20)

# plt.show()

In [264]:
# Project the scaled training features onto their first two principal components
# and plot the projection coloured by the true class.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(X_train)
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))

plt.figure(figsize=(15,15))
# x/y are passed by keyword because newer seaborn releases reject positional vectors.
# The class label goes through `hue` (as a plain array, so it is matched to the points
# by position) with a single colormap; the original mixed `c=`/`cmap=` with `palette=`,
# which conflict, and `legend="full"` has no effect without `hue`.
# `linewidths=30` is dropped: it controls marker edge width rather than marker size
# and can swamp the points.
sns.scatterplot(
    x=pca_result[:,0],
    y=pca_result[:,1],
    hue=np.asarray(y_train),
    palette="nipy_spectral",
    legend="full",
    alpha=1,
    marker='o',
)

In [265]:
# Embed the scaled test features in 2-D with t-SNE and colour the points by their true class.
time_start = time.time()
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` — adjust if TSNE rejects this argument.
tsne = TSNE(n_components=2, verbose=1, perplexity=200, n_iter=300)
# The original passed an undefined name `df` (NameError on a fresh kernel). The plot
# below is coloured by y_test, so the test features are what must be embedded.
tsne_results = tsne.fit_transform(X_test)
print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=np.asarray(y_test),
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [266]:
# Evaluate Model_c_01 (linear kernel, C=0.01) on the held-out test split.
y_test_pred = Model_c_01.predict(X_test)
for label, metric in (
    ('C=0.01 classification_report: \n', classification_report),
    ('C=0.01 confusion_matrix: \n', confusion_matrix),
    ('\nC=0.01 accuracy_score: ', accuracy_score),
):
    print(label, metric(y_test, y_test_pred))

In [267]:
# Reduce the scaled test features to two principal components, embed that projection
# with t-SNE, and colour the points by the current test-set predictions.
# A separate PCA object is used so the PCA fitted on the training split (`pca`,
# `pca_result`) is not silently overwritten by a fit on the test split.
pca_test = PCA(n_components=2)
pca_test_result = pca_test.fit_transform(X_test)
print('Explained variation per principal component: {}'.format(pca_test.explained_variance_ratio_))

# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` — adjust if TSNE rejects this argument.
tsne = TSNE(n_components=2, verbose=1, perplexity=200, n_iter=300)
tsne_results = tsne.fit_transform(pca_test_result)

plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [268]:
# Linear-kernel SVM with C=1 (Model_c_1): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_1 = svm.SVC(C=1, kernel='linear', random_state=42).fit(X_train, y_train)
y_train_pred, y_test_pred = (Model_c_1.predict(split) for split in (X_train, X_test))
for label, metric in (
    ('C=1 classification_report: \n', classification_report),
    ('C=1 confusion_matrix: \n', confusion_matrix),
    ('\n C=1 accuracy_score: ', accuracy_score),
):
    print(label, metric(y_test, y_test_pred))

In [269]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [270]:
# Linear-kernel SVM with C=10 (Model_c_10): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_10 = svm.SVC(C=10, kernel='linear', random_state=42).fit(X_train, y_train)
y_train_pred, y_test_pred = (Model_c_10.predict(split) for split in (X_train, X_test))
for label, metric in (
    ('C=10 classification_report: \n', classification_report),
    ('C=10 confusion_matrix: \n', confusion_matrix),
    ('\nC=10 accuracy_score: ', accuracy_score),
):
    print(label, metric(y_test, y_test_pred))

In [271]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [272]:
# Linear-kernel SVM with C=100 (Model_c_100): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_100 = svm.SVC(kernel='linear',C=100,random_state=42)
Model_c_100.fit(X_train, y_train)
y_train_pred = Model_c_100.predict(X_train)
y_test_pred = Model_c_100.predict(X_test)
# Output labels corrected: they previously read "C=10" (copied from the C=10 cell),
# which made these results indistinguishable from that model's.
print('C=100 classification_report: \n',classification_report(y_test, y_test_pred))
print('C=100 confusion_matrix: \n',confusion_matrix(y_test, y_test_pred))
print('\nC=100 accuracy_score: ',accuracy_score(y_test, y_test_pred))

In [273]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [274]:
# Polynomial-kernel SVM with C=1 (Model_c_1p): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_1p = svm.SVC(kernel='poly',C=1,random_state=42)
Model_c_1p.fit(X_train, y_train)
y_train_pred = Model_c_1p.predict(X_train)
y_test_pred = Model_c_1p.predict(X_test)
print('poly C=1 classification_report: \n',classification_report(y_test, y_test_pred))
# Label typo fixed ("ploy" -> "poly") so all three lines name the same kernel.
print('poly C=1 confusion_matrix: \n',confusion_matrix(y_test, y_test_pred))
print('\n poly C=1 accuracy_score: ',accuracy_score(y_test, y_test_pred))

In [275]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [276]:
# RBF-kernel SVM with C=1 (Model_c_1r): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_1r = svm.SVC(C=1, kernel='rbf', random_state=42).fit(X_train, y_train)
y_train_pred, y_test_pred = (Model_c_1r.predict(split) for split in (X_train, X_test))
for label, metric in (
    ('rbf C=1 classification_report: \n', classification_report),
    ('rbf C=1 confusion_matrix: \n', confusion_matrix),
    ('\n rbf C=1 accuracy_score: ', accuracy_score),
):
    print(label, metric(y_test, y_test_pred))

In [277]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)

In [278]:
# Sigmoid-kernel SVM with C=1 (Model_c_1s): fit on the scaled training split,
# predict both splits, and report metrics on the held-out test split.
Model_c_1s = svm.SVC(kernel='sigmoid',C=1,random_state=42)
Model_c_1s.fit(X_train, y_train)
y_train_pred = Model_c_1s.predict(X_train)
y_test_pred = Model_c_1s.predict(X_test)
print('sigmoid C=1 classification_report: \n',classification_report(y_test, y_test_pred))
print('sigmoid C=1 confusion_matrix: \n',confusion_matrix(y_test, y_test_pred))
# Label typo fixed ("rbsigmoid" -> "sigmoid"), a leftover from copying the rbf cell.
print('\n sigmoid C=1 accuracy_score: ',accuracy_score(y_test, y_test_pred))

In [279]:
# Colour the existing t-SNE embedding (`tsne_results`) by the latest test-set predictions.
plt.figure(figsize=(16,10))
# Keyword x/y (newer seaborn rejects positional vectors); `hue` makes legend="full" effective.
sns.scatterplot(
    x=tsne_results[:,0],
    y=tsne_results[:,1],
    hue=y_test_pred,
    palette="nipy_spectral",
    legend="full",
    alpha=1,
)