# Polycystic Ovary Syndrome (PCOS) detection using Machine Learning

## Support Vector Machine (SVM)

In [18]:
# importing the necessary libraries

import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [19]:
# mounting to google drive folder
# Makes Google Drive available under /content/drive, which is the root of the
# path the dataset is later read from. `google.colab` is supplied by the Colab
# runtime, so this cell is expected to run inside Colab.

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
# reading the selected-features PCOS table from the mounted Drive folder

df = pd.read_csv(
    filepath_or_buffer="/content/drive/My Drive/Colab Notebooks/PCOS/Datasets/PCOS_Selected_Features.csv",
)

In [21]:
# predictor columns passed to the classifier (one column name per line)

features = [
    'Cycle(R/I)',
    'Weight gain(Y/N)',
    'hair growth(Y/N)',
    'Skin darkening (Y/N)',
    'Follicle No. (L)',
    'Follicle No. (R)',
]

In [22]:
# name of the label column the model is trained to predict

target = "PCOS (Y/N)"

In [23]:
# three-way split into train / validation / test sets

# step 1: hold out 20% of all rows as the test set
X_train_val, X_test, y_train_val, y_test = train_test_split(
    df[features],
    df[target],
    test_size=0.2,
    random_state=42,
)
# step 2: take 25% of the remaining 80% (i.e. 20% of all rows) for validation,
# which leaves 60% of all rows for training
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=42,
)

In [24]:
# standardising the features: the scaler is fitted on the training split only,
# and that same fitted scaler is then applied to every split

scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [25]:
# fitting an RBF-kernel support vector classifier on the standardised training split

svm_model = SVC(
    kernel='rbf',
    gamma='auto',
    random_state=42,
)
svm_model.fit(X=X_train_scaled, y=y_train)

In [26]:
# predicted labels for the standardised validation rows

y_val_pred = svm_model.predict(X=X_val_scaled)

In [27]:
# scoring the validation predictions; precision, recall and F1 are computed
# with label 1 as the positive class

val_accuracy, val_precision, val_recall, val_f1 = (
    accuracy_score(y_val, y_val_pred),
    precision_score(y_val, y_val_pred, pos_label=1),
    recall_score(y_val, y_val_pred, pos_label=1),
    f1_score(y_val, y_val_pred, pos_label=1),
)

In [28]:
# reporting the validation metrics; accuracy is shown as a percentage,
# the other three as fractions between 0 and 1

print('Validation Set Results:')
for label, value in (
    ('Accuracy:', val_accuracy * 100),
    ('Precision:', val_precision),
    ('Recall:', val_recall),
    ('F1-score:', val_f1),
):
    print(label, value)

Validation Set Results:
Accuracy: 88.88888888888889
Precision: 0.8571428571428571
Recall: 0.8108108108108109
F1-score: 0.8333333333333334


In [29]:
# predicted labels for the standardised test rows

y_test_pred = svm_model.predict(X=X_test_scaled)

In [30]:
# scoring the test predictions; precision, recall and F1 are computed
# with label 1 as the positive class

test_accuracy, test_precision, test_recall, test_f1 = (
    accuracy_score(y_test, y_test_pred),
    precision_score(y_test, y_test_pred, pos_label=1),
    recall_score(y_test, y_test_pred, pos_label=1),
    f1_score(y_test, y_test_pred, pos_label=1),
)

In [31]:
# reporting the test metrics; accuracy is shown as a percentage,
# the other three as fractions between 0 and 1

print('Test Set Results:')
for label, value in (
    ('Accuracy:', test_accuracy * 100),
    ('Precision:', test_precision),
    ('Recall:', test_recall),
    ('F1-score:', test_f1),
):
    print(label, value)

Test Set Results:
Accuracy: 85.18518518518519
Precision: 0.7407407407407407
Recall: 0.6896551724137931
F1-score: 0.7142857142857143


In [32]:
from joblib import dump

# save the fitted scaler next to the model: the SVM was trained on
# standardised features, so any new data must be passed through this exact
# scaler before calling the saved model's predict()
dump(scaler, 'scaler.pkl')

# save the trained model to disk (kept as the final expression so the cell
# still displays the model's file name)

dump(svm_model, 'svm_model.pkl')

['svm_model.pkl']