# Polycystic Ovary Syndrome (PCOS) detection using Machine Learning

## Random Forest

In [30]:
# importing the necessary libraries

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

In [31]:
# Attach Google Drive to the Colab filesystem so files stored there can be
# read through ordinary paths under /content/drive.

from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
# Read the CSV from the mounted Drive folder into a DataFrame.

df = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/PCOS/Datasets/PCOS_Selected_Features.csv"
)

In [33]:
# Column names used as model inputs (one entry per predictor column).

features = [
    'Cycle(R/I)',
    'Weight gain(Y/N)',
    'hair growth(Y/N)',
    'Skin darkening (Y/N)',
    'Follicle No. (L)',
    'Follicle No. (R)',
]

In [34]:
# Name of the label column the classifier is trained to predict.

target = "PCOS (Y/N)"

In [35]:
# Two-stage split into train / validation / test sets.
# Stage 1 holds out 20% of all rows as the test set.
# Stage 2 takes 25% of the remaining 80% as the validation set
# (0.25 * 0.8 = 0.2), so the result is roughly 60% / 20% / 20%.
# Both stages use the same fixed seed so the partition is reproducible.
# NOTE(review): no `stratify=` is passed, so the class proportions are not
# guaranteed to match across the three sets.

X_train_val, X_test, y_train_val, y_test = train_test_split(
    df[features],
    df[target],
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=42,
)

In [36]:
# Build a 100-tree random forest with a fixed seed, then fit it on the
# training split only (validation and test rows are never seen here).

rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
rf_model.fit(X_train, y_train)

In [37]:
# Predict class labels for the validation rows with the fitted forest.

y_val_pred = rf_model.predict(X=X_val)

In [38]:
# Score the validation predictions against the true validation labels.
# Accuracy is computed over all rows; precision, recall and F1 are computed
# with label 1 as the positive class (presumably 1 = PCOS present; verify
# against the dataset encoding).

val_accuracy = accuracy_score(y_true=y_val, y_pred=y_val_pred)
val_precision, val_recall, val_f1 = (
    metric(y_val, y_val_pred, pos_label=1)
    for metric in (precision_score, recall_score, f1_score)
)

In [39]:
# Report the validation metrics. Accuracy is shown as a percentage; the other
# three metrics are shown as fractions in [0, 1].

print('Validation Set Results:')
for label, value in (
    ('Accuracy:', val_accuracy * 100),
    ('Precision:', val_precision),
    ('Recall:', val_recall),
    ('F1-score:', val_f1),
):
    print(label, value)

Validation Set Results:
Accuracy: 86.11111111111111
Precision: 0.84375
Recall: 0.7297297297297297
F1-score: 0.7826086956521738


In [40]:
# Predict class labels for the held-out test rows with the fitted forest.

y_test_pred = rf_model.predict(X=X_test)

In [41]:
# Score the test predictions against the true test labels.
# Accuracy is computed over all rows; precision, recall and F1 are computed
# with label 1 as the positive class (presumably 1 = PCOS present; verify
# against the dataset encoding).

test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
test_precision, test_recall, test_f1 = (
    metric(y_test, y_test_pred, pos_label=1)
    for metric in (precision_score, recall_score, f1_score)
)

In [42]:
# Report the test metrics. Accuracy is shown as a whole-number percentage;
# the other three metrics are shown as fractions in [0, 1].

print('Test Set Results:')
# Scale to a percentage *before* rounding. Rounding the fraction first and
# then multiplying by 100 can reintroduce binary floating-point noise
# (e.g. round(0.57, 2) * 100 == 56.99999999999999). Rounding to 0 decimals
# keeps the value a float, so it still prints as e.g. 87.0.
print('Accuracy:', round(test_accuracy * 100, 0))
print('Precision:', test_precision)
print('Recall:', test_recall)
print('F1-score:', test_f1)

Test Set Results:
Accuracy: 87.0
Precision: 0.8
Recall: 0.6896551724137931
F1-score: 0.7407407407407408


In [46]:
from joblib import dump

# Serialize the fitted random forest to a file in the current working
# directory so it can be reloaded later without retraining. The call returns
# the list of written filenames, which the cell displays.

dump(value=rf_model, filename='random_forest_model.pkl')

['random_forest_model.pkl']