In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
import pandas as pd

In [26]:
df = pd.read_csv(r'C:\userspace\daya\python\samu_proj\pcos_preprocessed.csv')
X = df.drop(columns=['PCOS (Y/N)'])  # Features
y = df['PCOS (Y/N)'] 

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [28]:
# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [29]:
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),  # Increase max_iter
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(max_iter=5000),
    'LinearSVC': LinearSVC(max_iter=5000),  # Increase max_iter for LinearSVC
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Gaussian Naive Bayes': GaussianNB()
}

In [30]:
for name, model in models.items():
    if name == 'Logistic Regression':
        # Fit logistic regression model on scaled data
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
    print(f"Classification Report for {name}:")
    print(classification_report(y_test, y_pred, zero_division=1))  # Set zero_division parameter to 1
    print("=" * 60)

Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.88      0.94      0.91        77
           1       0.81      0.69      0.75        32

    accuracy                           0.86       109
   macro avg       0.85      0.81      0.83       109
weighted avg       0.86      0.86      0.86       109

Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.86      0.92      0.89        77
           1       0.77      0.62      0.69        32

    accuracy                           0.83       109
   macro avg       0.81      0.77      0.79       109
weighted avg       0.83      0.83      0.83       109

Classification Report for Gradient Boosting:
              precision    recall  f1-score   support

           0       0.81      0.94      0.87        77
           1       0.75      0.47      0.58        32

    accuracy                           0.80      

