Connected to Python 3.13.2

In [None]:
# pcos_model_training.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Load the dataset
df = pd.read_csv("PCOS_data.csv")  # Replace with your actual CSV filename

# Column lookups below are exact-match, so tolerate headers that were exported
# with stray leading/trailing whitespace (e.g. " Age (yrs)").
# NOTE(review): assumes padded headers are possible in this CSV - harmless if not.
df.columns = df.columns.str.strip()

# Optional: Drop irrelevant columns (if present)
df = df.drop(columns=['Sl. No', 'Patient File No.'], errors='ignore')

# Step 2: Declare every column the pipeline touches *before* cleaning, so that
# missing-value handling can be limited to the columns that actually matter.
binary_columns = ['Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
                  'Pimples(Y/N)', 'Weight gain(Y/N)', 'Cycle(R/I)', 'Exercise(Y/N)']

features = ['Age (yrs)', 'Weight (Kg)', 'BMI', 'Cycle length(days)',
            'Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
            'Pimples(Y/N)', 'Exercise(Y/N)']

target = 'PCOS (Y/N)'  # 1 = PCOS, 0 = No PCOS

# Fail early with a readable message instead of an opaque KeyError from
# df[features] further down.
missing_columns = [col for col in features + [target] if col not in df.columns]
if missing_columns:
    raise KeyError(f"Required column(s) missing from PCOS_data.csv: {missing_columns}")

# Step 3: Handle missing values (basic cleaning)
# Only rows lacking a value in a column we use are dropped. A blanket
# df.dropna() would also discard rows because of gaps in unrelated columns,
# which can silently remove most of the dataset.
columns_in_use = [col for col in dict.fromkeys(features + binary_columns + [target])
                  if col in df.columns]
df = df.dropna(subset=columns_in_use)
if df.empty:
    raise ValueError("No rows left after dropping missing values in the required columns.")

# Step 4: Encode categorical features (Y/N → 1/0)
# The encoder is refit for every column, so it only holds the mapping of the
# last column processed; it is used here purely to turn labels into integers.
label_enc = LabelEncoder()
for col in binary_columns:
    if col in df.columns:
        df[col] = label_enc.fit_transform(df[col])

# Step 5: Select features (X) and target (y)
X = df[features]
y = df[target]

# Step 6: Split into train/test sets
# stratify=y keeps the PCOS / non-PCOS ratio the same in both partitions, so
# the held-out metrics are not distorted by an unlucky split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 8: Evaluate the model on the held-out 20%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Model trained successfully!")
print(f"🔍 Accuracy: {accuracy * 100:.2f}%")
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))

# Step 9: Save the trained model
# Only the estimator is persisted; callers must supply columns in the same
# order as `features` when predicting.
joblib.dump(model, "pcos_rf_model.pkl")
print("💾 Model saved as 'pcos_rf_model.pkl'")

ModuleNotFoundError: No module named 'pandas'

In [None]:
# pcos_model_training.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Load the dataset
df = pd.read_csv("PCOS_data.csv")  # Replace with your actual CSV filename

# Column lookups below are exact-match, so tolerate headers that were exported
# with stray leading/trailing whitespace (e.g. " Age (yrs)").
# NOTE(review): assumes padded headers are possible in this CSV - harmless if not.
df.columns = df.columns.str.strip()

# Optional: Drop irrelevant columns (if present)
df = df.drop(columns=['Sl. No', 'Patient File No.'], errors='ignore')

# Step 2: Declare every column the pipeline touches *before* cleaning, so that
# missing-value handling can be limited to the columns that actually matter.
binary_columns = ['Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
                  'Pimples(Y/N)', 'Weight gain(Y/N)', 'Cycle(R/I)', 'Exercise(Y/N)']

features = ['Age (yrs)', 'Weight (Kg)', 'BMI', 'Cycle length(days)',
            'Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
            'Pimples(Y/N)', 'Exercise(Y/N)']

target = 'PCOS (Y/N)'  # 1 = PCOS, 0 = No PCOS

# Fail early with a readable message instead of an opaque KeyError from
# df[features] further down.
missing_columns = [col for col in features + [target] if col not in df.columns]
if missing_columns:
    raise KeyError(f"Required column(s) missing from PCOS_data.csv: {missing_columns}")

# Step 3: Handle missing values (basic cleaning)
# Only rows lacking a value in a column we use are dropped. A blanket
# df.dropna() would also discard rows because of gaps in unrelated columns,
# which can silently remove most of the dataset.
columns_in_use = [col for col in dict.fromkeys(features + binary_columns + [target])
                  if col in df.columns]
df = df.dropna(subset=columns_in_use)
if df.empty:
    raise ValueError("No rows left after dropping missing values in the required columns.")

# Step 4: Encode categorical features (Y/N → 1/0)
# The encoder is refit for every column, so it only holds the mapping of the
# last column processed; it is used here purely to turn labels into integers.
label_enc = LabelEncoder()
for col in binary_columns:
    if col in df.columns:
        df[col] = label_enc.fit_transform(df[col])

# Step 5: Select features (X) and target (y)
X = df[features]
y = df[target]

# Step 6: Split into train/test sets
# stratify=y keeps the PCOS / non-PCOS ratio the same in both partitions, so
# the held-out metrics are not distorted by an unlucky split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 8: Evaluate the model on the held-out 20%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Model trained successfully!")
print(f"🔍 Accuracy: {accuracy * 100:.2f}%")
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))

# Step 9: Save the trained model
# Only the estimator is persisted; callers must supply columns in the same
# order as `features` when predicting.
joblib.dump(model, "pcos_rf_model.pkl")
print("💾 Model saved as 'pcos_rf_model.pkl'")

ModuleNotFoundError: No module named 'pandas'

In [None]:
# pcos_model_training_debugged.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Load dataset
try:
    df = pd.read_csv("PCOS_data.csv")  # Make sure this file is in the same directory
    print("✅ Dataset loaded successfully.")
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which throws away all state rather than reporting the error.
    raise FileNotFoundError(
        "❌ Error: 'PCOS_data.csv' not found. Please check the file path."
    ) from err

# Column lookups below are exact-match, so tolerate headers that were exported
# with stray leading/trailing whitespace.
# NOTE(review): assumes padded headers are possible in this CSV - harmless if not.
df.columns = df.columns.str.strip()

# Step 2: Drop irrelevant columns if they exist
df = df.drop(columns=['Sl. No', 'Patient File No.'], errors='ignore')

# Columns the pipeline depends on. They are declared before cleaning so that
# missing-value handling can be restricted to them.
binary_columns = ['Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
                  'Pimples(Y/N)', 'Weight gain(Y/N)', 'Cycle(R/I)', 'Exercise(Y/N)']

features = ['Age (yrs)', 'Weight (Kg)', 'BMI', 'Cycle length(days)',
            'Hair growth(Y/N)', 'Skin darkening (Y/N)', 'Fast food (Y/N)',
            'Pimples(Y/N)', 'Exercise(Y/N)']

target = 'PCOS (Y/N)'  # 1 = PCOS, 0 = No PCOS

# Check for missing features/target up front and fail with a readable message.
missing_columns = [col for col in features + [target] if col not in df.columns]
if missing_columns:
    raise KeyError(f"Required column(s) missing from PCOS_data.csv: {missing_columns}")

# Step 3: Clean data - drop rows with missing values
# Restricted to the columns in use: a blanket dropna() would also discard rows
# because of gaps in unrelated columns and can empty the dataset.
columns_in_use = [col for col in dict.fromkeys(features + binary_columns + [target])
                  if col in df.columns]
df = df.dropna(subset=columns_in_use)
print(f"📊 Remaining data rows after dropping NA: {len(df)}")
if df.empty:
    raise ValueError("No rows left after dropping missing values in the required columns.")

# Step 4: Encode binary (Y/N) columns if present
# The encoder is refit for every column, so it only holds the mapping of the
# last column processed.
label_enc = LabelEncoder()
for col in binary_columns:
    if col in df.columns:
        df[col] = label_enc.fit_transform(df[col])
        print(f"🔁 Encoded column: {col}")
    else:
        print(f"⚠️ Column missing (skipped): {col}")

# Step 5: Select features and label
X = df[features]
y = df[target]

# Step 6: Split into train/test sets
# stratify=y keeps the class ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 8: Evaluate the model on the held-out 20%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\n✅ Model trained successfully!")
print(f"🔍 Accuracy: {accuracy * 100:.2f}%")
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))

# Step 9: Save the trained model
# Only the estimator is persisted; callers must supply columns in the same
# order as `features` when predicting.
joblib.dump(model, "pcos_rf_model.pkl")
print("💾 Model saved as 'pcos_rf_model.pkl'")

SyntaxError: invalid syntax (<ipython-input-3-cadaf417d267>, line 39)

No kernel connected