In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = r"C:\Users\seshu\Desktop\kiran ml\medical_conditions_dataset.csv"
data = pd.read_csv(file_path)

# Drop unnecessary columns
data = data.drop(columns=['id', 'full_name'])

# Handle missing values
# Age and BMI will be filled with the median, while blood_pressure and glucose_levels with their mean
imputer_median = SimpleImputer(strategy='median')
imputer_mean = SimpleImputer(strategy='mean')

data['age'] = imputer_median.fit_transform(data[['age']])
data['bmi'] = imputer_median.fit_transform(data[['bmi']])
data['blood_pressure'] = imputer_mean.fit_transform(data[['blood_pressure']])
data['glucose_levels'] = imputer_mean.fit_transform(data[['glucose_levels']])

# Encode categorical features
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])
data['smoking_status'] = label_encoder.fit_transform(data['smoking_status'])
data['condition'] = label_encoder.fit_transform(data['condition'])

# Split the dataset into features (X) and target (y)
X = data.drop(columns=['condition'])
y = data['condition']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.51

Classification Report:
               precision    recall  f1-score   support

           0       0.15      0.06      0.09       298
           1       0.60      0.76      0.67      1217
           2       0.22      0.15      0.18       485

    accuracy                           0.51      2000
   macro avg       0.33      0.33      0.31      2000
weighted avg       0.44      0.51      0.47      2000



Accuracy: 0.51

Classification Report:
               precision    recall  f1-score   support

           0       0.15      0.06      0.09       298
           1       0.60      0.76      0.67      1217
           2       0.22      0.15      0.18       485

    accuracy                           0.51      2000
   macro avg       0.33      0.33      0.31      2000
weighted avg       0.44      0.51      0.47      2000



In [5]:
import joblib

# Save the model to a file
model_file_path = r"C:\Users\seshu\Desktop\kiran ml\random_forest_model.joblib"
joblib.dump(clf, model_file_path)

print(f"Model saved to {model_file_path}")


Model saved to C:\Users\seshu\Desktop\kiran ml\random_forest_model.joblib
