In [1]:
import pandas as pd #dataframe
import os #os library
from sklearn.ensemble import RandomForestClassifier #random forest
from sklearn.preprocessing import LabelEncoder #convert text to digit
from sklearn.metrics import accuracy_score ,classification_report #accuracy of the model, and per-class classification report
import joblib #saving loading models
import warnings #warning library

In [2]:
# Load the training CSV into `df_train`.
# On any load failure the error is printed and `df_train` stays None so that
# later cells can detect the failure instead of crashing on an undefined name.
file_path = '/content/drive/MyDrive/AI/AI_Project/Training.csv' #Trainset

# Held-out test set. This previously pointed at Training.csv, which would make
# any evaluation through this variable score the model on its own training data.
test_file_path = '/content/drive/MyDrive/AI/AI_Project/Testing.csv' # Testset

print(f"Attempting to load training data from: {file_path}")
df_train = None

try:
    df_train = pd.read_csv(file_path)
    print("‚úÖ Training dataset loaded successfully!")

    # pandas names a header-less column 'Unnamed: <position>'; presumably the CSV
    # has a trailing delimiter that produces this empty column - TODO confirm.
    if 'Unnamed: 133' in df_train.columns:
        df_train = df_train.drop('Unnamed: 133', axis=1)
        print("Dropped 'Unnamed: 133' column.")

    print("\nFirst 5 rows of the training data:")
    print(df_train.head())

except FileNotFoundError:
    print(f"\n‚ùå Error: File not found at the specified path: '{file_path}'")
except Exception as e:
    # Best-effort: report the problem and leave df_train as None.
    print(f"\n‚ùå An unexpected error occurred while loading the training data: {e}")

if df_train is not None:
    print("\n---> df_train DataFrame is ready for preprocessing.")
else:
    print("\n---> df_train DataFrame could not be loaded.")

Attempting to load training data from: /content/drive/MyDrive/AI/AI_Project/Training.csv
‚úÖ Training dataset loaded successfully!
Dropped 'Unnamed: 133' column.

First 5 rows of the training data:
   itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0        1          1                     1                    0          0   
1        0          1                     1                    0          0   
2        1          0                     1                    0          0   
3        1          1                     0                    0          0   
4        1          1                     1                    0          0   

   chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  ...  \
0       0           0             0        0                 0  ...   
1       0           0             0        0                 0  ...   
2       0           0             0        0                 0  ...   
3       0           0             0        

In [3]:
# Phase 1: clean the training frame, fit a random forest on all of it, and
# persist both the model and the label encoder with joblib.

# FutureWarning messages are suppressed for the rest of the session.
warnings.filterwarnings("ignore", category=FutureWarning)

# The loading cell leaves df_train as None on failure; an undefined name is
# treated the same way.
if globals().get('df_train') is None:
    print("‚ùå Error: 'df_train' not found. Please load your data first.")
else:
    print("Processing the loaded 'df_train' DataFrame...")

    print("Preprocessing data...")

    # Any NaN anywhere in the frame triggers an in-place row drop on df_train.
    has_missing_values = df_train.isnull().values.any()
    if has_missing_values:
        print("Warning: Missing values detected.")
        df_train.dropna(inplace=True)
        print("Dropped rows with missing values.")

    if 'prognosis' in df_train.columns:
        # Inputs are every column except the target; the target is 'prognosis'.
        X = df_train.drop(columns='prognosis')
        y = df_train['prognosis']

        # Force each feature to int: unparseable entries become NaN, then 0.
        for col in X.columns:
            try:
                numeric_values = pd.to_numeric(X[col], errors='coerce')
                X[col] = numeric_values.fillna(0).astype(int)
            except Exception as err:
                print(f"Warning: Could not convert column {col} to numeric. Error: {err}. Skipping.")

        # Map each distinct prognosis label to an integer class id.
        le = LabelEncoder()
        y = le.fit_transform(y)
        class_count = len(le.classes_)
        print(f"Target variable ('prognosis') encoded into {class_count} numeric classes.")

        # Fixed random_state keeps the fitted forest reproducible between runs.
        rf_model = RandomForestClassifier(
            n_estimators=100,
            random_state=42,
        )
        rf_model.fit(X, y)
        print("‚úÖ Model training complete.")

        # Make sure the output directory exists before writing into it.
        save_path_prefix = '/content/drive/MyDrive/AI/AI_Project/'
        os.makedirs(save_path_prefix, exist_ok=True)

        model_filename = os.path.join(save_path_prefix, 'symptom_predictor_model.pkl')
        encoder_filename = os.path.join(save_path_prefix, 'symptom_label_encoder.pkl')

        # Model first, then encoder - same order as the confirmations below.
        for fitted_object, target_file in ((rf_model, model_filename), (le, encoder_filename)):
            joblib.dump(fitted_object, target_file)

        print(f"‚úÖ Model saved as '{model_filename}'.")
        print(f"‚úÖ Label encoder saved as '{encoder_filename}'.")
        print(f"\nSaved at '{save_path_prefix}'.")

        print("\n--- Phase 1 Model Training Complete ---")
    else:
        print("‚ùå Error: 'prognosis' column not found in the dataset!")

Processing the loaded 'df_train' DataFrame...
Preprocessing data...
Target variable ('prognosis') encoded into 41 numeric classes.
‚úÖ Model training complete.
‚úÖ Model saved as '/content/drive/MyDrive/AI/AI_Project/symptom_predictor_model.pkl'.
‚úÖ Label encoder saved as '/content/drive/MyDrive/AI/AI_Project/symptom_label_encoder.pkl'.

Saved at '/content/drive/MyDrive/AI/AI_Project/'.

--- Phase 1 Model Training Complete ---


In [4]:
# Phase 2: load the held-out test CSV plus the saved model/encoder, align the
# test features with what the model was trained on, and report accuracy and a
# per-class classification report.
#
# Failures while loading or preprocessing print a message and re-raise, which
# stops this cell while keeping the kernel (and earlier results) alive.
# NOTE(review): the original called exit() here, which in a notebook kernel
# requests a kernel shutdown rather than just aborting the cell.
TEST_FILE = '/content/drive/MyDrive/AI/AI_Project/Testing.csv'       # Path to your test data
MODEL_PATH = '/content/drive/MyDrive/AI/AI_Project/symptom_predictor_model.pkl' # Path to saved model
ENCODER_PATH = '/content/drive/MyDrive/AI/AI_Project/symptom_label_encoder.pkl' # Path to saved encoder

print("Loading resources...") # --- 2. Load Data, Model, and Encoder --
try:
    df_test = pd.read_csv(TEST_FILE)
    # joblib.load unpickles arbitrary objects: only point these paths at files
    # you produced yourself (here, the ones written by the training cell).
    model = joblib.load(MODEL_PATH)
    encoder = joblib.load(ENCODER_PATH)
    print("‚úÖ Data, model, and encoder loaded.")
except FileNotFoundError as e:
    print(f"‚ùå Error loading file: {e}. Please check paths.")
    raise
except Exception as e:
    print(f"‚ùå An unexpected error occurred during loading: {e}")
    raise

print("Preprocessing test data...")
try:
    X_test = df_test.drop('prognosis', axis=1)
    y_test_text = df_test['prognosis']

    # Column names recorded by the model when it was fitted on a DataFrame.
    expected_features = model.feature_names_in_

    # Report extra columns BEFORE narrowing X_test to the expected features;
    # checking afterwards can never find any, so the warning would never fire.
    extra_cols = set(X_test.columns) - set(expected_features)
    if extra_cols:
        print(f"Warning: Dropping extra columns found only in test data: {extra_cols}")

    for col in expected_features:
        if col in X_test.columns:
            X_test[col] = pd.to_numeric(X_test[col], errors='coerce').fillna(0).astype(int) # Convert features to numeric (0/1)
        else:
            print(f"Warning: Expected feature '{col}' not in test data. Adding it as zeros.") # Add missing columns with 0s
            X_test[col] = 0

    # Selecting by the expected names both drops the extras and enforces the
    # column order the model was trained with.
    X_test = X_test[expected_features]

    y_test_encoded = encoder.transform(y_test_text) # Encode the text labels using the loaded encoder
    print("‚úÖ Preprocessing complete.")

except KeyError as e:
    print(f"‚ùå Error during preprocessing: Missing expected column '{e}'. Check test data format.")
    raise
except ValueError as e:
    print(f"‚ùå Error encoding test labels: {e}. Test set might have unknown disease names.")
    raise
except Exception as e:
    print(f"‚ùå An unexpected error occurred during preprocessing: {e}")
    raise

print("\nMaking predictions and evaluating...")
try:
    y_pred = model.predict(X_test) # Predict diseases using the loaded model

    accuracy = accuracy_score(y_test_encoded, y_pred)     # Calculate accuracy
    print(f"üìä Test Accuracy: {accuracy * 100:.2f}%")

    # Pass every encoded class id explicitly so target_names always lines up
    # with the labels, even when some trained class is absent from the test set
    # (otherwise classification_report rejects the mismatched target_names).
    all_class_ids = list(range(len(encoder.classes_)))

    print("\nClassification Report (Test Set):")     # Display detailed report
    print(classification_report(
        y_test_encoded,
        y_pred,
        labels=all_class_ids,
        target_names=encoder.classes_,
        zero_division=0,
    ))
    print("\n--- Evaluation Complete ---")

except Exception as e:
    # Best-effort: evaluation problems are reported without aborting the notebook.
    print(f"‚ùå An error occurred during prediction or evaluation: {e}")

Loading resources...
‚úÖ Data, model, and encoder loaded.
Preprocessing test data...
‚úÖ Preprocessing complete.

Making predictions and evaluating...
üìä Test Accuracy: 97.62%

Classification Report (Test Set):
                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00         1
                                   AIDS       1.00      1.00      1.00         1
                                   Acne       1.00      1.00      1.00         1
                    Alcoholic hepatitis       1.00      1.00      1.00         1
                                Allergy       1.00      1.00      1.00         1
                              Arthritis       1.00      1.00      1.00         1
                       Bronchial Asthma       1.00      1.00      1.00         1
                   Cervical spondylosis       1.00      1.00      1.00         1
                            Chicken pox       1.00      1