In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Read the dataset and discard every row that contains a missing value
df = pd.read_csv("aiml.csv").dropna()

# Column groups: the eight symptom columns, the model inputs, and every
# categorical column that has to be turned into integer codes
symptom_columns = [
    'Symptom 1', 'Symptom 2', 'Symptom 3', 'Symptom 4',
    'Symptom 5', 'Symptom 6', 'Symptom 7', 'Symptom 8',
]
feature_columns = ['Blood Group', 'Gender'] + symptom_columns
categorical_columns = ['Blood Group', 'Gender', 'Cancer Stage'] + symptom_columns

# One LabelEncoder per categorical column; the fitted encoders are kept so the
# same mapping can be reused (and inverted) at prediction time
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, encoder in label_encoders.items():
    df[col] = encoder.fit_transform(df[col])

# Model inputs and the target to predict
x = df[feature_columns]
y = df['Cancer Stage']

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

# Report accuracy on the held-out rows
test_predictions = model.predict(x_test)
accuracy = accuracy_score(y_test, test_predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Prediction function with flexible symptom input
def predict_cancer_stage():
    """Interactively predict a cancer stage from console input.

    Prompts for a name, blood group, gender and up to eight symptoms, encodes
    the answers with the module-level ``label_encoders`` and prints the stage
    predicted by the module-level ``model``.

    The n-th symptom entered is encoded with the encoder fitted on the
    ``'Symptom n'`` column. A symptom that encoder does not know produces a
    warning and its slot stays 0; slots for symptoms that were never entered
    are also 0.

    NOTE(review): 0 is also a regular LabelEncoder code, so an absent or
    unrecognized symptom is indistinguishable from whichever real symptom
    encodes to 0 in that column -- confirm this is acceptable.

    Returns:
        None. The result is printed. An unknown blood group or gender is
        reported as "Invalid input: ..." instead of being raised.
    """
    try:
        # Get user input for blood group, gender, and symptoms.
        # The name is prompted for but not used by the prediction.
        input("enter your name:")
        blood_group = input("Enter Blood Group (A+, A-, B+, etc.): ")
        gender = input("Enter Gender (Male/Female): ")

        # Collect up to eight symptoms; 'done' (any case, surrounding
        # whitespace ignored) ends the list early
        symptoms = []
        print("Enter symptoms (type 'done' when finished):")
        for i in range(1, 9):
            symptom = input(f"Enter Symptom {i} (or type 'done' if you have no more symptoms): ")
            if symptom.strip().lower() == 'done':
                break
            symptoms.append(symptom)

        # Encoding blood group and gender; an unseen label raises ValueError,
        # which the outer handler reports
        blood_group_encoded = label_encoders['Blood Group'].transform([blood_group])[0]
        gender_encoded = label_encoders['Gender'].transform([gender])[0]

        # Start with every symptom slot at 0
        symptom_encoded = [0] * 8

        # Encode each entered symptom by its own position. Using enumerate
        # (rather than symptoms.index) keeps duplicates in their own slots
        # and pairs each one with the encoder of the matching column.
        for position, symptom in enumerate(symptoms):
            encoder = label_encoders[f'Symptom {position + 1}']
            try:
                symptom_encoded[position] = encoder.transform([symptom])[0]
            except ValueError:
                print(f"Warning: '{symptom}' is not a recognized symptom.")

        # Combine all inputs into a single row, in the training column order
        user_input = [blood_group_encoded, gender_encoded] + symptom_encoded
        predicted_stage = model.predict([user_input])[0]

        # Map the predicted code back to the original stage label
        print(f"Predicted Cancer Stage: {label_encoders['Cancer Stage'].inverse_transform([predicted_stage])[0]}")

    except ValueError as e:
        print(f"Invalid input: {e}")

# Run the interactive prediction once; this blocks on console input and
# relies on the model and encoders built above
predict_cancer_stage()

Model Accuracy: 32.52%
