In [5]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

# Build a reproducible synthetic dataset of 50 members.
# NOTE: every draw below advances NumPy's global random state, so the columns
# must be generated in exactly this order to reproduce the same values.
np.random.seed(42)

member_columns = {}
member_columns['Gender'] = np.random.choice(['Male', 'Female'], 50)
member_columns['Socioeconomic_Status'] = np.random.choice(['Low', 'Middle', 'High'], 50)
member_columns['Age'] = np.random.randint(18, 70, 50)  # upper bound (70) is exclusive
member_columns['Health_Status'] = np.random.choice([0, 1], 50)  # 1 = healthy, 0 = not healthy
member_columns['Survived'] = np.random.choice([0, 1], 50)  # 1 = survived, 0 = did not survive

data = pd.DataFrame(member_columns)

# Show the first five rows as a quick sanity check
print("Sample Data:\n", data.head())

# Data Preprocessing
# Encode each categorical column with its own LabelEncoder. Sharing a single
# encoder across columns refits it on the later column, which throws away the
# Gender mapping and makes it impossible to decode Gender afterwards.
# The integer codes written into `data` are the same as with a shared encoder.
label_encoders = {}
for column in ['Gender', 'Socioeconomic_Status']:
    column_encoder = LabelEncoder()
    data[column] = column_encoder.fit_transform(data[column])
    label_encoders[column] = column_encoder

# Backward compatibility: `label_enc` previously ended up fitted on
# Socioeconomic_Status (the last column it was applied to), so keep the name
# bound to that column's encoder.
label_enc = label_encoders['Socioeconomic_Status']

# Pull the target column out and keep every remaining column as a predictor
y = data['Survived']  # Target variable
X = data.drop('Survived', axis=1)  # Features

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a 100-tree random forest on the training partition (seeded for repeatability)
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Compute every metric first, then report them together
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", class_report)
print("Confusion Matrix:\n", conf_matrix)

# Rank the input columns by the importance score the fitted forest assigned them
features = X.columns
importances = model.feature_importances_
importance_df = pd.DataFrame({'Feature': features, 'Importance': importances})
importance_df = importance_df.sort_values(by='Importance', ascending=False)  # highest first
print("Feature Importance:\n", importance_df)

Sample Data:
    Gender Socioeconomic_Status  Age  Health_Status  Survived
0    Male                 High   33              0         0
1  Female               Middle   62              0         1
2    Male                 High   35              0         1
3    Male                 High   64              1         1
4    Male                  Low   41              0         1
Accuracy: 0.40
Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.29      0.40         7
           1       0.29      0.67      0.40         3

    accuracy                           0.40        10
   macro avg       0.48      0.48      0.40        10
weighted avg       0.55      0.40      0.40        10

Confusion Matrix:
 [[2 5]
 [1 2]]
Feature Importance:
                 Feature  Importance
2                   Age    0.681778
1  Socioeconomic_Status    0.144879
3         Health_Status    0.107118
0                Gender    0.066226
