In [31]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Load the datasets
merged_df_adjusted = pd.read_csv('merged_df_adjusted.csv')
admissions_df = pd.read_csv('admissions.csv')

# Step 2: Merge the datasets on 'subject_id' and 'hadm_id'
merged_data = pd.merge(merged_df_adjusted, admissions_df, on=['subject_id', 'hadm_id'], how='inner')

# Drop rows with missing values in 'discharge_location' and drop 'stay_id' column
merged_data_cleaned = merged_data.dropna(subset=['discharge_location'])
merged_data_cleaned = merged_data_cleaned.drop(columns=['stay_id'])

# Step 3: Preprocess the data
# Separate features and target
# Drop unnecessary columns (including 'subject_id' and 'hadm_id') before separating features and target
X = merged_data_cleaned.drop(columns=['discharge_location', 'subject_id', 'hadm_id'])  # Features
y = merged_data_cleaned['discharge_location']  # Target

# Identify categorical columns
categorical_columns = X.select_dtypes(include=['object']).columns

# Encode categorical columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    label_encoders[col] = le

# Encode target variable
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

# Identify numerical columns
numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns

# Normalize numerical features
scaler = StandardScaler()
X[numerical_columns] = scaler.fit_transform(X[numerical_columns])

# Step 4: Split data into training (70%), validation (20%), and testing sets (10%)
# First, split into 70% training and 30% temporary (which will be split into validation and test sets)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)

# Then split the temporary set (30%) into validation (20%) and test (10%) sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=1/3, random_state=42)

# Display the sizes of each split to verify
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Validation set size: {X_val.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")

# Step 5: Create and train the Multi-Layer Perceptron model using training data
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=300, activation='relu', solver='adam', random_state=34)
mlp.fit(X_train, y_train)

# Step 6: Make predictions on the validation and test sets
y_val_pred = mlp.predict(X_val)
y_test_pred = mlp.predict(X_test)

# Step 7: Evaluate the model on validation set
print("Validation Accuracy:", accuracy_score(y_val, y_val_pred))
print("\nValidation Classification Report:\n", classification_report(y_val, y_val_pred, labels=range(len(target_encoder.classes_)), target_names=target_encoder.classes_, zero_division=0))

# Evaluate the model on the test set
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print("\nTest Classification Report:\n", classification_report(y_test, y_test_pred, labels=range(len(target_encoder.classes_)), target_names=target_encoder.classes_, zero_division=0))


  merged_data = pd.merge(merged_df_adjusted, admissions_df, on=['subject_id', 'hadm_id'], how='inner')


Training set size: 2865 samples
Validation set size: 819 samples
Testing set size: 410 samples
Validation Accuracy: 0.5738705738705738

Validation Classification Report:
                               precision    recall  f1-score   support

              ACUTE HOSPITAL       0.00      0.00      0.00         3
              AGAINST ADVICE       0.00      0.00      0.00         6
             ASSISTED LIVING       0.00      0.00      0.00         1
CHRONIC/LONG TERM ACUTE CARE       0.19      0.19      0.19        26
                        DIED       0.43      0.36      0.39        42
         HEALTHCARE FACILITY       0.00      0.00      0.00         1
                        HOME       0.71      0.87      0.78       432
            HOME HEALTH CARE       0.35      0.38      0.36       177
                     HOSPICE       0.00      0.00      0.00        11
              OTHER FACILITY       0.00      0.00      0.00         3
              PSYCH FACILITY       0.00      0.00      0.0