# In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.impute import SimpleImputer

# Step 1: Load Datasets and Preprocess Data
# Label column that is split off from the feature columns in both tables.
TARGET_COLUMN = 'dialysis_encoded'

hospital_1 = pd.read_csv('/content/drive/MyDrive/Dataset correction(categorical to numerical)/HOSPITAL_1_transformed_with_target.csv')
hospital_2 = pd.read_csv('/content/drive/MyDrive/Dataset correction(categorical to numerical)/HOSPITAL_2_transformed_with_target.csv')

# Features are every column except the label; the label is kept as its own series.
# NOTE(review): assumes both CSVs carry the same feature columns in the same
# order, because Step 5 feeds hospital 1's matrix to hospital 2's model -- confirm.
X_1 = hospital_1.drop(TARGET_COLUMN, axis=1)
y_1 = hospital_1[TARGET_COLUMN]

X_2 = hospital_2.drop(TARGET_COLUMN, axis=1)
y_2 = hospital_2[TARGET_COLUMN]

# Step 2: Handle Missing Data using SimpleImputer (mean imputation)
# The single imputer object is refit for each hospital, so every hospital's gaps
# are filled with that hospital's own column means. After this step `imputer`
# holds the statistics fitted on hospital 2.
imputer = SimpleImputer(strategy='mean')
X_1_imputed = imputer.fit(X_1).transform(X_1)
X_2_imputed = imputer.fit(X_2).transform(X_2)

# Step 3: Train Decision Tree Models for Both Hospitals
# Both hospitals use identical hyperparameters, including the same fixed seed.
TREE_PARAMS = {
    'random_state': 42,
    'max_depth': 5,
    'min_samples_split': 10,
    'min_samples_leaf': 5,
}
model_1, model_2 = (DecisionTreeClassifier(**TREE_PARAMS) for _ in range(2))

# Each model only ever sees its own hospital's imputed features and labels.
model_1.fit(X_1_imputed, y_1)
model_2.fit(X_2_imputed, y_2)

# Step 4: Evaluate Accuracy for Each Model on the Respective Dataset
# The models are scored on the same rows they were fitted on, so these are
# training-set metrics rather than estimates of performance on unseen patients.
y_pred_1 = model_1.predict(X_1_imputed)
accuracy_1 = accuracy_score(y_true=y_1, y_pred=y_pred_1)
conf_matrix_1 = confusion_matrix(y_true=y_1, y_pred=y_pred_1)

y_pred_2 = model_2.predict(X_2_imputed)
accuracy_2 = accuracy_score(y_true=y_2, y_pred=y_pred_2)
conf_matrix_2 = confusion_matrix(y_true=y_2, y_pred=y_pred_2)

# Report the per-hospital results.
print(f"Accuracy for Hospital 1 Model: {accuracy_1}")
print(f"Confusion Matrix for Hospital 1 Model: \n{conf_matrix_1}")

print(f"Accuracy for Hospital 2 Model: {accuracy_2}")
print(f"Confusion Matrix for Hospital 2 Model: \n{conf_matrix_2}")

# Step 5: Federated Learning Concept (Combining the Models)
# Both locally trained trees score hospital 1's imputed feature matrix. This is
# the data model_1 was fitted on, not a held-out test set, so the figures below
# are optimistic for model_1's share of the vote.

# Hard-label predictions of each local model (kept so that any later cell that
# refers to these names keeps working).
y_pred_federated_1 = model_1.predict(X_1_imputed)
y_pred_federated_2 = model_2.predict(X_1_imputed)

# Combine the models by soft voting: average the per-class probabilities and
# pick the most probable class. Averaging the hard 0/1 labels and rounding
# turns every disagreement into 0.5, which NumPy rounds half-to-even down to 0,
# so the positive class could only win when both models agreed.
proba_1 = pd.DataFrame(model_1.predict_proba(X_1_imputed), columns=model_1.classes_)
proba_2 = pd.DataFrame(model_2.predict_proba(X_1_imputed), columns=model_2.classes_)

# Align on the union of class labels in case one hospital never saw a class;
# a class unknown to a model gets probability 0 from that model.
all_classes = proba_1.columns.union(proba_2.columns)
mean_proba = (
    proba_1.reindex(columns=all_classes, fill_value=0.0)
    + proba_2.reindex(columns=all_classes, fill_value=0.0)
) / 2

# idxmax returns the first maximal column, so only an exact probability tie
# falls back to the lowest class label.
y_pred_federated = mean_proba.idxmax(axis=1).to_numpy()

# Evaluate the central (federated) model against hospital 1's labels
accuracy_federated = accuracy_score(y_1, y_pred_federated)
conf_matrix_federated = confusion_matrix(y_1, y_pred_federated)

print(f"Federated Model Accuracy: {accuracy_federated}")
print(f"Confusion Matrix for Federated Model: \n{conf_matrix_federated}")
