<a href="https://colab.research.google.com/github/tripti369/Personlized-healthcare/blob/main/Personalized_Healthcare_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib # For saving the trained model

In [5]:
# Load the blood-donation dataset and prepare the binary target.
# NOTE(review): absolute path at the filesystem root -- this only works where the
# file was placed there; adjust DATA_PATH for other environments.
DATA_PATH = '/blood.csv'
COLUMN_NAMES = ['Recency_Months', 'Frequency_Total', 'Monetary_Total_CC', 'Time_Months', 'Donated_Next_Period']

df = pd.read_csv(DATA_PATH)
# Positional rename: assumes the CSV holds exactly these five columns in this
# order (pandas raises ValueError when the column count differs).
df.columns = COLUMN_NAMES
print(f"Dataset 'blood.csv' loaded with {len(df)} samples.")
# Binarize the target with a vectorized comparison instead of a row-wise apply:
# values above 0.5 become 1, everything else becomes 0.
df['Donated_Next_Period'] = (df['Donated_Next_Period'] > 0.5).astype(int)

Dataset 'blood.csv' loaded with 748 samples.


In [6]:
# Show the first rows of the renamed frame as this cell's rich output.
df.head()

Unnamed: 0,Recency_Months,Frequency_Total,Monetary_Total_CC,Time_Months,Donated_Next_Period
0,2,50,12500,99,1
1,0,13,3250,28,1
2,1,17,4000,36,1
3,2,20,5000,45,1
4,1,24,6000,77,0


In [7]:
# First look at the data: leading rows, then the share of each target class.
print("\n--- Initial Data Exploration ---")
first_rows = df.head()
print(first_rows)

print("\nTarget Distribution ('Donated_Next_Period'):")
class_shares = df['Donated_Next_Period'].value_counts(normalize=True)
print(class_shares)


--- Initial Data Exploration ---
   Recency_Months  Frequency_Total  Monetary_Total_CC  Time_Months  \
0               2               50              12500           99   
1               0               13               3250           28   
2               1               17               4000           36   
3               2               20               5000           45   
4               1               24               6000           77   

   Donated_Next_Period  
0                    1  
1                    1  
2                    1  
3                    1  
4                    0  

Target Distribution ('Donated_Next_Period'):
Donated_Next_Period
0    0.762032
1    0.237968
Name: proportion, dtype: float64


In [8]:
# Separate the label the model should learn from the remaining predictor columns.
target_column = 'Donated_Next_Period'
y = df[target_column]
X = df.drop(columns=[target_column])

In [9]:
# All predictor columns are treated as numerical features; one scaler is used for them.
numerical_features = list(X.columns)
preprocessor = StandardScaler()
print("Data preprocessing defined (StandardScaler).")


Data preprocessing defined (StandardScaler).


In [10]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and uses
# a fixed seed so that it is repeatable.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"\nTraining set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")



Training set size: 598
Testing set size: 150


In [11]:
# Random forest with balanced class weights, preceded by the scaling step.
forest = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    class_weight='balanced',
)
model_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', forest)])

# Fit the scaler and the classifier together on the training partition.
print("\nTraining Random Forest Classifier...")
model_pipeline.fit(X_train, y_train)
print("Model training complete.")




Training Random Forest Classifier...
Model training complete.


In [12]:

# Hard class labels plus the class-1 probability for every held-out row.
y_pred = model_pipeline.predict(X_test)
class_probabilities = model_pipeline.predict_proba(X_test)
y_proba = class_probabilities[:, 1]  # Probability of Class 1 (Donated)

print("\n--- Model Evaluation ---")
auc = roc_auc_score(y_test, y_proba)
print(f"ROC AUC Score: {auc:.4f}")

# Confusion matrix layout: rows are true labels, columns are predicted labels.
print("Confusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Per-class precision, recall and F1-score.
print("\nClassification Report (Key Internship Deliverable):")
class_labels = ['0: Will NOT Donate', '1: Will Donate']
print(classification_report(y_test, y_pred, target_names=class_labels))



--- Model Evaluation ---
ROC AUC Score: 0.7141
Confusion Matrix:
[[90 24]
 [19 17]]

Classification Report (Key Internship Deliverable):
                    precision    recall  f1-score   support

0: Will NOT Donate       0.83      0.79      0.81       114
    1: Will Donate       0.41      0.47      0.44        36

          accuracy                           0.71       150
         macro avg       0.62      0.63      0.62       150
      weighted avg       0.73      0.71      0.72       150



In [13]:
# Maps a predicted class (0 or 1) to the outreach recommendation text that
# generate_personalized_recommendation() places in its returned message.
RECOMMENDATION_MAP = {
    0: 'Standard Outreach: Send a general email reminder next quarter.',
    1: 'Personalized Follow-up: High probability of donation. Send an urgent, personalized invitation.'
}

def generate_personalized_recommendation(donor_data: pd.DataFrame, model: Pipeline) -> str:
    """Build a recommendation message for the donor in the first row of ``donor_data``.

    Args:
        donor_data: Frame with the donor's features. Only row 0 is used, and the
            columns 'Recency_Months', 'Frequency_Total' and 'Monetary_Total_CC'
            are read for the explanatory text.
        model: Estimator whose ``predict`` and ``predict_proba`` are called on
            ``donor_data``.

    Returns:
        The recommendation text followed by the predicted probability and the
        donor's key metrics. If any ``Exception`` is raised along the way, a
        string starting with "An error occurred during prediction:" is returned
        instead of propagating it.
    """
    try:
        # Class label and class-1 probability for the first row.
        predicted_label = model.predict(donor_data)[0]
        donate_probability = model.predict_proba(donor_data)[0][1]

        # Labels missing from the map fall back to an error marker rather than raising.
        message = RECOMMENDATION_MAP.get(predicted_label, "Error: Unknown class.")

        # Key metrics quoted back to the reader, read in a fixed order.
        recency, frequency, monetary = (
            donor_data[column].iloc[0]
            for column in ('Recency_Months', 'Frequency_Total', 'Monetary_Total_CC')
        )

        probability_line = f" (Predicted Probability: {donate_probability*100:.1f}%).\n"
        metrics_line = (
            f"   - Metrics: Last Donated {recency} months ago, "
            f"Total Donations: {frequency}, Total Volume: {monetary} CC."
        )
        return f"Recommendation: {message}{probability_line}{metrics_line}"
    except Exception as e:
        return f"An error occurred during prediction: {e}"


In [14]:
# Example Donor 1: Low Engagement (Long Recency, Low Frequency) -> Prediction 0 likely
# Single-row feature frame for a donor who gave rarely and long ago.
_low_engagement_features = {
    'Recency_Months': [30],        # last donation was 30 months ago
    'Frequency_Total': [5],        # 5 donations in total
    'Monetary_Total_CC': [1250],   # total donated volume in CC
    'Time_Months': [70],
}
donor_low_engagement = pd.DataFrame(data=_low_engagement_features, index=[0])

In [15]:
# Example Donor 2: High Engagement (Short Recency, High Frequency) -> Prediction 1 likely
# Single-row feature frame for a donor who gave often and recently.
_high_engagement_features = {
    'Recency_Months': [2],          # last donation was 2 months ago
    'Frequency_Total': [50],        # 50 donations in total
    'Monetary_Total_CC': [12500],   # total donated volume in CC
    'Time_Months': [99],
}
donor_high_engagement = pd.DataFrame(data=_high_engagement_features, index=[0])

In [16]:
# Generate and print the recommendation for each example donor, separated by a rule.
print("\n--- Personalized Recommendation Generation ---")
low_engagement_report = generate_personalized_recommendation(donor_low_engagement, model_pipeline)
print(f"Donor 1 (Low Engagement): {low_engagement_report}")
print("-" * 50)
high_engagement_report = generate_personalized_recommendation(donor_high_engagement, model_pipeline)
print(f"Donor 2 (High Engagement): {high_engagement_report}")




--- Personalized Recommendation Generation ---
Donor 1 (Low Engagement): Recommendation: Standard Outreach: Send a general email reminder next quarter. (Predicted Probability: 26.0%).
   - Metrics: Last Donated 30 months ago, Total Donations: 5, Total Volume: 1250 CC.
--------------------------------------------------
Donor 2 (High Engagement): Recommendation: Personalized Follow-up: High probability of donation. Send an urgent, personalized invitation. (Predicted Probability: 80.0%).
   - Metrics: Last Donated 2 months ago, Total Donations: 50, Total Volume: 12500 CC.


In [17]:


# Persist the trained pipeline to disk so it can be reused later or deployed.
MODEL_FILENAME = 'personalized_healthcare_model.pkl'
joblib.dump(value=model_pipeline, filename=MODEL_FILENAME)
print(f"\nModel saved successfully to {MODEL_FILENAME}. This is crucial for deployment.")


Model saved successfully to personalized_healthcare_model.pkl. This is crucial for deployment.
