In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
disease_encoder = LabelEncoder()
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Disease'] = disease_encoder.fit_transform(df['Disease'])
df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_disease = df['Disease']
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_disease_train, y_disease_test = train_test_split(X, y_disease, test_size=0.2, random_state=42)
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_disease = RandomForestClassifier(n_estimators=100, random_state=42)
model_drug = RandomForestClassifier(n_estimators=100, random_state=42)
model_dosage = RandomForestClassifier(n_estimators=100, random_state=42)
model_treatment = RandomForestClassifier(n_estimators=100, random_state=42)

model_disease.fit(X_train, y_disease_train)
model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_disease_pred = model_disease.predict(X_test)
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_disease = accuracy_score(y_disease_test, y_disease_pred)
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Disease Prediction Accuracy: {accuracy_disease:.2f}")
print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_disease, "/mnt/data/disease_prediction_model.pkl")
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(disease_encoder, "/mnt/data/disease_encoder.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['No'],  # Yes/No
    'Cough': ['Yes'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Female'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['High'],  # Encoded level
    'Sugar Level': ['Normal'],  # Encoded level
    'Blood Pressure Level': ['Low'],  # Encoded level
    'Age': [10]  # Age in years
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_disease = model_disease.predict(sample_input)[0]
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_disease = disease_encoder.inverse_transform([predicted_disease])[0]
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Predicted Disease: {predicted_disease}")
print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


Disease Prediction Accuracy: 0.09
Drug Recommendation Accuracy: 0.84
Dosage Recommendation Accuracy: 0.85
Treatment Recommendation Accuracy: 0.85
Predicted Disease: Rheumatoid Arthritis
Recommended Drug: Amlodipine, Lisinopril
Recommended Dosage: Consult Doctor
Recommended Treatment: Consult specialist for treatment


In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
disease_encoder = LabelEncoder()
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Disease'] = disease_encoder.fit_transform(df['Disease'])
df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_disease = df['Disease']
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_disease_train, y_disease_test = train_test_split(X, y_disease, test_size=0.2, random_state=42, stratify=y_disease)
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_disease = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False)
model_drug = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False)
model_dosage = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False)
model_treatment = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False)

model_disease.fit(X_train, y_disease_train)
model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_disease_pred = model_disease.predict(X_test)
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_disease = accuracy_score(y_disease_test, y_disease_pred)
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Disease Prediction Accuracy: {accuracy_disease:.2f}")
print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_disease, "/mnt/data/disease_prediction_model.pkl")
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(disease_encoder, "/mnt/data/disease_encoder.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['No'],  # Yes/No
    'Cough': ['Yes'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Female'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['High'],  # Encoded level
    'Sugar Level': ['Normal'],  # Encoded level
    'Blood Pressure Level': ['Low'],  # Encoded level
    'Age': [10]  # Age in years
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_disease = model_disease.predict(sample_input)[0]
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_disease = disease_encoder.inverse_transform([predicted_disease])[0]
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Predicted Disease: {predicted_disease}")
print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
disease_encoder = LabelEncoder()
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Disease'] = disease_encoder.fit_transform(df['Disease'])
df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_disease = df['Disease']
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_disease_train, y_disease_test = train_test_split(X, y_disease, test_size=0.2, random_state=42)
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_disease = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_drug = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_dosage = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_treatment = GradientBoostingClassifier(n_estimators=100, random_state=42)

model_disease.fit(X_train, y_disease_train)
model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_disease_pred = model_disease.predict(X_test)
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_disease = accuracy_score(y_disease_test, y_disease_pred)
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Disease Prediction Accuracy: {accuracy_disease:.2f}")
print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_disease, "/mnt/data/disease_prediction_model.pkl")
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(disease_encoder, "/mnt/data/disease_encoder.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['No'],  # Yes/No
    'Cough': ['Yes'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Female'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['High'],  # Encoded level
    'Sugar Level': ['Normal'],  # Encoded level
    'Blood Pressure Level': ['Low'],  # Encoded level
    'Age': [10]  # Age in years
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_disease = model_disease.predict(sample_input)[0]
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_disease = disease_encoder.inverse_transform([predicted_disease])[0]
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Predicted Disease: {predicted_disease}")
print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


Disease Prediction Accuracy: 0.03
Drug Recommendation Accuracy: 0.79
Dosage Recommendation Accuracy: 0.79
Treatment Recommendation Accuracy: 0.81
Predicted Disease: Rheumatoid Arthritis
Recommended Drug: Salbutamol, Corticosteroids
Recommended Dosage: 400 mcg Salbutamol
Recommended Treatment: Consult specialist for treatment


In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_drug = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_dosage = GradientBoostingClassifier(n_estimators=100, random_state=42)
model_treatment = GradientBoostingClassifier(n_estimators=100, random_state=42)

model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['Yes'],  # Yes/No
    'Cough': ['No'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Male'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['Normal'],  # Encoded level
    'Sugar Level': ['High'],  # Encoded level
    'Blood Pressure Level': ['High'],  # Encoded level
    # 'Disease': ['Anxiety Disorders'],
    'Age': [28]  # Age in years
    
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


Drug Recommendation Accuracy: 0.79
Dosage Recommendation Accuracy: 0.79
Treatment Recommendation Accuracy: 0.81
Recommended Drug: Salbutamol, Corticosteroids
Recommended Dosage: 400 mcg Salbutamol
Recommended Treatment: Salbutamol inhaler 2 puffs PRN, Inhaled corticosteroids (Budesonide 200 mcg BID), Avoid triggers


In [49]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib
import xgboost as xgb

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_drug = xgb.XGBClassifier(n_estimators=100, random_state=42)
model_dosage = xgb.XGBClassifier(n_estimators=100, random_state=42)
model_treatment = xgb.XGBClassifier(n_estimators=100, random_state=42)

model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['Yes'],  # Yes/No
    'Cough': ['No'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Male'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['Normal'],  # Encoded level
    'Sugar Level': ['High'],  # Encoded level
    'Blood Pressure Level': ['High'],  # Encoded level
    # 'Disease': ['Anxiety Disorders'],
    'Age': [28]  # Age in years
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


Drug Recommendation Accuracy: 0.79
Dosage Recommendation Accuracy: 0.84
Treatment Recommendation Accuracy: 0.85
Recommended Drug: Salbutamol, Corticosteroids
Recommended Dosage: 400 mcg Salbutamol
Recommended Treatment: Salbutamol inhaler 2 puffs PRN, Inhaled corticosteroids (Budesonide 200 mcg BID), Avoid triggers


In [51]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("final_refined_medical_data.csv")

# Encoding categorical variables
label_encoders = {}
categorical_columns = ['Fever', 'Cough', 'Fatigue', 'Difficulty Breathing', 'Gender', 'Blood Pressure', 'Cholesterol Level', 'Sugar Level', 'Blood Pressure Level']

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variables
drug_encoder = LabelEncoder()
dosage_encoder = LabelEncoder()
treatment_encoder = LabelEncoder()

df['Drug Dosage'] = dosage_encoder.fit_transform(df['Drug Dosage'])
df['Personalized Treatment'] = treatment_encoder.fit_transform(df['Personalized Treatment'])
df['Drug Names'] = drug_encoder.fit_transform(df['Drug Names'])

# Define features and targets
X = df[categorical_columns + ['Age']]
y_drug = df['Drug Names']
y_dosage = df['Drug Dosage']
y_treatment = df['Personalized Treatment']

# Split dataset
X_train, X_test, y_drug_train, y_drug_test = train_test_split(X, y_drug, test_size=0.2, random_state=42)
X_train, X_test, y_dosage_train, y_dosage_test = train_test_split(X, y_dosage, test_size=0.2, random_state=42)
X_train, X_test, y_treatment_train, y_treatment_test = train_test_split(X, y_treatment, test_size=0.2, random_state=42)

# Train models
model_drug = SVC(probability=True, random_state=42)
model_dosage = SVC(probability=True, random_state=42)
model_treatment = SVC(probability=True, random_state=42)

model_drug.fit(X_train, y_drug_train)
model_dosage.fit(X_train, y_dosage_train)
model_treatment.fit(X_train, y_treatment_train)

# Predictions
y_drug_pred = model_drug.predict(X_test)
y_dosage_pred = model_dosage.predict(X_test)
y_treatment_pred = model_treatment.predict(X_test)

# Evaluate accuracy
accuracy_drug = accuracy_score(y_drug_test, y_drug_pred)
accuracy_dosage = accuracy_score(y_dosage_test, y_dosage_pred)
accuracy_treatment = accuracy_score(y_treatment_test, y_treatment_pred)

print(f"Drug Recommendation Accuracy: {accuracy_drug:.2f}")
print(f"Dosage Recommendation Accuracy: {accuracy_dosage:.2f}")
print(f"Treatment Recommendation Accuracy: {accuracy_treatment:.2f}")

# # Save models and encoders
# joblib.dump(model_drug, "/mnt/data/drug_prediction_model.pkl")
# joblib.dump(model_dosage, "/mnt/data/dosage_prediction_model.pkl")
# joblib.dump(model_treatment, "/mnt/data/treatment_prediction_model.pkl")
# joblib.dump(drug_encoder, "/mnt/data/drug_encoder.pkl")
# joblib.dump(dosage_encoder, "/mnt/data/dosage_encoder.pkl")
# joblib.dump(treatment_encoder, "/mnt/data/treatment_encoder.pkl")
# joblib.dump(label_encoders, "/mnt/data/label_encoders.pkl")

# Sample input with original values
def encode_input(sample):
    for col, encoder in label_encoders.items():
        sample[col] = encoder.transform(sample[col])
    return sample

sample_input = pd.DataFrame({
    'Fever': ['Yes'],  # Yes/No
    'Cough': ['No'],  # Yes/No
    'Fatigue': ['No'],  # Yes/No
    'Difficulty Breathing': ['Yes'],  # Yes/No
    'Gender': ['Male'],  # Male/Female
    'Blood Pressure': ['High'],  # Encoded level
    'Cholesterol Level': ['Normal'],  # Encoded level
    'Sugar Level': ['High'],  # Encoded level
    'Blood Pressure Level': ['High'],  # Encoded level
    'Age': [28]  # Age in years
})

sample_input = encode_input(sample_input)

# Make predictions
predicted_drug = model_drug.predict(sample_input)[0]
predicted_dosage = model_dosage.predict(sample_input)[0]
predicted_treatment = model_treatment.predict(sample_input)[0]

# Decode predictions
predicted_drug = drug_encoder.inverse_transform([predicted_drug])[0]
predicted_dosage = dosage_encoder.inverse_transform([predicted_dosage])[0]
predicted_treatment = treatment_encoder.inverse_transform([predicted_treatment])[0]

print(f"Recommended Drug: {predicted_drug}")
print(f"Recommended Dosage: {predicted_dosage}")
print(f"Recommended Treatment: {predicted_treatment}")


Drug Recommendation Accuracy: 0.33
Dosage Recommendation Accuracy: 0.81
Treatment Recommendation Accuracy: 0.81
Recommended Drug: Amlodipine, Lisinopril
Recommended Dosage: Consult Doctor
Recommended Treatment: Consult specialist for treatment
