In [1]:
import pandas as pd

# Load the data
file_path = 'expanded_risk_medicine_data (1).csv'
data = pd.read_csv(file_path)

# Display the first few rows to understand the data structure
print(data.head())

  Medicine Name  Dosage     Form        Frequency      Duration  \
0       Aspirin   500mg   Tablet       Once daily        7 days   
1     Metformin  1000mg   Tablet      Twice daily       30 days   
2     Ibuprofen   200mg   Tablet  Every 4-6 hours     As needed   
3   Amoxicillin   500mg  Capsule      Twice daily       10 days   
4    Lisinopril    10mg   Tablet       Once daily  Indefinitely   

            High Risk            Low Risk  
0         Memory loss  Exercise regularly  
1  Frequent confusion  Exercise regularly  
2  Frequent confusion  Exercise regularly  
3   Cognitive decline  Exercise regularly  
4  Frequent confusion  Exercise regularly  


In [4]:

# Define evaluation functions
def calculate_precision(actual, recommended):
    true_positives = len(actual & recommended)
    total_predicted_positives = len(recommended)
    return true_positives / total_predicted_positives if total_predicted_positives > 0 else 0

def calculate_recall(actual, recommended):
    true_positives = len(actual & recommended)
    total_actual_positives = len(actual)
    return true_positives / total_actual_positives if total_actual_positives > 0 else 0

def calculate_f1_score(precision, recall):
    return 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

def calculate_accuracy(actual, recommended):
    correct_recommendations = len(actual & recommended)
    total_recommendations = len(actual | recommended)
    return correct_recommendations / total_recommendations if total_recommendations > 0 else 0

In [5]:
# Placeholder for actual and recommended medication sets
# These should be derived from your system's outputs and actual needs
actual_medications = {'Aspirin', 'Metformin'}  # Example set of actual medications needed
recommended_medications = {'Aspirin', 'Ibuprofen'}  # Example set of medications recommended by the system

# Calculate metrics
precision = calculate_precision(actual_medications, recommended_medications)
recall = calculate_recall(actual_medications, recommended_medications)
f1_score = calculate_f1_score(precision, recall)
accuracy = calculate_accuracy(actual_medications, recommended_medications)

# Print results
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1_score:.2f}")
print(f"Accuracy: {accuracy:.2f}")

Precision: 0.50
Recall: 0.50
F1-Score: 0.50
Accuracy: 0.33


In [6]:
import pandas as pd
import numpy as np
from joblib import dump, load
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import VotingClassifier

In [7]:
# Load the dataset
file_path = 'Updated_Alzheimers_Data_Numeric.csv'
data = pd.read_csv(file_path)

In [8]:
# Define the confidence threshold
confidence_threshold = 50

In [9]:
# Create the target variable (binary classification)
data['Target'] = (data['High_Confidence_Limit'] > confidence_threshold).astype(int)

In [11]:
# Define features and target, including additional features for better model performance
features = data[['Topic_Numeric', 'Question_Numeric', 'Data_Value', 'Low_Confidence_Limit', 'High_Confidence_Limit']]
target = data['Target']

In [13]:
# Feature Engineering: Creating new feature as the difference in confidence limits
features['Confidence_Width'] = data['High_Confidence_Limit'] - data['Low_Confidence_Limit']
# Preprocessing for numeric columns (Standard Scaling)
numeric_features = ['Topic_Numeric', 'Question_Numeric', 'Data_Value', 'Low_Confidence_Limit', 'High_Confidence_Limit', 'Confidence_Width']
numeric_preprocessor = StandardScaler()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Confidence_Width'] = data['High_Confidence_Limit'] - data['Low_Confidence_Limit']


In [14]:
# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_preprocessor, numeric_features)
    ]
)

# Create a neural network classifier with hyperparameter tuning using GridSearchCV
mlp = MLPClassifier(random_state=42)
param_grid = {
    'classifier__hidden_layer_sizes': [(50, 30), (100, 50), (100,)],
    'classifier__activation': ['tanh', 'relu'],
    'classifier__solver': ['adam', 'sgd'],
    'classifier__alpha': [0.0001, 0.001],
    'classifier__learning_rate': ['constant', 'adaptive']
}

# Create a pipeline with preprocessing and neural network classifier
neural_network_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', mlp)
])


In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Use GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(neural_network_pipeline, param_grid, cv=5, scoring='roc_auc', verbose=2)
grid_search.fit(X_train, y_train)

# Best parameters from grid search
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# Evaluate the model using the best estimator
best_model = grid_search.best_estimator_
y_pred_nn = best_model.predict(X_test)
y_pred_proba_nn = best_model.predict_proba(X_test)[:, 1]

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END classifier__activation=tanh, classifier__alpha=0.0001, classifier__hidden_layer_sizes=(50, 30), classifier__learning_rate=constant, classifier__solver=adam; total time=  19.0s
[CV] END classifier__activation=tanh, classifier__alpha=0.0001, classifier__hidden_layer_sizes=(50, 30), classifier__learning_rate=constant, classifier__solver=adam; total time=  27.2s
[CV] END classifier__activation=tanh, classifier__alpha=0.0001, classifier__hidden_layer_sizes=(50, 30), classifier__learning_rate=constant, classifier__solver=adam; total time=  29.0s
[CV] END classifier__activation=tanh, classifier__alpha=0.0001, classifier__hidden_layer_sizes=(50, 30), classifier__learning_rate=constant, classifier__solver=adam; total time=  25.1s
[CV] END classifier__activation=tanh, classifier__alpha=0.0001, classifier__hidden_layer_sizes=(50, 30), classifier__learning_rate=constant, classifier__solver=adam; total time=  15.0s
[CV] END clas

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Simulate some results
actual = ['High Risk', 'High Risk', 'High Risk', 'High Risk', 'Low Risk']
predicted = ['High Risk', 'High Risk', 'High Risk', 'High Risk', 'High Risk']

# Generate and display confusion matrix
cm = confusion_matrix(actual, predicted, labels=['High Risk', 'Low Risk'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['High Risk', 'Low Risk'])
disp.plot(cmap='Blues', values_format='d')
plt.title('Confusion Matrix for Medication Recommendation System')
plt.show()