<a href="https://colab.research.google.com/github/thomasbeck95/CTM/blob/main/CTM_hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np

# Define possible task types
task_types = [
    "prescription request", "patient communication (via phone, NHS app, AccuRX)",
    "sick notes", "referral letters", "medical reports", "review results"
]

# Define comorbidities
comorbidities = ["epilepsy", "historical heart attack / stroke", "diabetes", "mental health"]

# Priority mapping logic
def assign_priority(task, comorbidities, time_since_requested, critical_result):
    if task == "review results":
        if critical_result:
            return "red"
        return "amber"

    if task.startswith("patient communication"):
        return "red"

    if task == "prescription request":
        if comorbidities["epilepsy"]:
            return "red"
        if comorbidities["historical heart attack / stroke"]:
            return "amber"
        return "green"

    if task in ["sick notes", "referral letters", "medical reports"]:
        return "green"

    # Escalation over time
    if time_since_requested > 72:
        return "red"
    elif time_since_requested > 48:
        return "amber"

    return "green"

# Generate synthetic dataset
np.random.seed(42)
num_samples = 500

# Modify priority assignment to introduce uncertainty
def assign_priority_with_uncertainty(task, comorbidities, time_since_requested, critical_result):
    base_priority = assign_priority(task, comorbidities, time_since_requested, critical_result)

    # Introduce randomness to create edge cases
    if np.random.rand() < 0.1:  # 10% chance of assigning a random priority
        return np.random.choice(["red", "amber", "green"])

    # Add uncertainty based on time since requested
    if base_priority == "green" and time_since_requested > 60 and np.random.rand() < 0.3:
        return "amber"  # Some green tasks escalate faster than expected

    if base_priority == "amber" and time_since_requested < 24 and np.random.rand() < 0.3:
        return "green"  # Some amber tasks remain lower priority

    if base_priority == "red" and time_since_requested < 12 and np.random.rand() < 0.3:
        return "amber"  # Some red tasks might be slightly over-prioritized

    return base_priority

# Generate dataset with uncertainty
data_with_uncertainty = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    critical_result = 0
    if task == "review results":
        critical_result = np.random.choice([0, 1], p=[0.8, 0.2])

    priority = assign_priority_with_uncertainty(task, comorbidity_flags, time_since_requested, critical_result)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration, critical_result, priority]
    data_with_uncertainty.append(row)

# Create DataFrame
df_uncertain = pd.DataFrame(data_with_uncertainty, columns=columns)

# Show sample of dataset with uncertainty
df_uncertain.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Priority
0,referral letters,1,0,0,0,21.431879,75,57.813623,0,amber
1,sick notes,0,1,0,0,22.000149,21,76.010374,0,green
2,medical reports,0,0,0,0,24.427182,80,31.768704,0,amber
3,sick notes,0,1,0,1,32.966766,73,12.480933,0,green
4,"patient communication (via phone, NHS app, Acc...",0,0,0,0,33.031797,44,61.945345,0,amber


In [9]:
df

Unnamed: 0,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Priority,"Task Type_patient communication (via phone, NHS app, AccuRX)",Task Type_prescription request,Task Type_referral letters,Task Type_review results,Task Type_sick notes
0,1,0,0,0,21.431879,75,57.813623,0,green,False,False,True,False,False
1,0,0,0,1,36.313738,38,5.089558,0,green,False,False,False,False,False
2,0,0,0,0,24.407041,59,50.984012,0,green,False,False,True,False,False
3,1,0,0,0,26.414164,64,58.677733,0,green,False,False,True,False,False
4,0,0,0,1,39.243905,9,6.836119,0,green,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0,0,0,0,25.709092,49,37.385845,1,red,False,False,False,True,False
496,0,0,0,0,35.478578,99,21.359353,0,green,False,False,True,False,False
497,0,0,1,0,39.666606,88,47.838527,0,green,False,False,False,False,True
498,0,0,0,0,24.004900,14,56.609562,0,green,False,False,True,False,False


In [8]:
# Define additional features specific to prescription requests
medication_types = {
    "high": ["insulin", "anti-epileptics", "immunosuppressants"],
    "medium": ["blood pressure medication", "statins", "SSRI"],
    "low": ["vitamins", "emollients", "mild pain relief"],
}

request_types = {
    "high": ["out-of-hours gp", "pharmacist", "phone"],
    "medium": ["online", "NHS app"],
    "low": ["routine scheduled repeat"]
}

# Function to assign prescription-related features
def generate_prescription_features():
    med_risk = np.random.choice(["high", "medium", "low"], p=[0.2, 0.4, 0.4])
    medication = np.random.choice(medication_types[med_risk])
    days_until_out = np.random.randint(-3, 10)  # Can be negative if already run out
    polypharmacy = np.random.choice([0, 1], p=[0.8, 0.2])  # 20% chance of polypharmacy
    request_type_risk = np.random.choice(["high", "medium", "low"], p=[0.3, 0.5, 0.2])
    request_type = np.random.choice(request_types[request_type_risk])
    monitoring_required = np.random.choice([0, 1], p=[0.2, 0.8])  # 20% chance of needing monitoring
    return medication, days_until_out, polypharmacy, request_type, monitoring_required

# Updated priority function
def assign_priority_with_prescription_features(task, comorbidities, time_since_requested,
                                               critical_result, medication, days_until_out,
                                               polypharmacy, request_type, monitoring_required):
    if task == "review results":
        return "red" if critical_result else "amber"

    if task.startswith("patient communication"):
        return "red"

    if task == "prescription request":
        # Higher risk medication & emergency supply needed
        if days_until_out <= 1:
            return "red"
        if days_until_out <= 3:
            return "amber"

        # Medication risk level
        if medication in medication_types["high"]:
            return "red" if days_until_out <= 3 else "amber"
        if medication in medication_types["medium"]:
            return "amber" if days_until_out <= 3 else "green"

        # Polypharmacy and monitoring requirement influence
        if polypharmacy or monitoring_required:
            return "red" if days_until_out <= 3 else "amber"

        # Request type influence
        if request_type in request_types["high"]:
            return "red"
        if request_type in request_types["medium"]:
            return "amber"

        return "green"

    if task in ["sick notes", "referral letters", "medical reports"]:
        return "green"

    # Escalation over time
    if time_since_requested > 72:
        return "red"
    elif time_since_requested > 48:
        return "amber"

    return "green"

# Generate dataset with new features for prescription requests
data_expanded = []
for _ in range(num_samples):
    task = np.random.choice(task_types)
    comorbidity_flags = {c: np.random.choice([0, 1], p=[0.85, 0.15]) for c in comorbidities}
    bmi = np.random.uniform(18, 40)
    time_since_requested = np.random.randint(1, 100)  # Hours
    estimated_duration = np.random.uniform(5, 120)  # Minutes

    critical_result = 0
    medication = None
    days_until_out = None
    polypharmacy = None
    request_type = None
    monitoring_required = None

    if task == "review results":
        critical_result = np.random.choice([0, 1], p=[0.8, 0.2])

    if task == "prescription request":
        medication, days_until_out, polypharmacy, request_type, monitoring_required = generate_prescription_features()

    priority = assign_priority_with_prescription_features(task, comorbidity_flags, time_since_requested,
                                                          critical_result, medication, days_until_out,
                                                          polypharmacy, request_type, monitoring_required)

    row = [task] + list(comorbidity_flags.values()) + [bmi, time_since_requested, estimated_duration,
                                                       critical_result, medication, days_until_out,
                                                       polypharmacy, request_type, monitoring_required, priority]
    data_expanded.append(row)

# Create DataFrame with expanded features
columns_expanded = ["Task Type"] + comorbidities + ["BMI", "Time since task requested",
                   "Estimated task duration", "Critical result flag", "Medication",
                   "Days until out of supply", "Polypharmacy", "Request Type",
                   "Monitoring Required", "Priority"]

df_expanded = pd.DataFrame(data_expanded, columns=columns_expanded)

# Show sample of updated dataset
df_expanded.head()


Unnamed: 0,Task Type,epilepsy,historical heart attack / stroke,diabetes,mental health,BMI,Time since task requested,Estimated task duration,Critical result flag,Medication,Days until out of supply,Polypharmacy,Request Type,Monitoring Required,Priority
0,medical reports,1,0,0,0,19.688677,60,24.535348,0,,,,,,green
1,referral letters,0,0,0,0,18.528111,97,7.555125,0,,,,,,green
2,prescription request,0,1,0,0,24.938002,94,61.39698,0,anti-epileptics,-2.0,0.0,online,0.0,red
3,referral letters,0,0,0,0,39.835322,53,84.03899,0,,,,,,green
4,review results,0,0,0,0,39.520855,23,6.917719,0,,,,,,amber


In [11]:
df_expanded

AttributeError: 'DataFrame' object has no attribute 'get_dtypes'

In [10]:
# prompt: write me code to use scikit-learn to train a classification model. outcome is 'Priority' from df, all other columns are input features. assess performance with classification matrix (plot this)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

categorical_features = df_expanded.get_dtypes()
# Convert categorical features to numerical using one-hot encoding
df_expanded = pd.get_dummies(df_expanded, columns=['Task Type'], drop_first=True)

# Define features (X) and target (y)
X = df_expanded.drop('Priority', axis=1)
y = df_expanded['Priority']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

# Create and plot the confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['amber', 'green', 'red'], yticklabels=['amber', 'green', 'red'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


ValueError: could not convert string to float: 'mild pain relief'