In [1]:

# Step 1: Import Libraries
import json
import os
import uuid

import joblib
import numpy as np
import pandas as pd
from azure.cosmos import CosmosClient, PartitionKey
from dotenv import load_dotenv
from openai import AzureOpenAI
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
# Load variables from a .env file into the process environment before any
# os.getenv() lookup runs; later cells read OPENAI_KEY and COSMOS_KEY this way.
load_dotenv()

True

In [2]:

# Step 2: Load Dataset
# Read the training table (feature columns plus the DX_CLASS label) from disk
# and preview its first rows as the cell output.
DATASET_PATH = "data/dataset.csv"
df = pd.read_csv(DATASET_PATH)
df.head()


Unnamed: 0,AGE,GENDER,US_ECHO,US_DIAPHRAGM,US_FIBRIN,US_PLEURAL_THICKENING,PF_PROTEIN,PF_LDH,PF_GLUCOSE,PF_ADA,DX_CLASS
0,59,0,1,0,1,3.7,37.0,282,3.5,30.254,1
1,26,0,0,1,0,0.0,54.0,380,4.7,46.5,1
2,53,0,0,0,1,10.0,38.0,3493,0.1,135.9,1
3,23,1,0,1,1,0.0,59.0,675,3.8,80.1,1
4,25,1,0,1,1,0.0,58.0,364,4.6,49.6,1


In [3]:
# Step 3: Type casting (optional, if needed)
# Columns are grouped by their target dtype; the DX_CLASS label is cast to
# int64 together with the other integer-coded columns.
INT64_COLUMNS = ["AGE", "GENDER", "US_ECHO", "US_DIAPHRAGM", "US_FIBRIN", "DX_CLASS"]
FLOAT64_COLUMNS = ["US_PLEURAL_THICKENING", "PF_PROTEIN", "PF_LDH", "PF_GLUCOSE", "PF_ADA"]

column_dtypes = {name: "int64" for name in INT64_COLUMNS}
column_dtypes.update({name: "float64" for name in FLOAT64_COLUMNS})
df = df.astype(column_dtypes)

In [4]:

# Step 4: Preprocess Data
# Separate the DX_CLASS label from the feature columns, then hold out 20% of
# the rows as a test set; the fixed random_state keeps the split reproducible.
# NOTE(review): the split is not stratified; with a small, imbalanced dataset
# consider stratify=y (this would change the reported metrics).
TARGET_COLUMN = "DX_CLASS"
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:

# Step 5: Train Random Forest Model
# Fit a 100-tree random forest on the training split; the fixed random_state
# makes the fitted model reproducible across runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Persist the fitted model. The target folder may not exist on a fresh
# checkout, and writing into a missing directory raises FileNotFoundError,
# so create it first. The dump call stays last so its return value (the list
# of written files) is still shown as the cell output.
MODEL_PATH = "models/pleuroai_model.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)


['models/pleuroai_model.pkl']

In [6]:

# Step 6: Evaluate Model
# Score the held-out test split and report accuracy, the confusion matrix and
# the per-class precision/recall/F1 table.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Confusion Matrix:\n", test_confusion)
print("Classification Report:\n", test_report)


Accuracy: 0.9259259259259259
Confusion Matrix:
 [[17  1]
 [ 1  8]]
Classification Report:
               precision    recall  f1-score   support

           1       0.94      0.94      0.94        18
           2       0.89      0.89      0.89         9

    accuracy                           0.93        27
   macro avg       0.92      0.92      0.92        27
weighted avg       0.93      0.93      0.93        27



In [7]:

# Step 7: Load model and test on new input
# (All imports live in the first cell of the notebook.)

# Load trained model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts written by this notebook or another trusted source.
model = joblib.load("models/pleuroai_model.pkl")

# Example input (MPE test case)
# NOTE(review): the training rows previewed in Step 2 show PF_PROTEIN around
# 37-59 and PF_GLUCOSE around 0.1-4.7, whereas this example uses 5.3 and 60.
# That looks like a possible unit mismatch -- confirm the expected units
# before trusting the prediction below.
input_data = {
    "AGE": 65,
    "GENDER": 1,
    "US_ECHO": 0,
    "US_DIAPHRAGM": 0,
    "US_FIBRIN": 0,
    "US_PLEURAL_THICKENING": 3.0,
    "PF_PROTEIN": 5.3,
    "PF_LDH": 789,
    "PF_GLUCOSE": 60,
    "PF_ADA": 12
}

# Keep the single-row frame under its own name so the training DataFrame `df`
# is not overwritten and the earlier cells remain re-runnable.
input_df = pd.DataFrame([input_data])

# Step 7 (cont.): Predict
# `proba` is ordered like model.classes_, so look up the position of the
# predicted class to read its probability as the confidence.
proba = model.predict_proba(input_df)[0]
prediction_raw = model.predict(input_df)[0]
class_index = list(model.classes_).index(prediction_raw)
confidence = round(proba[class_index] * 100, 2)

# Explicit class-code -> label table. An unknown code raises instead of being
# silently reported as MPE, which keeps the label consistent with the prompt
# logic that only treats class 2 as MPE.
CLASS_LABELS = {
    1: "Likely Tuberculous Pleural Effusion (TPE)",
    2: "Likely Malignant Pleural Effusion (MPE)",
}
if prediction_raw not in CLASS_LABELS:
    raise ValueError(
        f"Unexpected predicted class {prediction_raw!r}; expected one of {sorted(CLASS_LABELS)}"
    )
prediction_label = CLASS_LABELS[prediction_raw]
print(f"Prediction: {prediction_label} with confidence: {confidence}%")

# Step 8: GPT Clinical Advice
# Build the user prompt for the predicted class: 1 -> TPE, 2 -> MPE.
# The raw input dict is embedded verbatim so the model sees every feature.
if prediction_raw == 1:
    prompt = (
        f"The patient is predicted to have Tuberculous Pleural Effusion (TPE). "
        f"Based on this and the following data: {input_data}, please suggest the next clinical steps, "
        f"including confirmatory diagnostics and treatment recommendations."
    )
elif prediction_raw == 2:
    prompt = (
        f"The patient is predicted to have Malignant Pleural Effusion (MPE). "
        f"Based on this and the following data: {input_data}, please suggest relevant next clinical steps, "
        f"including investigations to identify underlying malignancy, staging, and management plans."
    )
else:
    # An unrecognised class must not fall through with an empty prompt, which
    # would otherwise be sent to the chat model as-is.
    raise ValueError(f"No clinical prompt defined for predicted class {prediction_raw!r}")

# Setup OpenAI
# Azure OpenAI client; the API key is read from the OPENAI_KEY environment
# variable rather than being hardcoded in the notebook.
client = AzureOpenAI(
    api_key=os.getenv("OPENAI_KEY"),
    api_version="2023-05-15",
    azure_endpoint="https://openai-tpe-assistant.openai.azure.com/",
)

# Ask the chat model for next-step advice based on `prompt`.
# NOTE(review): "gpt-35-tpebot" is presumably the Azure deployment name -- confirm.
response = client.chat.completions.create(
    model="gpt-35-tpebot",
    messages=[
        {"role": "system", "content": "You are a clinical assistant specialized in tuberculosis and pleural effusion diagnosis."},
        {"role": "user", "content": prompt},
    ],
    temperature=0.4,
    max_tokens=500,
)

choice = response.choices[0]
# The reply is capped at max_tokens; finish_reason == "length" means the text
# was cut off mid-answer. Surface that instead of silently keeping (and later
# storing) a truncated recommendation.
if choice.finish_reason == "length":
    print("Warning: GPT reply was truncated at max_tokens; consider raising the limit.")

gpt_reply = choice.message.content
print("GPT Clinical Suggestion:\n", gpt_reply)


Prediction: Likely Malignant Pleural Effusion (MPE) with confidence: 57.0%
GPT Clinical Suggestion:
 Based on the data provided and the suspicion of Malignant Pleural Effusion (MPE), the following clinical steps can be considered:

1. **Further Investigations**:
   - **Thoracentesis**: Confirm the presence of malignant cells in the pleural fluid through cytology or cell block analysis.
   - **Pleural Biopsy**: Consider image-guided pleural biopsy to establish a definitive diagnosis and identify the underlying malignancy.
   - **Imaging Studies**: Perform imaging studies such as CT scans to evaluate the extent of pleural involvement and identify potential primary tumors.

2. **Staging**:
   - Once the diagnosis of MPE is confirmed, staging of the underlying malignancy should be conducted to determine the extent of disease spread. This may involve imaging studies, such as CT scans or PET scans, and possibly other tests depending on the suspected primary tumor.

3. **Management**:
   - **

In [8]:
# Initialize Cosmos DB
# os.getenv returns None when COSMOS_KEY is unset; check up front so the
# failure names the missing setting instead of surfacing from inside the SDK.
cosmos_key = os.getenv("COSMOS_KEY")
if not cosmos_key:
    raise RuntimeError("COSMOS_KEY is not set; define it in the environment or the .env file.")

cosmos_client = CosmosClient(
    "https://tpe-cosmosdb.documents.azure.com:443/",
    credential=cosmos_key,
)
db = cosmos_client.get_database_client("TPEAssistant")
container = db.get_container_client("Predictions")

# Save to Cosmos DB
# One document per prediction, keyed by a random UUID. It stores the raw
# input features, the human-readable label, the confidence formatted as a
# percentage string, and the GPT reply text.
record = {
    "id": str(uuid.uuid4()),
    "input": input_data,  # raw input
    "prediction": prediction_label,
    "confidence": f"{confidence}%",
    "gpt_response": gpt_reply,
}
container.create_item(body=record)

print("✅ Record saved to Cosmos DB.")

✅ Record saved to Cosmos DB.
