In [None]:

# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import joblib
from openai import AzureOpenAI
from azure.cosmos import CosmosClient, PartitionKey
import uuid
import os

from dotenv import load_dotenv
# Load variables from a local .env file (if one is present) into the process
# environment; later cells read OPENAI_KEY and COSMOS_KEY via os.getenv.
load_dotenv()

In [None]:

# Step 2: Load Dataset
# Read the CSV (path is relative to the notebook's working directory) and
# preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="data/tpe_dataset.csv")
df.head(n=5)


In [None]:
# Step 3: Type casting (optional, if needed)
# The dtype map is grouped by target dtype: the first group of columns is cast
# to int64 (this includes the DX_CLASS target), the second group to float64.
df = df.astype({
    **dict.fromkeys(
        ["AGE", "GENDER", "US_ECHO", "US_DIAPHRAGM", "US_FIBRIN", "DX_CLASS"],
        "int64",
    ),
    **dict.fromkeys(
        ["US_PLEURAL_THICKENING", "PF_PROTEIN", "PF_LDH", "PF_GLUCOSE", "PF_ADA"],
        "float64",
    ),
})

In [None]:

# Step 4: Preprocess Data
# Every column except DX_CLASS is a feature; DX_CLASS is the target.
X, y = df.drop("DX_CLASS", axis=1), df["DX_CLASS"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:

# Step 5: Train Random Forest Model
# Fit a 100-tree random forest on the training split; random_state is fixed so
# repeated runs build the same forest from the same data.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# joblib.dump does not create missing parent directories, so make sure
# "models/" exists first; otherwise this cell fails on a fresh checkout.
os.makedirs("models", exist_ok=True)
joblib.dump(model, "models/tpe_model.pkl")


In [None]:

# Step 6: Evaluate Model
# Predict on the held-out split, then print each metric under its label.
y_pred = model.predict(X_test)
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric(y_test, y_pred))


In [None]:

# Step 7: Load Trained Model
# Rebind `model` to the artifact read back from disk, so the cells below use
# the persisted file rather than the in-memory estimator.
model = joblib.load(filename="models/tpe_model.pkl")


In [None]:

# Step 8: Predict on a Sample
# Take the first held-out row (a Series) and turn it into a one-row frame,
# because predict() is called with a 2-D input.
sample_input = X_test.iloc[0]
sample_input_df = sample_input.to_frame().transpose()
sample_prediction = model.predict(sample_input_df)[0]

# Class 1 is reported as "Likely TPE"; any other class as "Not TPE".
if sample_prediction == 1:
    print("Prediction:", "Likely TPE")
else:
    print("Prediction:", "Not TPE")


In [None]:

# Step 9: Generate GPT Response
# Describe the sampled row and the model's verdict, then ask for follow-up advice.
# NOTE(review): the raw feature values are sent to the Azure OpenAI endpoint —
# confirm this is acceptable for the data in use.
prompt = (
    f"The patient details are: {sample_input.to_dict()} "
    f"with prediction: {'Likely TPE' if sample_prediction == 1 else 'Not TPE'}. "
    "What should be the next steps?"
)

# The API key is read from the OPENAI_KEY environment variable.
client = AzureOpenAI(
    azure_endpoint="https://openai-tpe-assistant.openai.azure.com/",
    api_version="2023-05-15",
    api_key=os.getenv("OPENAI_KEY"),
)

response = client.chat.completions.create(
    model="gpt-35-tpebot",
    temperature=0.4,
    max_tokens=500,
    messages=[
        dict(role="system", content="You are a clinical assistant specialized in tuberculosis and pleural effusion diagnosis."),
        dict(role="user", content=prompt),
    ],
)

# Only the first returned choice is used.
gpt_reply = response.choices[0].message.content
print("GPT Suggestion:\n", gpt_reply)


In [None]:

# Step 10: Save Result to Cosmos DB
# The account key comes from the COSMOS_KEY environment variable. Fail early
# with a clear message when it is missing instead of handing None to the client.
cosmos_url = "https://tpe-cosmosdb.documents.azure.com:443/"
cosmos_key = os.getenv("COSMOS_KEY")
if not cosmos_key:
    raise RuntimeError(
        "COSMOS_KEY is not set; define it in the environment or .env file "
        "before saving results to Cosmos DB."
    )
cosmos_client = CosmosClient(cosmos_url, credential=cosmos_key)

# Handles to the target database and container; these calls only build client
# objects, so both are presumably provisioned already — TODO confirm.
db = cosmos_client.get_database_client("TPEAssistant")
container = db.get_container_client("Predictions")

# One document per prediction: a fresh UUID as the id, the raw feature values,
# the human-readable label, and the GPT suggestion text.
result_doc = {
    "id": str(uuid.uuid4()),
    "input": sample_input.to_dict(),
    "prediction": "Likely TPE" if sample_prediction == 1 else "Not TPE",
    "gpt_response": gpt_reply
}

container.create_item(body=result_doc)
print("✅ Saved to Cosmos DB")
