In [None]:
import pandas as pd
import numpy as np

#Male = 1 Female = 2
# Generate synthetic health data
data = pd.DataFrame({
    'Age': np.random.randint(20, 80, 100),
    'Gender': np.random.choice([1, 2], 100),
    'BMI': np.round(np.random.normal(25, 5, 100), 1),
    'Chol_LDL': np.random.randint(80, 200, 100),
    'HealthScore': np.random.randint(50, 100, 100)
})

data.to_csv('health_data.csv', index=False)
print("Saved health data")

In [10]:
# Train a regression model (e.g., RandomForest)
from sklearn.ensemble import RandomForestRegressor
import pandas as pd

data = pd.read_csv("health_data.csv")
X, y = data.drop("HealthScore", axis=1), data["HealthScore"]
model = RandomForestRegressor().fit(X, y)
print("Model expects features:", model.n_features_in_)  # Should show 4
print("Completed")

Model expects features: 4
Completed


In [11]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama  # For local LLM
from langchain.text_splitter import CharacterTextSplitter

# 1. Initialize local embeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  # Lightweight local model

# 2. Load medical guidelines into a local vector DB
guidelines = [
    "LDL > 160: High risk. Recommend statins and omega-3 supplements.",
    "BMI > 30: Suggest 150 mins/week of moderate exercise.",
    "Glucose > 126: Potential diabetes risk. Recommend HbA1c test."
]
vector_db = FAISS.from_texts(guidelines, embeddings)  # Using local embeddings

# 3. Initialize local LLM (requires Ollama installed)
llm = Ollama(model="deepseek-r1")  # Run `ollama pull llama3` first

def generate_health_report(user_data):
    # Predict score (assuming you have a trained model)
    score = model.predict([user_data])[0]  # Your existing ML model
    
    # Retrieve relevant guidelines
    retriever = vector_db.as_retriever()
    query = f"BMI: {user_data[1]}, LDL: {user_data[3]}"
    docs = retriever.invoke(query)  # Updated LangChain syntax
    
    # Generate explanation with local LLM
    prompt = f"""
    Patient data: {user_data}. 
    Predicted health score: {score}/100. 
    Medical guidelines: {[doc.page_content for doc in docs]}.
    Generate 3 concise, actionable recommendations.
    """
    report = llm.invoke(prompt)
    return score, report

# Example usage
user_data = [45, 26.5, 170, 120]  # Age, BMI, LDL, HDL, Glucose
score, advice = generate_health_report(user_data)
print(f"Health Score: {score}")
print("Recommendations:", advice)



Health Score: 68.14
Recommendations: <think>
Alright, so I've got this patient data here: [45, 26.5, 170, 120]. Let me figure out what each of these numbers represents because it's crucial to understand them before giving any recommendations.

First off, the numbers are in a list with four elements. Typically, in medical contexts, this could be something like age, BMI, blood pressure, and cholesterol levels. So let's break that down:

1. 45 - That's probably the patient's age. So they're 45 years old.
2. 26.5 - This is likely their Body Mass Index (BMI). A BMI of 26.5 is within a healthy range, but I know that BMIs can be tricky because they don't account for muscle mass or body fat percentage. Still, at 26.5, they're probably considered overweight or just on the higher side.
3. 170 - That's their blood pressure reading. Now, this could be either diastolic or systolic. If it's both, it might need to be clarified. But let's assume it's normal or slightly elevated depending on context.
4

In [None]:
import joblib

# Load your saved model
model = joblib.load('health_score_predictor.joblib')  

# Check feature count
print("Model expects features:", model.n_features_in_)  # Should show 4