In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

def generate_balanced_patient_data(num_patients):
    np.random.seed(0)
    data = {
        'patient_id': np.arange(1, num_patients + 1),
        'age': np.random.randint(20, 91, num_patients),
        'weight': np.random.normal(75, 15, num_patients).astype(int),
        'medical_conditions': np.random.choice(['Hypertension', 'Diabetes', 'Anemia', 'None'], num_patients, p=[0.25, 0.25, 0.25, 0.25]),
        'heartrate': np.random.normal(80, 10, num_patients).astype(int),
        'spo2': np.random.normal(95, 2, num_patients).astype(int),
        'blood_pressure_systolic': np.random.normal(120, 15, num_patients).astype(int),
        'ankle_swelling': np.random.randint(1, 6, num_patients),
        'breathlessness': np.random.randint(1, 6, num_patients)              
    }
    patient_df = pd.DataFrame(data)
    patient_df['weight'] = patient_df['weight'].clip(50, 120)
    patient_df['heartrate'] = patient_df['heartrate'].clip(60, 100)
    patient_df['spo2'] = patient_df['spo2'].clip(90, 100)
    patient_df['blood_pressure_systolic'] = patient_df['blood_pressure_systolic'].clip(90, 140)
    return patient_df

def generate_weekly_symptoms_data(patient_data, num_weeks):
    records = []
    for _, row in patient_data.iterrows():
        patient_id = row['patient_id']
        start_date = datetime.now() - timedelta(weeks=num_weeks)
        for week in range(num_weeks):
            current_date = start_date + timedelta(weeks=week)
            records.append({
                'patient_id': patient_id,
                'timestamp': current_date,
                'ankle_swelling': np.random.randint(1, 6),
                'breathlessness': np.random.randint(1, 6)
            })
            

    weekly_symptoms_df = pd.DataFrame(records)
    weekly_symptoms_df['average_symptoms'] = weekly_symptoms_df[['ankle_swelling', 'breathlessness']].mean(axis=1)
    weekly_symptoms_df['average_diff'] = weekly_symptoms_df.groupby('patient_id')['average_symptoms'].diff().fillna(0)
    
    # Determine the progress (worsening, improving, stable)
    def determine_progress(diff):
        if diff > 0:
            return 'Worsening'
        elif diff < 0:
            return 'Improving'
        else:
            return 'Stable'
    
    weekly_symptoms_df['progress'] = weekly_symptoms_df['average_diff'].apply(determine_progress)
    
    return weekly_symptoms_df

# Generate datasets
patient_data = generate_balanced_patient_data(5000)
weekly_symptoms_data = generate_weekly_symptoms_data(patient_data, 104)

# Step 1: Add a drug column to patient_data
drugs = ['Drug_A', 'Drug_B', 'Drug_C', 'Drug_D']
patient_data['drug'] = np.random.choice(drugs, size=len(patient_data))

# Step 2: Get the most recent 'average_diff' and 'progress' for each patient
latest_weekly_data = weekly_symptoms_data.sort_values(by='timestamp').groupby('patient_id').tail(1)[['patient_id', 'average_diff', 'progress']]

# Step 3: Merge the latest 'average_diff' and 'progress' with patient_data
combined_data = pd.merge(patient_data, latest_weekly_data, on='patient_id', how='left')

# Display the combined DataFrame with relevant columns

combined_data.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress
0,1,64,96,,69,97,91,1,5,Drug_C,0.5,Worsening
1,2,67,66,Diabetes,90,95,120,2,5,Drug_B,-2.0,Improving
2,3,84,98,Anemia,76,97,108,1,4,Drug_A,0.0,Stable
3,4,87,68,Anemia,99,95,116,3,2,Drug_C,1.0,Worsening
4,5,87,67,,83,93,125,5,3,Drug_D,3.5,Worsening


In [2]:
# Define the ranges for each category
heartrate_bins = [0, 60, 70, 80, 90, 100, float('inf')]
spo2_bins = [0, 90, 92, 94, 96, 98, 100]
bp_systolic_bins = [0, 90, 100, 110, 120, 130, float('inf')]
#bp_diastolic_bins = [0, 60, 70, 80, 90, float('inf')]

# Create categorical variables
combined_data['heartrate_category'] = pd.cut(combined_data['heartrate'], bins=heartrate_bins, labels=[1, 2, 3, 4, 5, 6])
combined_data['spo2_category'] = pd.cut(combined_data['spo2'], bins=spo2_bins, labels=[1, 2, 3, 4, 5, 6])
combined_data['blood_pressure_systolic_category'] = pd.cut(combined_data['blood_pressure_systolic'], bins=bp_systolic_bins, labels=[1, 2, 3, 4, 5, 6])

# Display the updated DataFrame
combined_data.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress,heartrate_category,spo2_category,blood_pressure_systolic_category
0,1,64,96,,69,97,91,1,5,Drug_C,0.5,Worsening,2,5,2
1,2,67,66,Diabetes,90,95,120,2,5,Drug_B,-2.0,Improving,4,4,4
2,3,84,98,Anemia,76,97,108,1,4,Drug_A,0.0,Stable,3,5,3
3,4,87,68,Anemia,99,95,116,3,2,Drug_C,1.0,Worsening,5,4,4
4,5,87,67,,83,93,125,5,3,Drug_D,3.5,Worsening,4,3,5


In [3]:
def generate_weekly_symptoms_data(patient_data, num_weeks):
    records = []
    for _, row in patient_data.iterrows():
        patient_id = row['patient_id']
        start_date = datetime.now() - timedelta(weeks=num_weeks)
        for week in range(num_weeks):
            current_date = start_date + timedelta(weeks=week)
            records.append({
                'patient_id': patient_id,
                'timestamp': current_date,
                'heartrate_category': row['heartrate_category'],
                'spo2_category': row['spo2_category'],
                'blood_pressure_systolic_category': row['blood_pressure_systolic_category'],
                'ankle_swelling': np.random.randint(1, 6),
                'breathlessness': np.random.randint(1, 6)
            })
            

    weekly_symptoms_df = pd.DataFrame(records)
    weekly_symptoms_df['average_symptoms'] = weekly_symptoms_df[['ankle_swelling', 'breathlessness']].mean(axis=1)
    weekly_symptoms_df['average_diff'] = weekly_symptoms_df.groupby('patient_id')['average_symptoms'].diff().fillna(0)
    
    # Determine the progress (worsening, improving, stable)
    def determine_progress(diff):
        if diff > 0:
            return 'Worsening'
        elif diff < 0:
            return 'Improving'
        else:
            return 'Stable'
    
    weekly_symptoms_df['progress'] = weekly_symptoms_df['average_diff'].apply(determine_progress)
    
    return weekly_symptoms_df

# Generate weekly symptoms data for combined_data
num_weeks = 52  # Adjust as needed
weekly_symptoms_combined_data = generate_weekly_symptoms_data(combined_data, num_weeks)

# Display the generated data
weekly_symptoms_combined_data.head()

Unnamed: 0,patient_id,timestamp,heartrate_category,spo2_category,blood_pressure_systolic_category,ankle_swelling,breathlessness,average_symptoms,average_diff,progress
0,1,2023-06-19 17:25:17.819242,2,5,2,2,2,2.0,0.0,Stable
1,1,2023-06-26 17:25:17.819242,2,5,2,2,4,3.0,1.0,Worsening
2,1,2023-07-03 17:25:17.819242,2,5,2,1,4,2.5,-0.5,Improving
3,1,2023-07-10 17:25:17.819242,2,5,2,1,4,2.5,0.0,Stable
4,1,2023-07-17 17:25:17.819242,2,5,2,1,3,2.0,-0.5,Improving


In [4]:
weekly_symptoms_combined_data['progress'].unique()

array(['Stable', 'Worsening', 'Improving'], dtype=object)

In [5]:

# Step 2: Get the most recent 'average_diff' and 'progress' for each patient
latest_weekly_data = weekly_symptoms_combined_data.sort_values(by='timestamp').groupby('patient_id').tail(1)[['patient_id', 'average_diff', 'progress']]

# Step 3: Merge the latest 'average_diff' and 'progress' with patient_data
combined_data1 = pd.merge(patient_data, latest_weekly_data, on='patient_id', how='left')

# Display the combined DataFrame with relevant columns

combined_data1.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress
0,1,64,96,,69,97,91,1,5,Drug_C,2.0,Worsening
1,2,67,66,Diabetes,90,95,120,2,5,Drug_B,-3.5,Improving
2,3,84,98,Anemia,76,97,108,1,4,Drug_A,0.0,Stable
3,4,87,68,Anemia,99,95,116,3,2,Drug_C,-1.5,Improving
4,5,87,67,,83,93,125,5,3,Drug_D,1.5,Worsening


In [6]:
combined_data1.to_csv('combined_patient_data1.csv', index=False)

In [7]:
combined_data1.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress
0,1,64,96,,69,97,91,1,5,Drug_C,2.0,Worsening
1,2,67,66,Diabetes,90,95,120,2,5,Drug_B,-3.5,Improving
2,3,84,98,Anemia,76,97,108,1,4,Drug_A,0.0,Stable
3,4,87,68,Anemia,99,95,116,3,2,Drug_C,-1.5,Improving
4,5,87,67,,83,93,125,5,3,Drug_D,1.5,Worsening


In [8]:
combined_data1['progress'].unique()

array(['Worsening', 'Improving', 'Stable'], dtype=object)

In [9]:
# Save the combined_data DataFrame to a CSV file
combined_data.to_csv('combined_patient_data.csv', index=False)


In [10]:
combined_data.shape

(5000, 15)

In [11]:
# Define a mapping for progress categories
progress_mapping = {'Worsening': 0, 'Improving': 1, 'Stable': 2}

# Convert 'progress' column to categorical values
combined_data1['progress_category'] = combined_data1['progress'].map(progress_mapping)
# Display the updated DataFrame
combined_data1.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress,progress_category
0,1,64,96,,69,97,91,1,5,Drug_C,2.0,Worsening,0
1,2,67,66,Diabetes,90,95,120,2,5,Drug_B,-3.5,Improving,1
2,3,84,98,Anemia,76,97,108,1,4,Drug_A,0.0,Stable,2
3,4,87,68,Anemia,99,95,116,3,2,Drug_C,-1.5,Improving,1
4,5,87,67,,83,93,125,5,3,Drug_D,1.5,Worsening,0


In [12]:
cat_cols = [col for col in combined_data1.columns if combined_data1[col].dtype=='object']

In [13]:
# Check the number of categories in the category column
for col in cat_cols:
    print(f"{col} has {combined_data1[col].nunique()} categories\n")

medical_conditions has 4 categories

drug has 4 categories

progress has 3 categories



In [14]:
num_cols = [col for col in combined_data1.columns if combined_data1[col].dtype!='object']

In [15]:
# Check the number of categories in the category column
for col in num_cols:
    print(f"{col} has {combined_data1[col].nunique()} categories\n")

patient_id has 5000 categories

age has 71 categories

weight has 71 categories

heartrate has 41 categories

spo2 has 11 categories

blood_pressure_systolic has 51 categories

ankle_swelling has 5 categories

breathlessness has 5 categories

average_diff has 17 categories

progress_category has 3 categories



In [16]:
combined_data1.columns

Index(['patient_id', 'age', 'weight', 'medical_conditions', 'heartrate',
       'spo2', 'blood_pressure_systolic', 'ankle_swelling', 'breathlessness',
       'drug', 'average_diff', 'progress', 'progress_category'],
      dtype='object')

In [17]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
for col in cat_cols:
    combined_data1[col] = le.fit_transform(combined_data1[col])

In [18]:
combined_data1.head()

Unnamed: 0,patient_id,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug,average_diff,progress,progress_category
0,1,64,96,3,69,97,91,1,5,2,2.0,2,0
1,2,67,66,1,90,95,120,2,5,1,-3.5,0,1
2,3,84,98,0,76,97,108,1,4,0,0.0,1,2
3,4,87,68,0,99,95,116,3,2,2,-1.5,0,1
4,5,87,67,3,83,93,125,5,3,3,1.5,2,0


In [19]:
# List of columns to drop
drop_cols = ['progress_category', 'patient_id', 'predicted_progress','average_diff','progress']

# Create a list of columns to keep
ind_col = [col for col in combined_data1.columns if col not in drop_cols]

# Create a new DataFrame with the remaining columns
new_data = combined_data1[ind_col]
new_data.head()

Unnamed: 0,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug
0,64,96,3,69,97,91,1,5,2
1,67,66,1,90,95,120,2,5,1
2,84,98,0,76,97,108,1,4,0
3,87,68,0,99,95,116,3,2,2
4,87,67,3,83,93,125,5,3,3


In [20]:
X = new_data
y = combined_data1['progress']

In [55]:
# lets create Training and Testing Sets for Validation of Results
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

print("The Shape of x train:", x_train.shape)
print("The Shape of x test:", x_test.shape)
print("The Shape of y train:", y_train.shape)
print("The Shape of y test:", y_test.shape)

The Shape of x train: (4000, 9)
The Shape of x test: (1000, 9)
The Shape of y train: (4000,)
The Shape of y test: (1000,)


In [59]:
x_train.head()

Unnamed: 0,age,weight,medical_conditions,heartrate,spo2,blood_pressure_systolic,ankle_swelling,breathlessness,drug
2913,48,81,3,86,97,139,3,1,1
3275,67,78,1,76,97,132,4,2,2
775,79,70,3,78,94,119,4,1,3
217,87,58,0,66,94,90,4,1,3
1245,35,68,3,88,99,118,3,2,1


In [61]:
y_train.head()

2913    0
3275    2
775     0
217     2
1245    2
Name: progress, dtype: int32

In [56]:
# lets create a Predictive Model

from sklearn.linear_model import LogisticRegression

model1 = LogisticRegression()
model1.fit(x_train, y_train)
y_pred = model1.predict(x_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [62]:
x_train.columns

Index(['age', 'weight', 'medical_conditions', 'heartrate', 'spo2',
       'blood_pressure_systolic', 'ankle_swelling', 'breathlessness', 'drug'],
      dtype='object')

In [63]:
import numpy as np

# Function to get user input for the features
def get_input():
    age = int(input("Enter age: "))
    weight = int(input("Enter weight: "))
    medical_conditions = int(input("Enter number of medical conditions: "))
    heartrate = int(input("Enter heartrate: "))
    spo2 = int(input("Enter spo2: "))
    blood_pressure_systolic = int(input("Enter blood pressure systolic: "))
    ankle_swelling = int(input("Enter ankle swelling (0 for none, 1 for mild, 2 for moderate, 3 for severe): "))
    breathlessness = int(input("Enter breathlessness (0 for none, 1 for mild, 2 for moderate, 3 for severe): "))
    drug = int(input("Enter drug (0 for no drug, 1 for drug A, 2 for drug B, 3 for drug C): "))

    
    return np.array([[age, weight, medical_conditions, heartrate, spo2, blood_pressure_systolic, ankle_swelling, breathlessness, drug]])

In [65]:
# Get user input
input_features = get_input()

# Predict the target 'progress'
predicted_progress = model1.predict(input_features)
# Print the predicted progress category with advice
if predicted_progress == 2:
    print("Predicted progress: Serious")
    print("Your condition requires immediate medical attention. Please consult your doctor promptly.")
elif predicted_progress == 1:
    print("Predicted progress: Worsening")
    print("If your condition is worsening, consider increasing your frusemide dose. Monitor your condition more frequently and watch out for any new symptoms. Additionally, reduce your protein intake and fluid intake by 300 ml to avoid volume overload.")
elif predicted_progress == 0:
    print("Predicted progress: Improving")
    print("If your condition is improving, continue taking your medicines on time and monitor your condition regularly.")
else:
    print("Invalid progress prediction")

Predicted progress: Improving
If your condition is improving, continue taking your medicines on time and monitor your condition regularly.




In [66]:
import pickle

# Saving model to disk
pickle.dump(model1, open('model.pkl','wb'))

In [None]:
# Load the model from disk
loaded_model = pickle.load(open('model.pkl', 'rb'))

# Get user input
input_features = get_input()

# Predict the target 'progress'
predicted_progress = loaded_model.predict(input_features)

# Print the predicted progress category with advice
if predicted_progress == 2:
    print("Predicted progress: Serious")
    print("Your condition requires immediate medical attention. Please consult your doctor promptly.")
elif predicted_progress == 1:
    print("Predicted progress: Worsening")
    print("If your condition is worsening, consider increasing your frusemide dose. Monitor your condition more frequently and watch out for any new symptoms. Additionally, reduce your protein intake and fluid intake by 300 ml to avoid volume overload.")
elif predicted_progress == 0:
    print("Predicted progress: Improving")
    print("If your condition is improving, continue taking your medicines on time and monitor your condition regularly.")
else:
    print("Invalid progress prediction")

# Use the RAG model in combination with the documents
# Assuming 'create_retrieval_chain' and 'create_stuff_documents_chain' are properly defined and imported elsewhere in the notebook
retrieval_chain = create_retrieval_chain(vector_store=st.session_state.vectors)
documents_chain = create_stuff_documents_chain(document_loader=st.session_state.loader)

# Combine the output of the prediction with the documents to generate a comprehensive response
context = f"Predicted progress: {predicted_progress}"
response = llm.generate(context=context, chains=[retrieval_chain, documents_chain])
print(response)


In [None]:
import streamlit as st
import joblib  # or use pickle
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import time
import os

load_dotenv()

# Load the GROQ and Google API KEY
groq_api_key = os.getenv('GROQ_API_KEY')
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

st.title("Kidney Care AI")

llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="Llama3-8b-8192"
)

prompt = ChatPromptTemplate.from_template("""
Based on the provided patient data, generate a summary and recommendations for managing Chronic Kidney Disease (CKD).
<context>
{context}
<context>
Patient Data: {input}
"""
)

def vector_embedding():
    if "vectors" not in st.session_state:
        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        st.session_state.loader = PyPDFDirectoryLoader("./ckd")  # Data Ingestion
        st.session_state.docs = st.session_state.loader.load()  # Document Loading
        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk Creation
        st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)  # Splitting
        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)  # Vector OpenAI embeddings

if st.button("Prepare Document Embeddings"):
    vector_embedding()
    st.write("Document Embeddings Prepared")

# Input fields for patient data
age = st.number_input("Age", min_value=0)
weight = st.number_input("Weight (kg)", min_value=0.0)
heart_rate = st.number_input("Heart Rate", min_value=0)
spo2 = st.number_input("SpO2 Level (%)", min_value=0.0, max_value=100.0, step=0.1)
blood_pressure_systolic = st.number_input("Blood Pressure (Systolic)", min_value=0)
ankle_swelling = st.selectbox("Ankle Swelling", ["mild", "moderate","severe"])
breathlessness = st.selectbox("Breathlessness", ["mild", "moderate","severe"])

# Load the machine learning model
model = joblib.load('model.pkl')

# Create a dictionary with the patient data
patient_data = {
    "Age": age,
    "Weight": weight,
    "Heart Rate": heart_rate,
    "SpO2 Level": spo2,
    "Blood Pressure (Systolic)": blood_pressure_systolic,
    "Ankle Swelling": ankle_swelling,
    "Breathlessness": breathlessness
}

# Convert patient data to a DataFrame or the required format for the model
import pandas as pd 
patient_df = pd.DataFrame([combined_data1])

# Make predictions
predicted_progress = model.predict(patient_df)[0]

def generate_recommendations(predicted_progress):
    if predicted_progress == "Serious":
        return "Your condition requires immediate medical attention. Please consult your doctor promptly."
    elif predicted_progress == "Worsening":
        return ("If your condition is worsening, consider increasing your frusemide dose. "
                "Monitor your condition more frequently and watch out for any new symptoms. "
                "Additionally, reduce your protein intake and fluid intake by 300 ml to avoid volume overload.")
    elif predicted_progress == "Improving":
        return "If your condition is improving, continue taking your medicines on time and monitor your condition regularly."
    else:
        return "Invalid progress prediction"

if st.button("Generate Recommendations") and "vectors" in st.session_state:
    # Prepare the input for the LLM
    patient_data_input = "\n".join([f"{key}: {value}" for key, value in combined_data1.items()])

    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    start = time.process_time()
    response = retrieval_chain.invoke({'input': patient_data_input})
    st.write(f"Response time: {time.process_time() - start} seconds")
    st.write(response['answer'])

    # Display the recommendations based on predicted progress
    recommendations = generate_recommendations(predicted_progress)
    st.write(f"Predicted Progress: {predicted_progress}")
    st.write(f"Recommendations: {recommendations}")

    # With a Streamlit expander
    with st.expander("🧬🧬🧬🧬🧬🧬🧬🧬"):
        for i, doc in enumerate(response["context"]):
            st.write(doc.page_content)
            st.write("💊💊💊💊💊💊💊💊💊💊💊💊💊💊")

In [28]:
from sklearn.model_selection import train_test_split

# Divide the data set into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [29]:
from sklearn.preprocessing import StandardScaler

# Features normalization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [32]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

print(f'Training Accuracy of Logistic Regression is {accuracy_score(y_train, logreg.predict(X_train))}\n')

logreg_accuracy = accuracy_score(y_test, y_pred)
logreg_precision = precision_score(y_test, y_pred, average='macro')
logreg_recall = recall_score(y_test, y_pred, average='macro')
logreg_f1_score = f1_score(y_test, y_pred, average='macro')

print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}\n")
print(f"Test Accuracy of Logistic Regression is {logreg_accuracy} \n")
print(f"Test Precision of Logistic Regression is {logreg_precision} \n")
print(f"Test Recall of Logistic Regression is {logreg_recall} \n")
print(f"Test F1_score of Logistic Regression is {logreg_f1_score} \n")
print(f"Classification Report: \n{classification_report(y_test, y_pred)}")

Training Accuracy of Logistic Regression is 0.4596

Confusion Matrix: 
[[627   0 473]
 [194   0 140]
 [637   0 429]]

Test Accuracy of Logistic Regression is 0.4224 

Test Precision of Logistic Regression is 0.2805831352074332 

Test Recall of Logistic Regression is 0.32414634146341464 

Test F1_score of Logistic Regression is 0.2990825375018731 

Classification Report: 
              precision    recall  f1-score   support

           0       0.43      0.57      0.49      1100
           1       0.00      0.00      0.00       334
           2       0.41      0.40      0.41      1066

    accuracy                           0.42      2500
   macro avg       0.28      0.32      0.30      2500
weighted avg       0.36      0.42      0.39      2500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [36]:
# Prepare the input data for predictions
X_pred = combined_data1[ind_col]

# Standardize the input data
X_pred_scaled = scaler.transform(X_pred)

# Make predictions
y_pred = logreg.predict(X_pred_scaled)

# Map predictions to progress categories
progress_mapping = {0: 'Improving', 1: 'Worsening', 2: 'Serious'}
predictions = [progress_mapping[pred] for pred in y_pred]

# Add predictions to combined_data1
combined_data1['predicted_progress'] = predictions

# Display the updated DataFrame with predictions
print(combined_data1[['patient_id', 'predicted_progress']].head())

   patient_id predicted_progress
0           1            Serious
1           2            Serious
2           3            Serious
3           4          Improving
4           5          Improving


In [40]:
# Make predictions on the new data
predictions = logreg.predict(new_data)



In [49]:

import pickle

# Save the trained logistic regression model to disk
with open('logreg_model.pkl', 'wb') as model_file:
    pickle.dump(logreg, model_file)

In [50]:
import pickle

# Saving model to disk
pickle.dump(model1, open('model.pkl','wb'))

In [53]:
import streamlit as st
import joblib  # or use pickle
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import time
import os

load_dotenv()

# Load the GROQ and Google API KEY
groq_api_key = os.getenv('GROQ_API_KEY')
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

st.title("Kidney Care AI")

llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="Llama3-8b-8192"
)

prompt = ChatPromptTemplate.from_template("""
Based on the provided patient data, generate a summary and recommendations for managing Chronic Kidney Disease (CKD).
<context>
{context}
<context>
Patient Data: {input}
"""
)

def vector_embedding():
    if "vectors" not in st.session_state:
        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        st.session_state.loader = PyPDFDirectoryLoader("./ckd")  # Data Ingestion
        st.session_state.docs = st.session_state.loader.load()  # Document Loading
        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk Creation
        st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)  # Splitting
        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)  # Vector OpenAI embeddings

if st.button("Prepare Document Embeddings"):
    vector_embedding()
    st.write("Document Embeddings Prepared")

# Input fields for patient data
age = st.number_input("Age", min_value=0)
weight = st.number_input("Weight (kg)", min_value=0.0)
heart_rate = st.number_input("Heart Rate", min_value=0)
spo2 = st.number_input("SpO2 Level (%)", min_value=0.0, max_value=100.0, step=0.1)
blood_pressure_systolic = st.number_input("Blood Pressure (Systolic)", min_value=0)
ankle_swelling = st.selectbox("Ankle Swelling", ["mild", "moderate","severe"])
breathlessness = st.selectbox("Breathlessness", ["mild", "moderate","severe"])

# Load the machine learning model
model = joblib.load('model.pkl')

# Create a dictionary with the patient data
patient_data = {
    "Age": age,
    "Weight": weight,
    "Heart Rate": heart_rate,
    "SpO2 Level": spo2,
    "Blood Pressure (Systolic)": blood_pressure_systolic,
    "Ankle Swelling": ankle_swelling,
    "Breathlessness": breathlessness
}

# Convert patient data to a DataFrame or the required format for the model
import pandas as pd
patient_df = pd.DataFrame([combined_data1])

# Make predictions
predicted_progress = model.predict(patient_df)[0]

def generate_recommendations(predicted_progress):
    if predicted_progress == "Serious":
        return "Your condition requires immediate medical attention. Please consult your doctor promptly."
    elif predicted_progress == "Worsening":
        return ("If your condition is worsening, consider increasing your frusemide dose. "
                "Monitor your condition more frequently and watch out for any new symptoms. "
                "Additionally, reduce your protein intake and fluid intake by 300 ml to avoid volume overload.")
    elif predicted_progress == "Improving":
        return "If your condition is improving, continue taking your medicines on time and monitor your condition regularly."
    else:
        return "Invalid progress prediction"

if st.button("Generate Recommendations") and "vectors" in st.session_state:
    # Prepare the input for the LLM
    patient_data_input = "\n".join([f"{key}: {value}" for key, value in combined_data1.items()])

    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    
    start = time.process_time()
    response = retrieval_chain.invoke({'input': patient_data_input})
    st.write(f"Response time: {time.process_time() - start} seconds")
    st.write(response['answer'])

    # Display the recommendations based on predicted progress
    recommendations = generate_recommendations(predicted_progress)
    st.write(f"Predicted Progress: {predicted_progress}")
    st.write(f"Recommendations: {recommendations}")

    # With a Streamlit expander
    with st.expander("🧬🧬🧬🧬🧬🧬🧬🧬"):
        for i, doc in enumerate(response["context"]):
            st.write(doc.page_content)
            st.write("💊💊💊💊💊💊💊💊💊💊💊💊💊💊")

ValueError: Must pass 2-d input. shape=(1, 5000, 14)