In [None]:
# heartRate : Heart rate in beats per minute (bpm).
# When : The time of the heart rate sample. 
# heartBeatIntervals: The most recent beat-to-beat interval data as an Array of Number objects in milliseconds (ms). 
# Calories: The calories for the day in kilocalories (kCal) 
# Distance: The distance for the day in centimeters (cm). 
# Number of floors climbed: The number of floors climbed for the day.
# Steps:
# stressScore: The current stress score. The stress score calculated using a rolling average of the last 30 seconds of stress level readings.
# respirationRate: Current respiration rate for the user, in breaths per minute Value may be null.
# timeToRecovery: Time to recover from the last activity, in hours Value may be null.
# oxygenSaturation: The current oxygen saturation in percent (%) 
# temperature: Celsius ©
# Cadence: The cadence in revolutions per minute (rpm).
# Pressure: The barometric pressure in Pascals (Pa). 


In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, confusion_matrix


In [63]:
import pandas as pd
import numpy as np



def generate_synthetic_data(n_samples=10000, filename='medical_sample_data.csv'):
    """
    Generate synthetic health metrics data with emergency labels and save to CSV
    
    Parameters:
    n_samples (int): Number of data points to generate
    filename (str): Name of CSV file to save the data
    
    Returns:
    pandas.DataFrame: Synthetic dataset with health metrics and emergency labels
    """
    np.random.seed(42)
    
    # Normal ranges for different health metrics
    normal_ranges = {
        'heartRate': (60, 100),
        'stressScore': (0, 70),
        'respirationRate': (12, 20),
        'oxygenSaturation': (95, 100),
        'temperature': (36, 39),
    }
    
    # Create empty dataframe
    data = pd.DataFrame()
    
    # Generate normal values for most samples
    for feature, (min_val, max_val) in normal_ranges.items():
        # Generate mostly normal values with some outliers
        if np.random.random() < 0.6:  # 60% normal values
            data[feature] = np.random.uniform(min_val, max_val, n_samples)
        else:  # 40% broader range that might include abnormal values
            spread = max_val - min_val
            data[feature] = np.random.uniform(min_val - spread*0.8, max_val + spread*0.8, n_samples)
    
    # Emergency conditions definitions (medical thresholds)
    emergency_conditions = [
        # High heart rate (tachycardia)
        (data['heartRate'] > 120),
        # Low heart rate (bradycardia)
        (data['heartRate'] < 50),
        # Low oxygen saturation
        (data['oxygenSaturation'] < 90),
        # High temperature (fever)
        (data['temperature'] > 38),
        # Very low temperature (hypothermia)
        (data['temperature'] < 35.0),
        # Abnormal respiration (high)
        (data['respirationRate'] > 25),
        # Abnormal respiration (low)
        (data['respirationRate'] < 8),
        # Extremely high stress
        (data['stressScore'] > 90)
    ]
    
    # Mark as emergency if any condition is true
    data['emergency'] = np.logical_or.reduce(emergency_conditions).astype(int)
    
    # Set some values to None/NaN to simulate missing data
    for feature in normal_ranges.keys():
        # Randomly set 5% of the values to NaN
        data.loc[data.sample(frac=0.05).index, feature] = np.nan
    
    # Save data to CSV file
    data.to_csv(filename, index=False)
    
    return data



# Generate 50 samples and display first 10 rows
if __name__ == "__main__":
    # Generate and save data
    sample_data = generate_synthetic_data(n_samples=10000)
    
    # Display first 10 rows
    print("\nSample Data (First 10 rows):")
    print(sample_data.head(10))
    
    # Display statistics about emergencies
    emergency_count = sample_data['emergency'].sum()
    total_samples = len(sample_data)
    print(f"\nEmergency Statistics:")
    print(f"Total samples: {total_samples}")
    print(f"Emergency cases: {emergency_count} ({emergency_count/total_samples:.1%})")
    print(f"Non-emergency cases: {total_samples - emergency_count} ({(total_samples - emergency_count)/total_samples:.1%})")
    
    # Display distribution of key metrics in emergency vs non-emergency cases
    print("\nMean Values for Key Metrics:")
    metrics = ['heartRate', 'oxygenSaturation', 'temperature', 'respirationRate', 'stressScore']
    for metric in metrics:
        emergency_mean = sample_data[sample_data['emergency'] == 1][metric].mean()
        normal_mean = sample_data[sample_data['emergency'] == 0][metric].mean()
        print(f"{metric}: Emergency = {emergency_mean:.2f}, Normal = {normal_mean:.2f}")


Sample Data (First 10 rows):
   heartRate  stressScore  respirationRate  oxygenSaturation  temperature  \
0  98.028572    12.330774        17.306245         97.939282    36.726785   
1  89.279758    42.508667        15.856715         98.501050    36.766380   
2  83.946339    33.363691        17.908568         99.127822          NaN   
3  66.240746    60.599069        19.689663         97.034855    37.528720   
4  66.239781     2.247671        12.932374         98.434610    36.926638   
5  62.323344    45.070755        17.676542         96.516007    38.721889   
6  94.647046    53.406421        13.842753         97.177380    37.659419   
7  84.044600    53.164060              NaN         99.478136    38.362431   
8  88.322903    62.025178        12.262902         99.749538    38.772713   
9  60.823380    51.032362        13.087259         98.594087    36.489405   

   emergency  
0          0  
1          0  
2          0  
3          0  
4          0  
5          1  
6          0  
7 

In [64]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
import joblib

data = pd.read_csv('medical_sample_data.csv')
print(data.head())


   heartRate  stressScore  respirationRate  oxygenSaturation  temperature  \
0  98.028572    12.330774        17.306245         97.939282    36.726785   
1  89.279758    42.508667        15.856715         98.501050    36.766380   
2  83.946339    33.363691        17.908568         99.127822          NaN   
3  66.240746    60.599069        19.689663         97.034855    37.528720   
4  66.239781     2.247671        12.932374         98.434610    36.926638   

   emergency  
0          0  
1          0  
2          0  
3          0  
4          0  


In [65]:
# Handle missing values (you can choose a strategy such as mean, median, mode, or drop rows/columns)
data = data.dropna()
data.fillna(data.mean(), inplace=True) 
# Replace missing values with mean

# Encoding categorical variables if necessary (example for 'category_column')
# data['category_column'] = data['category_column'].map({'category1': 0, 'category2': 1})

# Feature scaling (optional, but recommended for algorithms like SVM or KNN)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data.drop('emergency', axis=1))

# Split into features (X) and target (y)
X = scaled_features
y = data['emergency']  # Replace with your actual target column name

# Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [66]:
X = data.drop(columns=['emergency'])

# Target (emergency label)
y = data['emergency']

# Step 2: Train-Test Split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Feature Scaling (important for many models, though not necessary for Random Forest)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)



  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [67]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Step 5: Model Evaluation

# Predictions
y_pred = model.predict(X_test_scaled)

# Classification report
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Step 6: Model performance statistics
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1043
           1       1.00      1.00      1.00       508

    accuracy                           1.00      1551
   macro avg       1.00      1.00      1.00      1551
weighted avg       1.00      1.00      1.00      1551

Confusion Matrix:
 [[1043    0]
 [   0  508]]
Accuracy: 100.00%


In [68]:
import joblib

# Save the trained model and scaler
joblib.dump(model, 'random_forest_model.pkl')
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [56]:
!pip install --upgrade scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.5.1
    Uninstalling scikit-learn-1.5.1:
      Successfully uninstalled scikit-learn-1.5.1
Successfully installed scikit-learn-1.6.1


In [2]:
import pandas as pd
import numpy as np

def generate_synthetic_data(n_samples=10000, filename='medical_sample_data_dummy.csv'):
    """
    Generate synthetic health metrics data with emergency labels, suffering_from, and treatment columns,
    and save to CSV.
    
    Parameters:
    n_samples (int): Number of data points to generate
    filename (str): Name of CSV file to save the data
    
    Returns:
    pandas.DataFrame: Synthetic dataset with health metrics and emergency labels
    """
    np.random.seed(42)
    
    # Normal ranges for different health metrics
    normal_ranges = {
        'heartRate': (60, 100),
        'stressScore': (0, 70),
        'respirationRate': (12, 20),
        'oxygenSaturation': (95, 100),
        'temperature': (36, 39),
    }
    
    # Create empty dataframe
    data = pd.DataFrame()
    
    # Generate normal values for most samples
    for feature, (min_val, max_val) in normal_ranges.items():
        # Generate mostly normal values with some outliers
        if np.random.random() < 0.6:  # 60% normal values
            data[feature] = np.random.uniform(min_val, max_val, n_samples)
        else:  # 40% broader range that might include abnormal values
            spread = max_val - min_val
            data[feature] = np.random.uniform(min_val - spread*0.8, max_val + spread*0.8, n_samples)
    
    # Create 'suffering_from' and 'treatment' columns
    suffering_from_choices = ['heart', 'oxygen', 'diabetes', 'highBloodPressure', 'none']
    treatment_choices = ['heart', 'oxygen', 'medication', 'surgery', 'none']
    data['suffering_from'] = np.random.choice(suffering_from_choices, size=n_samples)
    data['treatment'] = np.random.choice(treatment_choices, size=n_samples)
    
    # Apply treatment-specific adjustments
    for i in range(n_samples):
        if data.loc[i, 'treatment'] == 'heart':
            data.loc[i, 'heartRate'] = 110  # Heart treatment should have heartRate of 110
        elif data.loc[i, 'suffering_from'] == 'oxygen':
            # Stretch oxygen levels for oxygen-related patients
            data.loc[i, 'oxygenSaturation'] = np.random.uniform(85, 95)  # Slightly low oxygen saturation for oxygen-related patients
    
    # Emergency conditions definitions (medical thresholds)
    emergency_conditions = [
        # High heart rate (tachycardia)
        (data['heartRate'] > 120),
        # Low heart rate (bradycardia)
        (data['heartRate'] < 50),
        # Low oxygen saturation
        (data['oxygenSaturation'] < 90),
        # High temperature (fever)
        (data['temperature'] > 38),
        # Very low temperature (hypothermia)
        (data['temperature'] < 35.0),
        # Abnormal respiration (high)
        (data['respirationRate'] > 25),
        # Abnormal respiration (low)
        (data['respirationRate'] < 8),
        # Extremely high stress
        (data['stressScore'] > 90)
    ]
    
    # Mark as emergency if any condition is true
    data['emergency'] = np.logical_or.reduce(emergency_conditions).astype(int)
    
    # Sentimental Analysis: "outlier" logic for those with no treatment
    data['condition'] = data.apply(lambda row: 'outlier' if row['treatment'] == 'none' and 
                                    (row['heartRate'] < 60 or row['heartRate'] > 100 or
                                     row['oxygenSaturation'] < 95 or row['oxygenSaturation'] > 100) else 'normal', axis=1)
    
    # Set some values to None/NaN to simulate missing data
    for feature in normal_ranges.keys():
        # Randomly set 5% of the values to NaN
        data.loc[data.sample(frac=0.05).index, feature] = np.nan
    
    # Save data to CSV file
    data.to_csv(filename, index=False)
    
    return data

# Generate 50 samples and display first 10 rows
if __name__ == "__main__":
    # Generate and save data
    sample_data = generate_synthetic_data(n_samples=10000)
    
    # Display first 10 rows
    print("\nSample Data (First 10 rows):")
    print(sample_data.head(10))
    
    # Display statistics about emergencies
    emergency_count = sample_data['emergency'].sum()
    total_samples = len(sample_data)
    print(f"\nEmergency Statistics:")
    print(f"Total samples: {total_samples}")
    print(f"Emergency cases: {emergency_count} ({emergency_count/total_samples:.1%})")
    print(f"Non-emergency cases: {total_samples - emergency_count} ({(total_samples - emergency_count)/total_samples:.1%})")
    
    # Display distribution of key metrics in emergency vs non-emergency cases
    print("\nMean Values for Key Metrics:")
    metrics = ['heartRate', 'oxygenSaturation', 'temperature', 'respirationRate', 'stressScore']
    for metric in metrics:
        emergency_mean = sample_data[sample_data['emergency'] == 1][metric].mean()
        normal_mean = sample_data[sample_data['emergency'] == 0][metric].mean()
        print(f"{metric}: Emergency = {emergency_mean:.2f}, Normal = {normal_mean:.2f}")


Sample Data (First 10 rows):
    heartRate  stressScore  respirationRate  oxygenSaturation  temperature  \
0   98.028572    12.330774        17.306245         97.939282    36.726785   
1  110.000000    42.508667        15.856715         98.501050    36.766380   
2   83.946339    33.363691        17.908568         94.202385    37.367149   
3   66.240746    60.599069        19.689663         97.034855    37.528720   
4   66.239781     2.247671        12.932374         98.434610    36.926638   
5  110.000000    45.070755        17.676542         96.516007    38.721889   
6   94.647046    53.406421        13.842753         97.177380    37.659419   
7   84.044600    53.164060        15.315814         94.756919    38.362431   
8   88.322903    62.025178        12.262902         99.749538    38.772713   
9   60.823380    51.032362        13.087259         98.594087    36.489405   

      suffering_from treatment  emergency condition  
0               none      none          0    normal  
1  

In [1]:
import json
import random
from typing import List, Dict, Any

def generate_patient_dataset(num_records: int = 1000) -> List[Dict[str, Any]]:
    """
    Generate a dataset of patient health readings with labels.
    
    Args:
        num_records: Number of patient records to generate
        
    Returns:
        List of dictionaries containing patient health data and labels
    """
    # Define normal ranges for each vital sign
    normal_ranges = {
        "heartRate": (60, 80),
        "stressScore": (30, 60),
        "respirationRate": (12, 18),
        "oxygenSaturation": (95, 100),
        "temperature": (36.5, 37.5)
    }
    
    # Define conditions with their effects on vital signs and whether they can be "normal"
    conditions = [
        {
            "name": "Healthy",
            "probability": 0.5,
            "effects": {},
            "can_be_normal": True
        },
        {
            "name": "HeartCondition",
            "probability": 0.1,
            "effects": {
                "heartRate": (75, 100),  # Higher heart rate range
                "stressScore": (40, 80)  # Higher stress due to heart condition
            },
            "can_be_normal": True  # Can be "normal" for someone with this condition
        },
        {
            "name": "ChronicLungDisease",
            "probability": 0.1,
            "effects": {
                "respirationRate": (16, 24),  # Higher respiration rate
                "oxygenSaturation": (88, 95)  # Lower oxygen saturation
            },
            "can_be_normal": True  # Can be "normal" for someone with this condition
        },
        {
            "name": "Fever",
            "probability": 0.1,
            "effects": {
                "temperature": (37.8, 39.5),  # Higher temperature
                "heartRate": (80, 110),  # Higher heart rate due to fever
                "stressScore": (60, 85)  # Higher stress due to illness
            },
            "can_be_normal": False  # Never "normal" to have a fever
        },
        {
            "name": "Hypoxia",
            "probability": 0.05,
            "effects": {
                "oxygenSaturation": (75, 88),  # Very low oxygen
                "respirationRate": (20, 30),  # Elevated breathing rate
                "heartRate": (90, 120)  # Elevated heart rate compensating for low oxygen
            },
            "can_be_normal": False  # Never "normal" to have hypoxia
        },
        {
            "name": "Hypothermia",
            "probability": 0.05,
            "effects": {
                "temperature": (33.0, 35.5),  # Low body temperature
                "heartRate": (40, 60),  # Lower heart rate
                "respirationRate": (8, 12)  # Lower respiration rate
            },
            "can_be_normal": False  # Never "normal" to have hypothermia
        },
        {
            "name": "AnxietyDisorder",
            "probability": 0.1,
            "effects": {
                "stressScore": (60, 95),  # Very high stress
                "heartRate": (75, 100),  # Elevated heart rate
                "respirationRate": (18, 24)  # Slightly elevated breathing
            },
            "can_be_normal": True  # Can be "normal" for someone with anxiety
        }
    ]
    
    dataset = []
    
    for _ in range(num_records):
        # Randomly select a condition based on probability
        condition = random.choices(
            conditions, 
            weights=[c["probability"] for c in conditions], 
            k=1
        )[0]
        
        # Generate vital signs based on the condition
        patient_data = {}
        for vital, normal_range in normal_ranges.items():
            # If condition affects this vital sign, use its range instead
            if vital in condition["effects"]:
                vital_range = condition["effects"][vital]
            else:
                vital_range = normal_range
                
            # Generate a value within the appropriate range
            if vital == "temperature":
                patient_data[vital] = round(random.uniform(*vital_range), 1)
            else:
                patient_data[vital] = round(random.uniform(*vital_range))
        
        # Determine if the reading is normal based on the condition and actual values
        is_normal = True
        
        # If this condition can never be normal, mark as abnormal
        if not condition["can_be_normal"]:
            is_normal = False
        else:
            # Check each vital sign against normal ranges
            # For conditions that can be normal, check if values are within the condition's expected range
            for vital, value in patient_data.items():
                if vital in condition["effects"]:
                    # For affected vitals, check if within the condition's range
                    condition_range = condition["effects"][vital]
                    if value < condition_range[0] * 0.95 or value > condition_range[1] * 1.05:
                        is_normal = False
                        break
                else:
                    # For unaffected vitals, check against standard normal ranges
                    if value < normal_ranges[vital][0] * 0.95 or value > normal_ranges[vital][1] * 1.05:
                        is_normal = False
                        break
        
        # Add small random chance for anomalies even within expected ranges
        if random.random() < 0.05:
            is_normal = not is_normal
        
        # Set the label
        patient_data["label"] = "normal" if is_normal else "abnormal"
        
        # Add condition info for reference (can be removed for actual training data)
        patient_data["condition"] = condition["name"]
        
        dataset.append(patient_data)
    
    return dataset

def save_data_to_json(dataset: List[Dict[str, Any]], filename: str = "patient_data.json"):
    """Save the generated dataset to a JSON file"""
    with open(filename, "w") as f:
        json.dump(dataset, f, indent=2)
    print(f"Dataset saved to {filename}")

def print_sample_data(dataset: List[Dict[str, Any]], num_samples: int = 10):
    """Print some sample data from the generated dataset"""
    print(f"\nSample data (showing {num_samples} records):")
    
    # Group by normal/abnormal for better visualization
    normal_samples = [d for d in dataset if d["label"] == "normal"][:num_samples//2]
    abnormal_samples = [d for d in dataset if d["label"] == "abnormal"][:num_samples//2]
    
    print("\nNORMAL CASES:")
    for sample in normal_samples:
        print(json.dumps(sample))
    
    print("\nABNORMAL CASES:")
    for sample in abnormal_samples:
        print(json.dumps(sample))

if __name__ == "__main__":
    # Generate dataset
    dataset = generate_patient_dataset(1000)
    
    # Save to file
    save_data_to_json(dataset)
    
    # Print samples
    print_sample_data(dataset)

Dataset saved to patient_data.json

Sample data (showing 10 records):

NORMAL CASES:
{"heartRate": 78, "stressScore": 38, "respirationRate": 23, "oxygenSaturation": 92, "temperature": 37.0, "label": "normal", "condition": "ChronicLungDisease"}
{"heartRate": 61, "stressScore": 37, "respirationRate": 18, "oxygenSaturation": 97, "temperature": 37.1, "label": "normal", "condition": "Healthy"}
{"heartRate": 96, "stressScore": 73, "respirationRate": 17, "oxygenSaturation": 95, "temperature": 37.1, "label": "normal", "condition": "HeartCondition"}
{"heartRate": 73, "stressScore": 36, "respirationRate": 18, "oxygenSaturation": 100, "temperature": 36.8, "label": "normal", "condition": "Healthy"}
{"heartRate": 77, "stressScore": 54, "respirationRate": 16, "oxygenSaturation": 95, "temperature": 37.2, "label": "normal", "condition": "Healthy"}

ABNORMAL CASES:
{"heartRate": 88, "stressScore": 61, "respirationRate": 13, "oxygenSaturation": 99, "temperature": 36.8, "label": "abnormal", "condition": 

In [9]:
import json

def convert_to_llama_format(data):
    """Converts structured health data into LLaMA fine-tuning format."""
    formatted_data = []
    
    for entry in data:
        Instruction = "What is the health condition?Is the paitent's sympotm "
        Input = (f"Patient Data: Heart Rate: {entry['heartRate']}, Stress Score: {entry['stressScore']}, "
                  f"Respiration Rate: {entry['respirationRate']}, Oxygen Saturation: {entry['oxygenSaturation']}, "
                  f"Temperature: {entry['temperature']} The patient is {entry['condition']}")
        Response = f"{entry['label']}"
        
        formatted_data.append({ "instruction": Instruction ,"input": Input, "output": Response})
    
    return formatted_data

with open('patient_data.json', 'r') as file:
    data = json.load(file)

print(data)



formatted_data = convert_to_llama_format(data)
with open("llama_finetune_data.jsonl", "w") as f:
    for item in formatted_data:
        f.write(json.dumps(item) + "\n")

print("Data successfully converted and saved to llama_finetune_data.jsonl")


[{'heartRate': 88, 'stressScore': 61, 'respirationRate': 13, 'oxygenSaturation': 99, 'temperature': 36.8, 'label': 'abnormal', 'condition': 'HeartCondition'}, {'heartRate': 78, 'stressScore': 38, 'respirationRate': 23, 'oxygenSaturation': 92, 'temperature': 37.0, 'label': 'normal', 'condition': 'ChronicLungDisease'}, {'heartRate': 61, 'stressScore': 37, 'respirationRate': 18, 'oxygenSaturation': 97, 'temperature': 37.1, 'label': 'normal', 'condition': 'Healthy'}, {'heartRate': 73, 'stressScore': 57, 'respirationRate': 23, 'oxygenSaturation': 89, 'temperature': 37.2, 'label': 'abnormal', 'condition': 'ChronicLungDisease'}, {'heartRate': 96, 'stressScore': 73, 'respirationRate': 17, 'oxygenSaturation': 95, 'temperature': 37.1, 'label': 'normal', 'condition': 'HeartCondition'}, {'heartRate': 87, 'stressScore': 66, 'respirationRate': 17, 'oxygenSaturation': 96, 'temperature': 37.9, 'label': 'abnormal', 'condition': 'Fever'}, {'heartRate': 73, 'stressScore': 36, 'respirationRate': 18, 'oxyg