#Creation Of Data

In [23]:
import pandas as pd
import numpy as np

# Number of rows
n = 1200

# Generating synthetic user data
np.random.seed(42)

# Age between 18 and 80
age = np.random.randint(18, 80, n)

# Gender: 0 for Female and 1 for Male
gender = np.random.choice(['M', 'F'], n)

# BMI (Body Mass Index) ranging from underweight to obese (18.5 to 40)
bmi = np.round(np.random.uniform(18.5, 40.0, n), 1)

# Smoking Status: 1 for smoker, 0 for non-smoker
smoking_status = np.random.choice([0, 1], n, p=[0.7, 0.3])

# Physical activity level: 1 (Sedentary), 2 (Moderate), 3 (Active), 4 (Very Active)
physical_activity = np.random.choice([1, 2, 3, 4], n, p=[0.35, 0.35, 0.2, 0.1])

# List of medical conditions
medical_conditions = ['Diabetes', 'Hypertension', 'Asthma', 'Heart Disease', 'Obesity', 'Arthritis', 'None',
                      'Depression', 'Anxiety', 'COPD', 'Kidney Disease', 'Osteoporosis', 'Allergies',
                      'High Cholesterol', 'Thyroid Problems', 'Chronic Fatigue']

medical_history_probabilities = [0.12,  # Diabetes
                                 0.15,  # Hypertension
                                 0.08,  # Asthma
                                 0.07,  # Heart Disease
                                 0.10,  # Obesity
                                 0.08,  # Arthritis
                                 0.14,  # None
                                 0.05,  # Depression
                                 0.05,  # Anxiety
                                 0.03,  # COPD
                                 0.02,  # Kidney Disease
                                 0.02,  # Osteoporosis
                                 0.05,  # Allergies
                                 0.06,  # High Cholesterol
                                 0.05,  # Thyroid Problems
                                 0.03]  # Chronic Fatigue

total_sum = sum(medical_history_probabilities)
medical_history_probabilities = [p / total_sum for p in medical_history_probabilities]

# Randomly assign medical history based on corrected probabilities
medical_history = np.random.choice(medical_conditions, n, p=medical_history_probabilities)

# Health tips based on medical conditions
health_tips_map = {
    'Diabetes': [
        'Reduce sugar intake and monitor blood sugar regularly',
        'Follow a low glycemic index diet, avoid processed sugars',
        'Exercise daily to help control blood glucose levels',
        'Include more fiber-rich foods in your diet'
    ],
    'Hypertension': [
        'Limit salt intake and avoid processed foods',
        'Manage stress through relaxation techniques like yoga or meditation',
        'Monitor your blood pressure regularly and stay active',
        'Follow a heart-healthy diet rich in vegetables, fruits, and whole grains'
    ],
    'Asthma': [
        'Use inhalers as prescribed and avoid known allergens',
        'Maintain good air quality in your home, use air purifiers if necessary',
        'Engage in moderate exercise to strengthen your lungs',
        'Avoid smoking and exposure to secondhand smoke'
    ],
    'Heart Disease': [
        'Eat heart-healthy foods, rich in omega-3 fatty acids',
        'Avoid trans fats, high cholesterol foods, and processed foods',
        'Exercise regularly, at least 30 minutes of moderate activity daily',
        'Monitor cholesterol and blood pressure levels frequently'
    ],
    'Obesity': [
        'Follow a calorie-controlled diet, avoid sugary drinks and snacks',
        'Increase your physical activity, aim for at least 150 minutes of exercise per week',
        'Incorporate more vegetables and fruits into your diet',
        'Consult a nutritionist to develop a personalized weight loss plan'
    ],
    'Arthritis': [
        'Exercise gently to maintain mobility and joint flexibility',
        'Maintain a healthy weight to reduce stress on your joints',
        'Use hot/cold therapy to relieve joint pain and inflammation',
        'Consider low-impact exercises like swimming or cycling'
    ],
    'Depression': [
        'Seek therapy or counseling, stay connected with friends and family',
        'Exercise regularly to boost mood and reduce anxiety',
        'Practice mindfulness techniques like meditation or deep breathing',
        'Avoid alcohol and drugs, and focus on maintaining a balanced diet'
    ],
    'Anxiety': [
        'Practice relaxation techniques, avoid caffeine, stay active',
        'Seek therapy or counseling if needed to manage anxiety',
        'Develop a regular sleep routine, and avoid stress triggers',
        'Incorporate calming activities like yoga or reading into your day'
    ],
    'COPD': [
        'Avoid smoking, and use prescribed medications for breathing issues',
        'Engage in pulmonary rehabilitation to strengthen lungs',
        'Stay physically active, but avoid overexertion',
        'Maintain clean air in your living environment, and avoid pollutants'
    ],
    'Kidney Disease': [
        'Limit protein, sodium, and phosphorus intake',
        'Stay hydrated but monitor fluid intake if advised by a doctor',
        'Avoid over-the-counter pain medications like NSAIDs',
        'Maintain regular monitoring of kidney function with a healthcare provider'
    ],
    'Osteoporosis': [
        'Increase calcium and vitamin D intake through diet or supplements',
        'Engage in weight-bearing exercises to strengthen bones',
        'Avoid smoking and limit alcohol consumption',
        'Consider bone density testing if advised by your doctor'
    ],
    'Allergies': [
        'Avoid known allergens, and keep your living space allergen-free',
        'Use antihistamines as needed to manage allergy symptoms',
        'Consider immunotherapy if allergies are severe',
        'Maintain clean air at home with air filters or purifiers'
    ],
    'High Cholesterol': [
        'Reduce intake of saturated fats and trans fats',
        'Increase consumption of fiber-rich foods like oats and vegetables',
        'Exercise regularly to help lower LDL cholesterol levels',
        'Monitor cholesterol levels regularly and avoid high-fat processed foods'
    ],
    'Thyroid Problems': [
        'Follow a diet rich in iodine if hypothyroid, and avoid excess iodine if hyperthyroid',
        'Monitor thyroid hormone levels regularly with your doctor',
        'Incorporate selenium and zinc into your diet to support thyroid function',
        'Avoid goitrogenic foods like soy and certain vegetables if you have thyroid issues'
    ],
    'Chronic Fatigue': [
        'Establish a regular sleep routine, and avoid overexertion',
        'Manage stress through techniques like yoga or meditation',
        'Maintain a balanced diet, and avoid sugar and caffeine crashes',
        'Focus on gentle exercises like walking or stretching to conserve energy'
    ],
    'None': [
        'Maintain a balanced diet and exercise regularly',
        'Avoid smoking and limit alcohol consumption',
        'Stay hydrated and get regular health checkups',
        'Focus on preventive care and monitor overall well-being'
    ]
}

# Function to generate health tips based on medical condition, BMI, smoking status, and activity level
def generate_health_tips(condition, bmi, smoking, activity):
    tips = []

    # Start with a random condition-specific tip from the expanded health_tips_map
    if condition in health_tips_map:
        tips.append(np.random.choice(health_tips_map[condition]))

    # Adjust tips based on BMI
    if bmi >= 30:
        tips.append('Maintain a calorie deficit to lower BMI, consult a nutritionist, increase daily physical activity')
    elif bmi < 18.5:
        tips.append('Focus on gaining healthy weight through calorie-rich nutritious foods')

    # Adjust tips for smokers
    if smoking == 1:
        tips.append('Quit smoking to improve overall health, seek support for cessation')

    # Adjust tips for sedentary activity level
    if activity == 1:
        tips.append('Increase physical activity, aim for at least 30 minutes of moderate exercise daily')
    elif activity == 4:
        tips.append('Maintain an active lifestyle, but be cautious of over-exertion')

    # Combine tips into a single string and return unique recommendations
    return ', '.join(set(tips))

# Generate health tips for each user based on their condition, BMI, smoking status, and activity level
health_tips = [generate_health_tips(medical_history[i], bmi[i], smoking_status[i], physical_activity[i]) for i in range(n)]

df = pd.DataFrame({
    'User ID': np.arange(1, n + 1),
    'Age': age,
    'Gender': gender,
    'BMI': bmi,
    'Smoking Status': smoking_status,
    'Physical Activity Level': physical_activity,
    'Medical History': medical_history
})

df['Recommended Health Tips'] = health_tips

# Save the dataset
df.to_csv('diverse_health_data.csv', index=False)

print(df.head())


   User ID  Age Gender   BMI  Smoking Status  Physical Activity Level  \
0        1   38      M  27.2               1                        1   
1        2   62      F  32.4               0                        1   
2        3   23      M  28.8               0                        2   
3        4   28      F  22.9               0                        3   
4        5   44      F  36.0               0                        2   

    Medical History                            Recommended Health Tips  
0  Thyroid Problems  Quit smoking to improve overall health, seek s...  
1              None  Maintain a calorie deficit to lower BMI, consu...  
2      Hypertension  Monitor your blood pressure regularly and stay...  
3     Heart Disease  Avoid trans fats, high cholesterol foods, and ...  
4           Obesity  Maintain a calorie deficit to lower BMI, consu...  


#Data Preprocessing

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Loading the data
df = pd.read_csv('diverse_health_data.csv')

# Encode categorical variables: Gender(# M=1, F=0) and Medical History
encoder_gender = LabelEncoder()
df['Gender'] = encoder_gender.fit_transform(df['Gender'])

encoder_medical = LabelEncoder()
df['Medical History'] = encoder_medical.fit_transform(df['Medical History'])

# Extract features and target
X = df[['Age', 'Gender', 'Medical History']]
y = df['Recommended Health Tips']

print(X)
print(y)

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


print(pd.DataFrame(X_train_scaled, columns=X.columns).head())


      Age  Gender  Medical History
0      56       0               12
1      69       0                2
2      46       0               10
3      32       0                6
4      60       1               10
...   ...     ...              ...
1195   78       0               15
1196   69       0                7
1197   32       0               12
1198   64       0                0
1199   73       1                0

[1200 rows x 3 columns]
0       Increase your physical activity, aim for at le...
1       Consider low-impact exercises like swimming or...
2       Quit smoking to improve overall health, seek s...
3       Exercise regularly to boost mood and reduce an...
4       Quit smoking to improve overall health, seek s...
                              ...                        
1195    Quit smoking to improve overall health, seek s...
1196    Increase physical activity, aim for at least 3...
1197    Increase your physical activity, aim for at le...
1198    Use antihistamines as nee

#Model Building

In [24]:
from sklearn.neighbors import NearestNeighbors

# Initializing the k-NN model with k=3
knn_model = NearestNeighbors(n_neighbors=3, metric='euclidean')

knn_model.fit(X_train_scaled)

#Getting Recommendations

In [25]:
def get_knn_recommendations(user_profile, knn_model, X, y, scaler):
    # Convert the user_profile into a DataFrame with the same columns as X_train
    user_profile_df = pd.DataFrame([user_profile], columns=X.columns)

    # Scale the input user profile using the same scaler as the training data
    user_profile_scaled = scaler.transform(user_profile_df)

    # Find the 3 nearest neighbors for the user
    distances, indices = knn_model.kneighbors(user_profile_scaled)

    # Get the health tips from the nearest neighbors in the training data
    top_health_tips = []
    for index in indices[0]:
        # Separating concatenated tips
        tips = y.iloc[index].split(', ')
        top_health_tips.extend(tips)

    # Ensuring the recommendations are unique
    unique_health_tips = list(dict.fromkeys(top_health_tips))

    # first 3 unique tips
    return unique_health_tips[:3]

#Example

In [26]:
user_profile_1 = [25, 1, 1]  # [Age, Gender (M=1), Medical History (Hypertension=1)]
user_profile_2 = [40, 1, 6]  # [Age, Gender (M=1), Medical History (Obesity=6)]

top_3_tips_user_1 = get_knn_recommendations(user_profile_1, knn_model, X_train, y_train, scaler)
top_3_tips_user_2 = get_knn_recommendations(user_profile_2, knn_model, X_train, y_train, scaler)

# Function to print the tips in pointers
def print_health_tips(health_tips):
    print("\nTop 3 Health Tips for the user profile:")
    for i, tip in enumerate(health_tips, 1):
        print(f"{i}. {tip}")


print_health_tips(top_3_tips_user_1)
print_health_tips(top_3_tips_user_2)


Top 3 Health Tips for the user profile:
1. Develop a regular sleep routine
2. and avoid stress triggers
3. Maintain a calorie deficit to lower BMI

Top 3 Health Tips for the user profile:
1. Follow a low glycemic index diet
2. avoid processed sugars
3. Maintain a calorie deficit to lower BMI


#Model Evaluation


**1. Alignment with Common Health Advice:**

The k-NN recommendation system performs reasonably well, achieving an accuracy of 51.25%. The generated health tips generally align with standard medical advice:

- Hypertension: Users receive advice like "Limit salt intake" and "Manage stress," which reflects typical hypertension management.
- Diabetes: Recommendations such as "Reduce sugar intake" and "Monitor blood sugar" are in line with standard diabetes care.

However, some tips, like "Exercise regularly" and "Maintain a balanced diet," are too broad and appear across multiple conditions (e.g., Hypertension, Obesity, Heart Disease). This overlap makes the recommendations less distinct and condition-specific.

**2. Real-life Examples:**
- Diabetes (40-year-old male): Tips like "Reduce sugar intake" and "Exercise daily" align well with diabetes management.
- Hypertension (55-year-old female): Tips such as "Limit salt intake" and "Manage stress" follow standard hypertension guidelines.
- Obesity (35-year-old male): Recommendations like "Follow a calorie-controlled diet" and "Increase physical activity" are consistent with common weight management advice.

##Suggested Improvements:
**1. Incorporate More User Data:**

Add features like BMI, smoking status, physical activity level, and dietary habits to make recommendations more personalized.
Condition Severity: Tailor tips based on the severity of conditions, offering more urgent advice for severe cases (e.g., medication reminders for severe diabetes).

**2. Use Collaborative Filtering:**

Leverage user feedback (ratings or selected tips) to refine recommendations. Collaborative filtering can prioritize tips found useful by users with similar profiles.
Item-based filtering: Recommend tips that worked for other users with similar conditions, even if their profiles differ slightly.

**3. Improve Model Specificity:**

Increase k in k-NN to include more diverse recommendations and reduce repetition.
Combine content-based filtering with collaborative filtering to offer more varied and personalized health tips.

In [27]:
def model_evaluation(X_test, y_test, knn_model, scaler, X_train, y_train):
    correct_predictions = 0
    total_predictions = len(X_test)

    for i in range(total_predictions):
        user_profile = X_test.iloc[i].values
        actual_tips = y_test.iloc[i].split(', ')
        predicted_tips = get_knn_recommendations(user_profile, knn_model, X_train, y_train, scaler)

        # Count of the predicted tips match the actual tips
        correct_tips = [tip for tip in predicted_tips if tip in actual_tips]

        if correct_tips:
            correct_predictions += 1

    accuracy = correct_predictions / total_predictions * 100
    print(f"Model Evaluation - Accuracy: {accuracy:.2f}%")

model_evaluation(X_test, y_test, knn_model, scaler, X_train, y_train)

Model Evaluation - Accuracy: 51.25%


Detailed report in this doc:

https://docs.google.com/document/d/10ltMJFT4nfS7z8B-7_xs8iv64FMMklexPySK3R3lF6M/edit?usp=sharing