In [1]:
# importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# importing cleaned data (refer to Heart_Ml_Model_draft2.ipynb to see the code for cleaning data)
df = pd.read_csv('/Users/nikgurunathan/Desktop/csv_heart/Heart_cleaned.csv')

##### asked chatgpt to help me rank and assign points to various health/lifestyle risk factors based on how disruptive they are to daily life

from most to least disruptive:
* stroke = 10 pts
* heart attack = 9.5 pts
* COPD = 9 pts
* depressive disorder = 8.5 pts
* blind / vision difficulty = 8.5 pts
* difficulty dressing and bathing = 8.5 pts
* angina = 8 pts
* arthritis = 7.5 pts
* kidney disease = 7.5 pts
* difficulty concentrating = 7 pts
* diabetes = 7 pts
* asthma = 6.5 pts
* difficulty running errands = 6 pts
* deaf / hard of hearing = 6 pts
* skin cancer = 5.5 pts
* calculated bmi < 18.5 - 5 pts
* calculated bmi > 24.9 = 4.5 pts 
(bmi normal range is 18.5-24.9)
* smoker status: current smoker - now smokes every day = 4.5 pts
* e-cigarette usage - use them every day = 4 pts
* smoker status: current smoker - now smokes some days = 3.5 pts
* e-cigarette usage: use them some days - 3 pts
* smoker status: former smoker = 2.5 pts
* e-cigarette usage: not at all (right now) = 2 pts
* smoker status: never smoked = 2 pts
* e-cigarette usage: never used e-cigarettes in my entire life = 2 pts
* alcohol drinkers = 2 pts
* sleep hours < 7 hours = 1.5 pts

In [7]:
risk_points = {
    "Stroke": 10,
    "Heart attack": 9.5,
    "COPD": 9,
    "Depressive disorder": 8.5,
    "Blind / vision difficulty": 8.5,
    "Difficulty dressing and bathing": 8.5,
    "Angina": 8,
    "Arthritis": 7.5,
    "Kidney disease": 7.5,
    "Difficulty concentrating": 7,
    "Diabetes": 7,
    "Asthma": 6.5,
    "Difficulty running errands": 6,
    "Deaf / hard of hearing": 6,
    "Skin cancer": 5.5,
    "Calculated BMI: less than 18.5": 5,
    "Calculated BMI: more than 24.9": 4.5,
    "Alcohol Drinkers": 2,
    "Sleep hours: less than 7": 1.5
}

smokerstatus_points = {
    "current smoker - now smokes every day": 4.5,
    "current smoker - now smokes some days": 3.5,
    "former smoker": 2.5,
    "never smoked": 2
}

ecigaretteusage_points = {
    "use them every day": 4,
    "use them some days": 3,
    "not at all (right now)": 2,
    "never used e-cigarettes in my entire life": 2
}

def risk_calculator(row):
    total_risk = 0

    for factor, points in risk_points.items():
        if factor in row and row(factor) == 1:
            total_risk += points

        if "SmokerStatus" in row:
            smoker_status = row["SmokerStatus"]
            if smoker_status in smokerstatus_points:
                total_risk += smokerstatus_points[smoker_status]

        if "ECigaretteUsage" in row:
            ecig_usage = row["ECigaretteUsage"]
            if ecig_usage in ecigaretteusage_points:
                total_risk += ecigaretteusage_points[ecig_usage]
        
        if row["SleepHours"] < 7.0:
            total_risk += 1.5

        if row["CalculatedBMI"] < 18.5:
            total_risk += 5
        
        if row["CalculatedBMI"] > 24.9:
            total_risk += 4.5

    return total_risk

df["NewTotalRiskPoints"] = df.apply(risk_calculator, axis=1)

df.head()

Unnamed: 0,State,Sex,GeneralHealth,PhysicalHealthDays,MentalHealthDays,LastCheckupTime,PhysicalActivities,SleepHours,RemovedTeeth,HadHeartAttack,...,BlindRisk,DepressiveRisk,WalkingRisk,KidneyRisk,DiabetesRisk,SkinCancerRisk,DeafRisk,SmokerPoints,ECigPoints,NewTotalRiskPoints
0,alabama,female,very good,0.0,0.0,within past year (anytime less than 12 months ...,0,8.0,none of them,0,...,0,0,0,0,3.0,0,0,0,1,161.5
1,alabama,female,excellent,0.0,0.0,within past year (anytime less than 12 months ...,0,6.0,none of them,0,...,0,0,0,0,0.0,2,0,0,0,190.0
2,alabama,female,very good,2.0,3.0,within past year (anytime less than 12 months ...,1,5.0,none of them,0,...,0,0,0,0,0.0,2,0,0,0,190.0
3,alabama,female,excellent,0.0,0.0,within past year (anytime less than 12 months ...,1,7.0,none of them,0,...,0,0,0,0,0.0,0,0,2,0,104.5
4,alabama,female,fair,2.0,0.0,within past year (anytime less than 12 months ...,1,9.0,none of them,0,...,0,0,0,0,0.0,0,0,0,0,76.0


In [8]:
df.to_csv('/Users/nikgurunathan/Desktop/csv_heart/Heart_cleaned_copy.csv', index=False)