In [None]:
# Import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the pre-cleaned survey extract, relative to the notebook.
data_csv = "clean_data.csv"

# Read the whole file into the DataFrame every later cell works from.
data_df = pd.read_csv(filepath_or_buffer=data_csv)

# Preview the first five rows as a sanity check on the load.
data_df.head(5)

In [None]:
def _code_labeller(known_labels, fallback):
    """Build a mapper from a raw code to its display label.

    Codes listed in `known_labels` get their label; every other value
    (including missing values) gets `fallback`.
    """
    def label(code):
        return known_labels.get(code, fallback)
    return label


# Distribution of the raw general-health codes (not displayed: only the last
# expression of a cell is rendered).
data_df['General Health'].value_counts()

# Labels for values.
# NOTE(review): in every mapping below, any code that is not listed explicitly
# silently receives the fallback label -- confirm the input only contains the
# expected codes.
data_df["General Health Category"] = data_df["General Health"].map(
    _code_labeller(
        {1: "1 Poor", 2: "2 Fair", 3: "3 Good", 4: "4 Very Good"},
        "5 Excellent",
    )
)

data_df["Fruit & Veg Servings"] = data_df["Eaten Fruits or Veggies Yesterday"].map(
    _code_labeller({1: "None", 2: "1-4 servings"}, ">=5 servings")
)

data_df["Exercise Y/N"] = data_df["Exercise"].map(_code_labeller({0: "No"}, "Yes"))

data_df["Gender"] = data_df["Sex"].map(_code_labeller({0: "Female"}, "Male"))

data_df["Age Category"] = data_df["Age Group"].map(
    _code_labeller(
        {1: "18-24yrs", 2: "25-29 yrs", 3: "30-44 yrs", 4: "45-64 yrs"},
        "65+ yrs",
    )
)

# Order rows by the numeric health code (ascending is the default).
# Presumably this makes the later plots list categories from Poor to
# Excellent -- TODO confirm.
data_df = data_df.sort_values(by="General Health")
data_df.head()

In [None]:
# Per-category tallies for the labelled columns.
gender_count = data_df["Gender"].value_counts()
# Unlike the others this is .count(): a single total of non-null rows rather
# than a per-category breakdown.
diet_count = data_df["Fruit & Veg Servings"].count()
workout_count = data_df["Exercise Y/N"].value_counts()
age_count = data_df["Age Category"].value_counts()

# Print each summary in turn, one after another.
for summary in (gender_count, diet_count, workout_count, age_count):
    print(summary)

In [None]:
# Grouped count plot: respondents per general-health category, split by
# fruit/vegetable servings. Saved to fruitveggie.png.
sns.set(style="darkgrid")
flatui = ["#9b59b6", "#3498db", "#2ecc71"]

# Derive the total shown in the title from the data instead of hard-coding it,
# so the figure stays correct if the input file changes.
total_respondents = int(data_df["Fruit & Veg Servings"].count())

ax = sns.countplot(
    x="General Health Category",
    hue="Fruit & Veg Servings",
    data=data_df,
    palette=flatui,
    hue_order=["None", "1-4 servings", ">=5 servings"],
)
ax.set_title("General Health by Diet\nTotal: {:,}".format(total_respondents))
plt.savefig("fruitveggie.png")

In [None]:
# Stacked bar chart: respondents per general-health category, split by
# whether they exercise. Saved to exercisestack.png.
exercise_totals = data_df["Exercise Y/N"].value_counts()

# Rows: health category; columns: "Yes"/"No"; values: number of respondents.
grouped_exercise = (
    data_df.groupby(["General Health Category", "Exercise Y/N"])
    .size()
    .unstack("Exercise Y/N")
)
ax = grouped_exercise[["Yes", "No"]].plot(kind="bar", edgecolor="black", stacked=True)

ax.set_ylabel("Count")
# Counts in the title are computed so they cannot drift from the data.
ax.set_title(
    "General Health by Exercise\nExercise: {:,} & No Exercise: {:,}".format(
        int(exercise_totals.get("Yes", 0)),
        int(exercise_totals.get("No", 0)),
    )
)
ax.legend(loc="upper right")
plt.xticks(rotation=0)

# Save BEFORE show: once plt.show() has rendered the figure it is released,
# and a later savefig would write an empty image.
plt.savefig("exercisestack.png")
plt.show()

In [None]:
# Side-by-side count plot of general health split by exercise.
# Saved to exercisesbs.png.
exercise_totals = data_df["Exercise Y/N"].value_counts()

# Build the title from the actual counts rather than hard-coded numbers so it
# stays in sync with the data.
exercise_title = "General Health by Exercise\nExercise: {:,} & No Exercise: {:,}".format(
    int(exercise_totals.get("Yes", 0)),
    int(exercise_totals.get("No", 0)),
)

sns.countplot(
    x="General Health Category",
    hue="Exercise Y/N",
    data=data_df,
).set_title(exercise_title)
plt.savefig("exercisesbs.png")

In [None]:
# Stacked bar chart: respondents per general-health category, split by gender.
# Saved to genderstack.png.

# NOTE(review): `bars` is not used anywhere in this cell; kept in case a later
# cell reads it -- remove if confirmed unused.
bars = ("Excellent","Very Good","Good","Fair","Poor")

gender_totals = data_df["Gender"].value_counts()

# Rows: health category; columns: "Male"/"Female"; values: number of respondents.
grouped_gender = (
    data_df.groupby(["General Health Category", "Gender"])
    .size()
    .unstack("Gender")
)
ax = grouped_gender[["Male", "Female"]].plot(kind="bar", edgecolor="white", stacked=True)

ax.set_ylabel("Count")
# Counts in the title are computed so they cannot drift from the data.
ax.set_title(
    "General Health by Gender\nFemale: {:,} & Male {:,}".format(
        int(gender_totals.get("Female", 0)),
        int(gender_totals.get("Male", 0)),
    )
)
ax.legend(loc="upper right")
plt.xticks(rotation=0)

# Save BEFORE show: once plt.show() has rendered the figure it is released,
# and a later savefig would write an empty image.
plt.savefig("genderstack.png")
plt.show()

In [None]:
# Side-by-side count plot of general health split by gender.
# Saved to gendersbs.png.
gender_totals = data_df["Gender"].value_counts()

# Build the title from the actual counts rather than hard-coded numbers so it
# stays in sync with the data.
gender_title = "General Health by Gender\nFemale: {:,} & Male {:,}".format(
    int(gender_totals.get("Female", 0)),
    int(gender_totals.get("Male", 0)),
)

sns.countplot(
    x="General Health Category",
    hue="Gender",
    palette="muted",
    data=data_df,
).set_title(gender_title)
plt.savefig("gendersbs.png")

In [None]:
#Create a dataframe for each sex
male = data_df[data_df["Sex"] == 1]
female = data_df[data_df["Sex"] == 0]

In [None]:
# Age-category tallies for each sex, males printed first.
m_count = male["Age Category"].value_counts()
f_count = female["Age Category"].value_counts()
print(m_count, f_count, sep="\n")

In [None]:
# Tally the "Gender" label inside each per-sex frame, males printed first.
m_count = male["Gender"].value_counts()
f_count = female["Gender"].value_counts()
for tally in (m_count, f_count):
    print(tally)

In [None]:
# Male respondents: health category on the x-axis, one bar per age group.
group_male = (
    male.groupby(['General Health Category', 'Age Category'])['General Health Category']
    .count()
    .unstack('Age Category')
)

# Select the age columns explicitly so the bars appear youngest to oldest.
ax = group_male[['18-24yrs', '25-29 yrs', '30-44 yrs', '45-64 yrs', '65+ yrs']].plot(kind="bar")
ax.set_ylabel("Count")
ax.set_title("General Health of Males by Age Group")
plt.xticks(rotation=0)
# Fixed y-range of 0-600, the same limits used by the other by-age charts.
ax.set_ylim(0, 600)

# Write the image first, then render it in the notebook.
plt.savefig("malesbyage.png")
plt.show()

In [None]:
# Male respondents, axes swapped: age group on the x-axis, one bar per
# health category.
group_male = (
    male.groupby(['Age Category', 'General Health Category'])['Age Category']
    .count()
    .unstack('General Health Category')
)

# Select the health columns explicitly so the bars run from Poor to Excellent.
ax = group_male[['1 Poor', '2 Fair', '3 Good', '4 Very Good', '5 Excellent']].plot(kind="bar")
ax.set_ylabel("Count")
ax.set_title("General Health of Males by Age Group")
plt.xticks(rotation=0)
# Fixed y-range of 0-600, the same limits used by the other by-age charts.
ax.set_ylim(0, 600)

# Write the image first, then render it in the notebook.
plt.savefig("malesbyage2.png")
plt.show()

In [None]:
# Female respondents: health category on the x-axis, one bar per age group.
group_female = (
    female.groupby(['General Health Category', 'Age Category'])['General Health Category']
    .count()
    .unstack('Age Category')
)

# Select the age columns explicitly so the bars appear youngest to oldest.
ax = group_female[['18-24yrs', '25-29 yrs', '30-44 yrs', '45-64 yrs', '65+ yrs']].plot(kind="bar")
ax.set_ylabel("Count")
ax.set_title("General Health of Females by Age Group")
plt.xticks(rotation=0)
# Fixed y-range of 0-600, the same limits used by the other by-age charts.
ax.set_ylim(0, 600)

# Write the image first, then render it in the notebook.
plt.savefig("femalebyage.png")
plt.show()

In [None]:
# Female respondents, axes swapped: age group on the x-axis, one bar per
# health category.
group_female = (
    female.groupby(['Age Category', 'General Health Category'])['Age Category']
    .count()
    .unstack('General Health Category')
)

# Select the health columns explicitly so the bars run from Poor to Excellent.
ax = group_female[['1 Poor', '2 Fair', '3 Good', '4 Very Good', '5 Excellent']].plot(kind="bar")
ax.set_ylabel("Count")
ax.set_title("General Health of Females by Age Group")
plt.xticks(rotation=0)
# Fixed y-range of 0-600, the same limits used by the other by-age charts.
ax.set_ylim(0, 600)

# Write the image first, then render it in the notebook.
plt.savefig("femalebyage2.png")
plt.show()