In [1]:
import plotly.express as px
import numpy as np
import pandas as pd

**What is the distribution of obesity levels in the dataset?**

In [2]:
# Read the obesity survey CSV from the Kaggle input directory into a DataFrame
df = pd.read_csv('/kaggle/input/obesity-based-on-eating-habits-and-physical-cond/obesity.csv')

# Histogram of the 'NObeyesdad' column, with a small gap between bars
fig = px.histogram(
    data_frame=df,
    x='NObeyesdad',
    title='Distribution of Obesity Levels',
    labels=dict(NObeyesdad='Obesity Level'),
    category_orders=dict(NObeyesdad=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
).update_layout(bargap=0.2)  # update_layout returns the same figure
fig.show()


**What is the age distribution of the respondents?**

In [3]:
# Histogram of respondent ages; the marginal box plot summarises the same
# values above the main panel.
fig = px.histogram(
    data_frame=df,
    x='Age',
    marginal='box',
    title='Age Distribution of Respondents',
    labels=dict(Age='Age'),
    color_discrete_sequence=['#1f77b4'],
).update_layout(bargap=0.2)
fig.show()


**How many respondents have a family history of overweight?**

In [4]:
# Histogram of the 'family_history_with_overweight' column
fig = px.histogram(
    data_frame=df,
    x='family_history_with_overweight',
    title='Distribution of Family History with Overweight',
    labels=dict(family_history_with_overweight='Family History'),
    category_orders=dict(family_history_with_overweight=[0, 1]),
    color_discrete_sequence=['#1f77b4'],
).update_layout(bargap=0.2)
fig.show()


**What is the percentage of respondents who frequently eat high-caloric food?**

In [5]:
# Pie chart with one slice per distinct value of the 'FAVC' column
fig = px.pie(
    data_frame=df,
    names='FAVC',
    title='Percentage of Respondents Eating High-Caloric Food Frequently',
    labels=dict(FAVC='Frequently Eats High-Caloric Food'),
    category_orders=dict(FAVC=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
)
fig.show()


**What is the distribution of physical activity levels among respondents?**

In [6]:
# Histogram of the 'FAF' (physical activity) column
fig = px.histogram(
    data_frame=df,
    x='FAF',
    title='Distribution of Physical Activity Levels',
    labels=dict(FAF='Physical Activity Level'),
    category_orders=dict(FAF=[0, 1, 2, 3]),
    color_discrete_sequence=['#1f77b4'],
).update_layout(bargap=0.2)
fig.show()


**What is the correlation between age and the number of main meals a respondent has daily?**

In [7]:
# Scatter plot of age (x) against number of main meals, 'NCP' (y)
fig = px.scatter(
    data_frame=df,
    x='Age',
    y='NCP',
    title='Correlation between Age and Number of Main Meals',
    labels=dict(Age='Age', NCP='Number of Main Meals'),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**How does the primary mode of transportation vary among respondents?**

In [8]:
# Columns that describe the transportation modes
transport_columns = ['Automobile', 'Bike', 'Motorbike', 'Public_Transportation', 'Walking']

# Column-wise totals, drawn as one bar per transportation column
fig = px.bar(
    df[transport_columns].sum(),
    title='Distribution of Primary Mode of Transportation',
    labels=dict(value='Number of Respondents', index='Transportation Mode'),
    color_discrete_sequence=['#1f77b4'],
).update_layout(bargap=0.2)
fig.show()


**Is there a relationship between smoking and monitoring caloric intake?**

In [9]:
# Histogram of 'SMOKE', with bars split by colour on the 'SCC' column
fig = px.histogram(
    data_frame=df,
    x='SMOKE',
    color='SCC',
    title='Relationship between Smoking and Monitoring Caloric Intake',
    labels=dict(SMOKE='Smoking', SCC='Monitors Caloric Intake'),
    category_orders=dict(SMOKE=[0, 1], SCC=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
).update_layout(bargap=0.2)
fig.show()


**What is the distribution of time spent looking at devices with screens based on gender?**

In [10]:
# Box plot of 'TUE' (screen time) for each value of 'Gender'
fig = px.box(
    data_frame=df,
    x='Gender',
    y='TUE',
    title='Distribution of Time Spent on Screens by Gender',
    labels=dict(Gender='Gender', TUE='Time Spent on Screens'),
    category_orders=dict(Gender=[0, 1]),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**What is the distribution of water intake (CH2O) among respondents?**

In [11]:
# Histogram of the 'CH2O' (water intake) column
fig = px.histogram(
    data_frame=df,
    x='CH2O',
    title='Distribution of Water Intake',
    labels=dict(CH2O='Water Intake'),
    category_orders=dict(CH2O=[0, 1, 2]),
    color_discrete_sequence=['#1f77b4'],
).update_layout(bargap=0.2)
fig.show()


**What is the relationship between the frequency of vegetable consumption (FCVC) and physical activity (FAF)?**

In [12]:
# Scatter plot of vegetable consumption, 'FCVC' (x), against physical activity, 'FAF' (y)
fig = px.scatter(
    data_frame=df,
    x='FCVC',
    y='FAF',
    title='Relationship between Vegetable Consumption and Physical Activity',
    labels=dict(FCVC='Vegetable Consumption', FAF='Physical Activity'),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**How many respondents monitor their caloric intake (SCC) based on family history with overweight?**

In [13]:
# Histogram of 'SCC', with bars split by colour on 'family_history_with_overweight'
fig = px.histogram(
    data_frame=df,
    x='SCC',
    color='family_history_with_overweight',
    title='Monitoring Caloric Intake based on Family History with Overweight',
    labels=dict(SCC='Monitors Caloric Intake', family_history_with_overweight='Family History'),
    category_orders=dict(SCC=[0, 1], family_history_with_overweight=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
).update_layout(bargap=0.2)
fig.show()


**What is the overall distribution of respondents by age and gender?**

In [14]:
# Visualize the age distribution of respondents, split by gender.
# A histogram is used instead of px.scatter: with only `x` supplied, a scatter
# puts each row's index on the y-axis, which says nothing about how ages are
# distributed within each gender.
fig = px.histogram(df, x='Age', color='Gender', title='Distribution of Age Group and Gender',
                   labels={'Age': 'Age', 'Gender': 'Gender'},
                   # Overlay (rather than stack) so both genders share the same baseline
                   barmode='overlay', opacity=0.7,
                   color_discrete_sequence=['#1f77b4', '#ff7f0e'])
fig.update_layout(bargap=0.2)
fig.show()


**Is there any noticeable trend between age and the frequency of high-caloric food consumption (FAVC)?**

In [15]:
# Scatter plot of age (x) against the 'FAVC' column (y)
fig = px.scatter(
    data_frame=df,
    x='Age',
    y='FAVC',
    title='Trend between Age and Frequency of High-Caloric Food Consumption',
    labels=dict(Age='Age', FAVC='Frequency of High-Caloric Food Consumption'),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**What is the distribution of caloric intake monitoring (SCC) among different age groups?**

In [16]:
# Visualize the distribution of caloric intake monitoring across age groups.
# Colouring by the raw 'Age' column creates one trace per distinct age value,
# so ages are first bucketed into a small number of equal-width groups.
N_AGE_GROUPS = 5

# Equal-width bins derived from the data itself, so no assumption is made about
# the scale or units of 'Age' (requires the column to be numeric).
age_bins = pd.cut(df['Age'], bins=N_AGE_GROUPS)
# Interval labels in ascending order, used to order the legend.
age_group_order = [str(interval) for interval in age_bins.cat.categories]

# assign() returns a new frame, leaving the shared `df` untouched for other cells.
# Labels are stored as plain strings; a missing age becomes the string 'nan'.
scc_by_age_group = df.assign(AgeGroup=age_bins.astype(str))

fig = px.histogram(scc_by_age_group, x='SCC', color='AgeGroup',
                   title='Distribution of Caloric Intake Monitoring Across Age Groups',
                   labels={'SCC': 'Caloric Intake Monitoring', 'AgeGroup': 'Age Group'},
                   category_orders={'SCC': [0, 1], 'AgeGroup': age_group_order},
                   color_discrete_sequence=px.colors.sequential.Plasma)
fig.update_layout(bargap=0.2)
fig.show()


**How does the age distribution differ between smokers and non-smokers?**

In [17]:
# Box plot of age for each value of the 'SMOKE' column
fig = px.box(
    data_frame=df,
    x='SMOKE',
    y='Age',
    title='Age Distribution Difference between Smokers and Non-Smokers',
    labels=dict(SMOKE='Smoker', Age='Age'),
    category_orders=dict(SMOKE=[0, 1]),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**What is the proportion of respondents who have a family history of overweight (family_history_with_overweight) and monitor their caloric intake (SCC)?**

In [18]:
# Sunburst chart: inner ring is 'family_history_with_overweight',
# outer ring is 'SCC' nested within it.
fig = px.sunburst(
    data_frame=df,
    path=['family_history_with_overweight', 'SCC'],
    title='Proportion of Family History and Caloric Intake Monitoring',
    labels=dict(family_history_with_overweight='Family History', SCC='Caloric Intake Monitoring'),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
)
fig.show()


**What is the distribution of time spent on screens (TUE) for respondents who smoke?**

In [19]:
# Histogram of 'TUE' (screen time), with bars split by colour on the 'SMOKE' column
fig = px.histogram(
    data_frame=df,
    x='TUE',
    color='SMOKE',
    title='Distribution of Time Spent on Screens for Smokers',
    labels=dict(TUE='Time Spent on Screens', SMOKE='Smoker'),
    category_orders=dict(TUE=[0, 1, 2], SMOKE=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
).update_layout(bargap=0.2)
fig.show()


**What is the relationship between the number of main meals (NCP) and physical activity levels (FAF)?**

In [20]:
# Scatter plot of number of main meals, 'NCP' (x), against physical activity, 'FAF' (y)
fig = px.scatter(
    data_frame=df,
    x='NCP',
    y='FAF',
    title='Relationship between Number of Main Meals and Physical Activity Levels',
    labels=dict(NCP='Number of Main Meals', FAF='Physical Activity Level'),
    color_discrete_sequence=['#1f77b4'],
)
fig.show()


**How does the distribution of caloric intake monitoring (SCC) vary based on the primary mode of transportation?**

In [21]:
# Histogram of 'SCC', with bars split by colour on the 'Public_Transportation' column
fig = px.histogram(
    data_frame=df,
    x='SCC',
    color='Public_Transportation',
    title='Distribution of Caloric Intake Monitoring based on Transportation Mode',
    labels=dict(SCC='Caloric Intake Monitoring', Public_Transportation='Uses Public Transportation'),
    category_orders=dict(SCC=[0, 1], Public_Transportation=[0, 1]),
    color_discrete_sequence=['#1f77b4', '#ff7f0e'],
).update_layout(bargap=0.2)
fig.show()
