**Food Allergy Analysis**


We parsed an open-source data set containing over 3000 records of information about children's allergies. By analyzing this data, we determined the distribution and prevalence of these allergies. This analysis allows us to understand the proportion of different allergies among the children and to identify which allergies are the most common and which are the rarest.

In [56]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Load the allergy records and tally, for every column whose name contains
# 'ALG_END', how many rows hold a non-missing entry.
# NOTE(review): presumably a non-missing *_ALG_END entry marks a child with a
# record for that allergy -- confirm against the data set's documentation.
df = pd.read_csv('data.csv')
cols = [col for col in df.columns if 'ALG_END' in col]

# Human-readable label per column: drop the '_ALG_END' suffix and title-case
# the remainder (a column named 'MILK_ALG_END' would become 'Milk').
# `labels` stays index-aligned with `cols`.
labels = [name.replace('_ALG_END', '').title() for name in cols]

# Series.count() returns the number of non-NA cells. Unlike a per-row
# np.isnan() check it also works on non-numeric columns, does not depend on
# the index labels being 0..n-1, and yields 0 for an all-empty column.
values = [int(df[col].count()) for col in cols]

# Pie chart of the allergies whose share of all counted entries is at least
# `percentage_threshold` percent.
total_values = sum(values)
percentage_threshold = 2

filtered_labels = []
filtered_values = []
for label, value in zip(labels, values):
    # Guard the division: with no matching columns, or only empty ones,
    # total_values is 0 and every share is treated as 0%.
    percentage = (value / total_values) * 100 if total_values else 0.0
    if percentage >= percentage_threshold:
        filtered_labels.append(label)
        filtered_values.append(value)

fig = go.Figure(
    data=[
        go.Pie(
            labels=filtered_labels,
            values=filtered_values,
            hovertemplate="%{label}: %{value}<br>(%{percent})",
        )
    ]
)
# Draw the label and percentage inside each slice; the legend would only
# repeat that information, so it is hidden.
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title='Pie Chart of Most Common Food Allergies', showlegend=False)

fig.show()

# Bar chart of the allergies whose share of all counted entries falls below
# `percentage_threshold` percent, i.e. the ones left out of the pie chart.
small_values_labels = []
small_values_values = []
for label, value in zip(labels, values):
    # Guard the division: when total_values is 0 every share is treated as
    # 0%, so all labels land in this chart instead of raising
    # ZeroDivisionError.
    percentage = (value / total_values) * 100 if total_values else 0.0
    if percentage < percentage_threshold:
        small_values_labels.append(label)
        small_values_values.append(value)

fig_small = go.Figure([go.Bar(x=small_values_labels, y=small_values_values)])
fig_small.update_layout(title='Less Common Allergies')
fig_small.show()