# Day 5

Fuente: https://www.kaggle.com/datasets/programmerrdai/human-height?select=average-height-by-year-of-birth.csv

In [337]:
import pandas as pd

In [338]:
# Load the Kaggle "average height by year of birth" table (comma-separated).
raw_csv_path = "average-height-by-year-of-birth.csv"
df = pd.read_csv(raw_csv_path, sep=",")
df.shape

(21008, 5)

In [339]:
# Discard every row that has a missing value in any column, then report the new size.
df = df.dropna(axis=0, how="any")
df.shape

(20301, 5)

In [340]:
# Show the dtype pandas inferred for each column.
df.dtypes

Entity                      object
Code                        object
Year                         int64
Mean male height (cm)      float64
Mean female height (cm)    float64
dtype: object

In [341]:
# Preview the first ten rows of the table.
df.head(10)

Unnamed: 0,Entity,Code,Year,Mean male height (cm),Mean female height (cm)
0,Afghanistan,AFG,1896,161.164095,149.187747
1,Afghanistan,AFG,1897,161.196286,149.321451
2,Afghanistan,AFG,1898,161.228297,149.455494
3,Afghanistan,AFG,1899,161.260727,149.589503
4,Afghanistan,AFG,1900,161.293068,149.723587
5,Afghanistan,AFG,1901,161.325492,149.857573
6,Afghanistan,AFG,1902,161.358355,149.990929
7,Afghanistan,AFG,1903,161.391215,150.123635
8,Afghanistan,AFG,1904,161.423989,150.25514
9,Afghanistan,AFG,1905,161.456764,150.385801


In [342]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()

Unnamed: 0,Year,Mean male height (cm),Mean female height (cm)
count,20301.0,20301.0,20301.0
mean,1946.0,168.33772,157.044033
std,29.155478,5.247556,4.794033
min,1896.0,152.884624,140.29005
25%,1921.0,164.581195,153.836776
50%,1946.0,168.315818,157.298663
75%,1971.0,171.683874,160.135626
max,1996.0,182.567309,169.797931


In [343]:
# Keep only the rows for Spain. `.copy()` makes the subset an explicitly independent
# frame, so the column assignments performed on `df` in later cells operate on their
# own data and can never raise pandas' SettingWithCopyWarning.
df = df[df["Entity"] == "Spain"].copy()
df.shape

(101, 5)

In [344]:
# Preview the first five rows of the Spain-only subset.
df.head(5)

Unnamed: 0,Entity,Code,Year,Mean male height (cm),Mean female height (cm)
17473,Spain,ESP,1896,162.290259,151.108934
17474,Spain,ESP,1897,162.373685,151.186757
17475,Spain,ESP,1898,162.457372,151.264801
17476,Spain,ESP,1899,162.541389,151.34294
17477,Spain,ESP,1900,162.625339,151.42094


In [345]:
# Derive metre-valued copies of the two centimetre height columns.
cm_per_m = 100
df["mean male height (m)"] = df["Mean male height (cm)"].div(cm_per_m)
df["mean female height (m)"] = df["Mean female height (cm)"].div(cm_per_m)

In [346]:
# Male-minus-female gap in mean height, in metres.
df["difference height (m)"] = df["mean male height (m)"].sub(df["mean female height (m)"])

In [347]:
# Preview the first five rows, now including the derived metre and difference columns.
df.head(5)

Unnamed: 0,Entity,Code,Year,Mean male height (cm),Mean female height (cm),mean male height (m),mean female height (m),difference height (m)
17473,Spain,ESP,1896,162.290259,151.108934,1.622903,1.511089,0.111813
17474,Spain,ESP,1897,162.373685,151.186757,1.623737,1.511868,0.111869
17475,Spain,ESP,1898,162.457372,151.264801,1.624574,1.512648,0.111926
17476,Spain,ESP,1899,162.541389,151.34294,1.625414,1.513429,0.111984
17477,Spain,ESP,1900,162.625339,151.42094,1.626253,1.514209,0.112044


In [348]:
# Persist the Spain subset (with the derived columns) as a comma-separated file.
spain_csv_path = "average-height-by-year-of-birth-spain.csv"
df.to_csv(spain_csv_path, sep=",")

In [349]:
import plotly.graph_objects as go

In [350]:
# Store the male heights as negative numbers so the male bars extend to the left of
# zero. `-abs(...)` is used instead of `* -1` so the cell is idempotent: re-running it
# no longer flips the sign back to positive. After the first run the column holds
# exactly the same values as before, and later cells keep reading it in negated form.
df['Mean male height (cm)'] = -df['Mean male height (cm)'].abs()

diverging_chart = go.Figure()

# Male series: horizontal bars, one per birth year (negative x values).
diverging_chart.add_trace(go.Bar(
    x=df['Mean male height (cm)'],
    y=df['Year'],
    orientation='h',
    name='Mean Male Height (cm)',
    marker_color='blue',
    hoverinfo='x+y'
))

# Female series: horizontal bars, one per birth year (positive x values).
diverging_chart.add_trace(go.Bar(
    x=df['Mean female height (cm)'],
    y=df['Year'],
    orientation='h',
    name='Mean Female Height (cm)',
    marker_color='pink',
    hoverinfo='x+y'
))

diverging_chart.update_layout(
    title='Average Height by Year of Birth in Spain (Diverging Chart)',
    # 'relative' stacks negative values below/left of zero and positive ones above/right.
    barmode='relative',
    height=800,
    width=900,
    yaxis=dict(
        title='Year of Birth',
        autorange='reversed'  # earliest birth year at the top
    ),
    xaxis=dict(title='Height (cm)'),
    # Horizontal legend anchored just above the top-right corner of the plot area.
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

diverging_chart

In [351]:
# Label every row with the first year of the 5-year bin it falls in (1896 -> 1895).
df['FiveYearGroup'] = df['Year'].floordiv(5).mul(5)

# Average both height columns within each 5-year bin.
height_columns = ['Mean male height (cm)', 'Mean female height (cm)']
five_year_grouped = (
    df.groupby('FiveYearGroup')[height_columns]
    .mean()
    .reset_index()
)

# Gap between the sexes; the absolute value is taken of the male mean before subtracting.
five_year_grouped['Height Difference (cm)'] = (
    five_year_grouped['Mean male height (cm)']
    .abs()
    .sub(five_year_grouped['Mean female height (cm)'])
)

In [352]:
# Aggregate the Spain series by decade of birth. `decade_grouped` was never defined
# anywhere in the notebook, so this cell failed with NameError on a fresh kernel; it is
# now built here, mirroring the 5-year aggregation. `assign` adds the Decade label on a
# temporary frame, so `df` itself is not modified.
decade_grouped = (
    df.assign(Decade=(df['Year'] // 10) * 10)  # 1896 -> 1890
    .groupby('Decade')
    .agg({
        'Mean male height (cm)': 'mean',
        'Mean female height (cm)': 'mean'
    })
    .reset_index()
)

# abs() keeps the gap positive whether or not the male column is stored negated.
decade_grouped['Height Difference (cm)'] = abs(decade_grouped['Mean male height (cm)']) - decade_grouped['Mean female height (cm)']

diverging_chart_decade_vertical = go.Figure()

# Male series.
# NOTE(review): if the male column in `df` has already been negated by the yearly-chart
# cell, the unary minus below makes these bars positive (stacked, not diverging) —
# confirm which orientation is intended.
diverging_chart_decade_vertical.add_trace(go.Bar(
    x=decade_grouped['Decade'],
    y=-decade_grouped['Mean male height (cm)'],
    orientation='v',
    name='Mean Male Height (cm)',
    marker_color='blue',
))


# Female series.
diverging_chart_decade_vertical.add_trace(go.Bar(
    x=decade_grouped['Decade'],
    y=decade_grouped['Mean female height (cm)'],
    orientation='v',
    name='Mean Female Height (cm)',
    marker_color='pink',
))


# Male-female gap per decade.
diverging_chart_decade_vertical.add_trace(go.Bar(
    x=decade_grouped['Decade'],
    y=decade_grouped['Height Difference (cm)'],
    orientation='v',
    name='Height Difference (cm)',
    marker_color='green',
))


diverging_chart_decade_vertical.update_layout(
    title='Average Height by Decade of Birth in Spain (Diverging Chart)',
    barmode='relative',
    height=800,
    width=900,
    xaxis=dict(
        title='Decade of Birth',
        autorange=True
    ),
    yaxis=dict(title='Height (cm)'),
    # Horizontal legend anchored just above the top-right corner of the plot area.
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

diverging_chart_decade_vertical


In [368]:
# Series shared by the three traces (all taken from the 5-year aggregation).
cohort_start = five_year_grouped['FiveYearGroup']
male_cm = -five_year_grouped['Mean male height (cm)']
gap_cm = five_year_grouped['Height Difference (cm)']
female_cm = five_year_grouped['Mean female height (cm)']

# Men: bar length in cm, label inside the bar in metres (2 decimals).
men_bar = go.Bar(
    y=cohort_start,
    x=male_cm,
    orientation='h',
    name='Mean Male Height (m)',
    marker_color='lightblue',
    text=(male_cm / 100).round(2),
    textposition='inside',
    insidetextfont={'color': 'black', 'size': 34},
)

# Difference: bar length and label both in cm (2 decimals).
gap_bar = go.Bar(
    y=cohort_start,
    x=gap_cm,
    orientation='h',
    name='Mean Height Difference (cm)',
    marker_color='green',
    text=gap_cm.round(2),
    textposition='inside',
    insidetextfont={'color': 'white', 'size': 34},
)

# Women: bar length in cm, label inside the bar in metres (2 decimals).
women_bar = go.Bar(
    y=cohort_start,
    x=female_cm,
    orientation='h',
    name='Mean Female Height (m)',
    marker_color='pink',
    text=(female_cm / 100).round(2),
    textposition='inside',
    insidetextfont={'color': 'black', 'size': 34},
)

# Trace order (men, difference, women) determines the stacking order.
diverging_chart_five_year_horizontal = go.Figure(data=[men_bar, gap_bar, women_bar])

# Annotations: two centred footer lines placed below the plot area (paper coordinates).
footer_notes = [
    {
        'x': 0.5,
        'y': y_pos,
        'xref': 'paper',
        'yref': 'paper',
        'showarrow': False,
        'text': note,
        'font': {'size': 18, 'color': "black"},
        'align': "center",
    }
    for y_pos, note in (
        (-0.12, "Source: Kaggle"),
        (-0.05, "Author: Sergio Esteban Tarrero"),
    )
]

diverging_chart_five_year_horizontal.update_layout(
    title='Average Height by 5-Year Group of Birth in Spain',
    barmode='relative',
    height=730,
    width=1450,
    yaxis={
        'title': '5-Year Group of Birth',
        'autorange': 'reversed',  # earliest cohort at the top
    },
    xaxis={'title': 'Height (m)'},
    # Horizontal legend anchored just above the top-right corner of the plot area.
    legend={
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': 1.02,
        'xanchor': 'right',
        'x': 1,
    },
    annotations=footer_notes,
)

diverging_chart_five_year_horizontal

In [359]:
# Export the 5-year chart to an HTML file.
html_output_path = "diverging_chart_five_year_horizontal.html"
diverging_chart_five_year_horizontal.write_html(html_output_path)