# Baby names in the United States

In [1]:
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Folder holding the per-year name files; only files ending in ".txt" are loaded.
folder_path = "names/"

# Sort by filename so the files are visited in a deterministic order; the year
# assigned to each file below depends on this order.
files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

# The files are read without a header row; these names are given to the three columns.
column_names = ["Name", "Gender", "Count"]

# Year assigned to the first file in sorted order; every following file gets the next year.
# NOTE(review): this assumes exactly one file per consecutive year with no gaps,
# starting at 1880 - a missing or extra file would shift every later year. Confirm.
first_year = 1880

# Read each file into its own frame tagged with its year, then concatenate once.
# Concatenating inside the loop re-copies the accumulated frame on every
# iteration, which is quadratic in the number of files.
yearly_frames = [
    pd.read_csv(os.path.join(folder_path, file), names=column_names, header=None).assign(Year=year)
    for year, file in enumerate(files, start=first_year)
]

# pd.concat raises on an empty list, so fall back to an empty, correctly-labelled
# frame when the folder contains no matching files.
df_baby_names = (
    pd.concat(yearly_frames, ignore_index=True)
    if yearly_frames
    else pd.DataFrame(columns=column_names + ["Year"])
)

df_baby_names.head(20)

Unnamed: 0,Name,Gender,Count,Year
0,Mary,F,7065,1880
1,Anna,F,2604,1880
2,Emma,F,2003,1880
3,Elizabeth,F,1939,1880
4,Minnie,F,1746,1880
5,Margaret,F,1578,1880
6,Ida,F,1472,1880
7,Alice,F,1414,1880
8,Bertha,F,1320,1880
9,Sarah,F,1288,1880


# Generations by birth year  

## The Greatest Generation (1901 - 1927) 


In [2]:
# Dashboard for The Greatest Generation: top-20 names (bar), number of names by
# dominant gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1901, 1927

# Rows of the full dataset that fall inside the generation's birth years
df_greatest_generation_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_greatest_generation_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_greatest_generation = (
    df_greatest_generation_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_greatest_generation.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_greatest_generation["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_greatest_generation.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_greatest_generation_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="The Greatest Generation Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()


## The Silent Generation (1928 - 1945) 

In [3]:
# Dashboard for the Silent Generation: top-20 names (bar), number of names by
# dominant gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1928, 1945

# Rows of the full dataset that fall inside the generation's birth years
df_silent_generation_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_silent_generation_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_silent_generation = (
    df_silent_generation_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_silent_generation.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_silent_generation["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_silent_generation.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_silent_generation_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="Silent Generation Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()

## The Baby Boom Generation (1946 - 1964) 

In [4]:
# Dashboard for the Baby Boom Generation: top-20 names (bar), number of names by
# dominant gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1946, 1964

# Rows of the full dataset that fall inside the generation's birth years
df_baby_boom_generation_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_baby_boom_generation_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_baby_boom_generation = (
    df_baby_boom_generation_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_baby_boom_generation.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_baby_boom_generation["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_baby_boom_generation.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_baby_boom_generation_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="Baby Boom Generation Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()

## Generation X (1965 - 1980)  

In [5]:
# Dashboard for Generation X: top-20 names (bar), number of names by dominant
# gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1965, 1980

# Rows of the full dataset that fall inside the generation's birth years
df_generation_x_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_generation_x_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_generation_x = (
    df_generation_x_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_generation_x.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_generation_x["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_generation_x.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_generation_x_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="Generation X Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()

## Millennial Generation (1981 - 1996) 

In [6]:
# Dashboard for the Millennial Generation: top-20 names (bar), number of names
# by dominant gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1981, 1996

# Rows of the full dataset that fall inside the generation's birth years
df_millennial_generation_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_millennial_generation_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_millennial_generation = (
    df_millennial_generation_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_millennial_generation.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_millennial_generation["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_millennial_generation.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_millennial_generation_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

# Title spelling corrected ("Millennial") to match the section heading
fig.update_layout(
    title_text="Millennial Generation Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()


## Generation Z (1997 - 2009) 

In [7]:
# Dashboard for Generation Z: top-20 names (bar), number of names by dominant
# gender (pie), and yearly counts of the top-5 names (lines).

# Inclusive birth-year range for this generation
start_year, end_year = 1997, 2009

# Rows of the full dataset that fall inside the generation's birth years
df_generation_z_rows = df_baby_names.loc[
    df_baby_names["Year"].between(start_year, end_year),
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_generation_z_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_generation_z = (
    df_generation_z_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_generation_z.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_generation_z["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_generation_z.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_generation_z_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="Generation Z Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()

## Generation Alpha (2010 - 2024)  

In [8]:
# Dashboard for Generation Alpha: top-20 names (bar), number of names by
# dominant gender (pie), and yearly counts of the top-5 names (lines).

# First birth year of this generation. No upper bound is applied, so every year
# from 2010 onward that is present in the data is included.
# NOTE(review): the section heading says 2010 - 2024; if the data ever contains
# later years, decide whether they should be excluded here.
start_year = 2010

# Rows of the full dataset from the generation's first birth year onward
df_generation_alpha_rows = df_baby_names.loc[
    df_baby_names["Year"] >= start_year,
    ["Name", "Year", "Gender", "Count"],
]

# Total count per (Name, Gender) pair within the generation
grouped = df_generation_alpha_rows.groupby(["Name", "Gender"], as_index=False).agg({"Count": "sum"})

# For each name, keep the gender whose total count is largest
most_popular_gender = grouped.loc[
    grouped.groupby("Name")["Count"].idxmax(), ["Name", "Gender"]
]

# One row per name: earliest year it appears in the range, total count over both
# genders, and its dominant gender - sorted from most to least popular.
df_generation_alpha = (
    df_generation_alpha_rows.groupby("Name", as_index=False)
    .agg({"Year": "min", "Count": "sum"})
    .merge(most_popular_gender, on="Name", how="left")
    .sort_values(by="Count", ascending=False)
)

# Bar chart of the 20 most popular names. x, y and text all come from the same
# 20-row slice so the labels match the bars and the figure does not carry the
# whole Count column.
top_names = df_generation_alpha.iloc[:20]
bar_chart = go.Bar(
    x=top_names["Name"],
    y=top_names["Count"],
    text=top_names["Count"],
    textposition='auto',
    name="Name Popularity",
    marker=dict(color='teal')
)

# Pie chart: how many distinct names have each dominant gender (one vote per
# name, not weighted by Count).
gender_distribution = df_generation_alpha["Gender"].value_counts()
pie_chart = go.Pie(
    labels=gender_distribution.index,
    values=gender_distribution.values,
    name="Gender Distribution"
)

# The 5 most popular names with their dominant gender
df_line_chart_name = df_generation_alpha.iloc[:5][["Name", "Gender"]]

# Yearly rows for those names, restricted to each name's dominant gender. The
# inner merge on (Name, Gender) already limits the rows to the top-5 names.
df_line_chart_actual = pd.merge(
    df_generation_alpha_rows[["Name", "Year", "Count", "Gender"]],
    df_line_chart_name,
    on=['Name', 'Gender'],
    how='inner',
)

# 2x2 grid: bar and pie on the first row, line chart spanning the second row
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "bar"}, {"type": "pie"}], [{"type": "scatter", "colspan": 2}, None]],
    subplot_titles=("Most Popular Names", "Gender Distribution", "Popularity Over Time")
)

fig.add_trace(bar_chart, row=1, col=1)
fig.add_trace(pie_chart, row=1, col=2)

# One line per top-5 name, added straight to the second-row subplot
for name, group in df_line_chart_actual.groupby("Name"):
    fig.add_trace(
        go.Scatter(
            x=group["Year"],
            y=group["Count"],
            mode='lines+markers',
            name=name,
            line=dict(width=1),
            marker=dict(size=6),
            text=group["Name"]
        ),
        row=2, col=1
    )

fig.update_layout(
    title_text="Generation Alpha Name Analysis Dashboard",
    title_x=0.5,
    height=700,
    showlegend=True,
    template="plotly_white"
)

# Axis titles are set per subplot; layout-level xaxis_title/yaxis_title would
# only label the bar chart and leave the line chart unlabelled.
fig.update_xaxes(title_text="Names", row=1, col=1)
fig.update_yaxes(title_text="Count", row=1, col=1)
fig.update_xaxes(title_text="Year", row=2, col=1)
fig.update_yaxes(title_text="Count", row=2, col=1)

# Display the Dashboard
fig.show()


## Are there spikes in the popularity of names related to historical figures, celebrities, or fictional characters?

### Elvis Presley
### Franklin Delano Roosevelt
### Mariah Carey

In [9]:
# Historical figure: Franklin Delano Roosevelt.
# Select the rows where the name is "Franklin" and the gender is "M".
is_franklin_male = (df_baby_names["Name"] == "Franklin") & (df_baby_names["Gender"] == "M")
df_popular_Franklin = df_baby_names.loc[is_franklin_male, ["Name", "Year", "Gender", "Count"]]

# Single line trace: Count plotted against Year for the selected rows
franklin_trace = go.Scatter(
    x=df_popular_Franklin["Year"],
    y=df_popular_Franklin["Count"],
    mode='lines',
    name='Line Chart',
)

# Build the figure around the trace, then apply titles and theme
fig = go.Figure(data=[franklin_trace])
fig.update_layout(
    title='Franklin popularity over the years',
    xaxis_title='Years',
    yaxis_title='Count',
    showlegend=True,
    template="plotly_white",
)

# Render the chart
fig.show()

In [None]:
# Celebrity: Mariah Carey.
# Select the rows where the name is "Mariah" and the gender is "F".
is_mariah_female = (df_baby_names["Name"] == "Mariah") & (df_baby_names["Gender"] == "F")
df_popular_Mariah = df_baby_names.loc[is_mariah_female, ["Name", "Year", "Gender", "Count"]]

# Single line trace: Count plotted against Year for the selected rows
mariah_trace = go.Scatter(
    x=df_popular_Mariah["Year"],
    y=df_popular_Mariah["Count"],
    mode='lines',
    name='Line Chart',
)

# Build the figure around the trace, then apply titles and theme
fig = go.Figure(data=[mariah_trace])
fig.update_layout(
    title='Mariah popularity over the years',
    xaxis_title='Years',
    yaxis_title='Count',
    showlegend=True,
    template="plotly_white",
)

# Render the chart
fig.show()


In [11]:
# Popular figure of the 1900s: Elvis Presley.
# Select the rows where the name is "Elvis" and the gender is "M".
is_elvis_male = (df_baby_names["Name"] == "Elvis") & (df_baby_names["Gender"] == "M")
df_popular_elvis = df_baby_names.loc[is_elvis_male, ["Name", "Year", "Gender", "Count"]]

# Single line trace: Count plotted against Year for the selected rows
elvis_trace = go.Scatter(
    x=df_popular_elvis["Year"],
    y=df_popular_elvis["Count"],
    mode='lines',
    name='Line Chart',
)

# Build the figure around the trace, then apply titles and theme
fig = go.Figure(data=[elvis_trace])
fig.update_layout(
    title='Elvis popularity over the years',
    xaxis_title='Years',
    yaxis_title='Count',
    showlegend=True,
    template="plotly_white",
)

# Render the chart
fig.show()