In [2]:
import numpy as np
import pandas as pd
import zipfile
import plotly.express as px
import matplotlib.pyplot as plt
import requests
from io import BytesIO
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
from my_plots import *

---

## National Names

You can find popular baby names data on the [Social Security webpage](https://www.ssa.gov/oact/babynames/limits.html). The code below fetches the data over the network when it runs, so you do not need to download it first.

In [3]:
# # This is a smaller file with only the most popular names
# url = 'https://github.com/esnt/Data/raw/main/Names/popular_names.csv'
# data = pd.read_csv(url)
# data.columns = ['name','sex','count','year']
# data['pct'] = data['count'] / data.groupby(['year', 'sex'])['count'].transform('sum')

# This is a larger file with all names
def load_name_data(names_file='https://www.ssa.gov/oact/babynames/names.zip', timeout=60):
    """Download a zip archive of per-year name files and combine them into one DataFrame.

    Every archive member whose name ends in ``.txt`` is read as a header-less
    CSV with the columns ``name``, ``sex`` and ``count``. A ``year`` column is
    added from the member's file name, and a ``pct`` column holds each row's
    share of the total ``count`` within its (year, sex) group.

    Parameters
    ----------
    names_file : str
        URL of the zip archive. Defaults to the SSA national names archive.
    timeout : float
        Seconds passed to ``requests.get`` so a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``sex``, ``count``, ``year``, ``pct``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the archive contains no ``.txt`` members.
    """
    response = requests.get(names_file, timeout=timeout)
    # Fail here with the real HTTP status instead of letting ZipFile choke on
    # an error page further down.
    response.raise_for_status()

    dfs = []
    with zipfile.ZipFile(BytesIO(response.content)) as z:
        for file in z.namelist():
            if not file.endswith('.txt'):
                continue
            with z.open(file) as f:
                df = pd.read_csv(f, header=None, names=['name', 'sex', 'count'])
            # Characters 3..6 of the member name are parsed as the year;
            # presumably members are named like 'yobYYYY.txt' -- confirm if
            # the archive layout ever changes.
            df['year'] = int(file[3:7])
            dfs.append(df)

    if not dfs:
        raise ValueError(f"No .txt members found in archive: {names_file}")

    data = pd.concat(dfs, ignore_index=True)
    # Share of each name within its own year and sex.
    data['pct'] = data['count'] / data.groupby(['year', 'sex'])['count'].transform('sum')
    return data
# Build the combined names table once; later cells read it through the
# notebook-level name `data`.
data = load_name_data()

In [4]:
def ohw(df):
    """Return the rows of ``df`` whose (name, sex) pair occurs in exactly one distinct year.

    The result is ordered by the grouped (name, sex) keys and carries a fresh
    integer index, with ``name`` and ``sex`` as the leading columns.
    """
    key_cols = ['name', 'sex']

    # Number of distinct years in which each (name, sex) pair was recorded.
    years_seen = df.groupby(key_cols)['year'].nunique()
    single_year_keys = years_seen.loc[years_seen.eq(1)].index

    # Look the surviving pairs up in the original rows, then flatten the index
    # back into ordinary columns.
    by_name_sex = df.set_index(key_cols)
    return by_name_sex.loc[single_year_keys].reset_index()

In [5]:
# Keep only the (name, sex) pairs that appear in a single year; the
# one-hit-wonder cells further down read this frame.
ohw_data = ohw(data)

In [None]:
# Peek at the first two rows to check the loaded columns.
data.head(2)

In [None]:
# (rows, columns) of the combined table.
data.shape

## Year

Pick a year

In [6]:
# top_names_plot comes from the `my_plots` star import; presumably it charts
# the n most popular names for the given year -- confirm against my_plots.
# Requires `data` from the loading cell, so run the notebook top to bottom.
fig = top_names_plot(data, n=5, year=1977)
fig.show()

NameError: name 'data' is not defined

In [None]:
## Pick year and variable (count or pct)

input_year = 1977
variable = 'pct'
# .copy() yields an independent frame, so later edits to year_data do not
# touch `data`.
# NOTE(review): neither `variable` nor `year_data` is read by any cell visible
# here -- confirm they are still needed.
year_data = data[data['year'] == input_year].copy()

In [None]:
# unique_names_summary comes from the `my_plots` star import; whatever it
# returns for 1977 is printed as-is.
print(unique_names_summary(data, 1977))

In [None]:
# name_frequencies_plot comes from the `my_plots` star import; presumably it
# charts how name counts are distributed in the given year -- confirm against
# my_plots.
fig = name_frequencies_plot(data, year=1977)
fig.show()

In [None]:
# one_hit_wonders comes from the `my_plots` star import; it is given the
# pre-filtered ohw_data frame (not the full `data`) and the year to summarise.
one_hit_wonders(ohw_data, year=1977)

Summary of One-Hit Wonders in 1977:
Number of female one-hit wonders: 168
Number of male one-hit wonders: 92
Most common female one-hit wonder: Kashka with 16 occurrences
Most common male one-hit wonder: Ebay with 12 occurrences


In [None]:
# Same one-hit-wonder summary as for 1977, this time for 2000 (year passed
# positionally).
one_hit_wonders(ohw_data, 2000)

Summary of One-Hit Wonders in 2000:
Number of female one-hit wonders: 235
Number of male one-hit wonders: 171
Most common female one-hit wonder: Jadakiss with 13 occurrences
Most common male one-hit wonder: Zaykeese with 13 occurrences


In [None]:

input_year = 2011
# Print summary of the number of and the most common one-hit wonders for that year.
# `ohw_data` is the notebook-level frame produced by ohw(data); the name
# `one_hit_wonder_data` only exists as a local inside ohw() and would raise
# NameError here.
ohw_year = ohw_data[ohw_data['year'] == input_year]
if ohw_year.empty:
    print(f"No one-hit wonders found in {input_year}")
else:
    one_hit_wonder_counts = ohw_year['sex'].value_counts()
    common_one_hit_wonders = ohw_year.groupby(['name', 'sex'])['count'].sum().reset_index()

    # Rank each sex separately, largest total first.
    female_ranked = common_one_hit_wonders[common_one_hit_wonders['sex'] == 'F'].sort_values(by='count', ascending=False)
    male_ranked = common_one_hit_wonders[common_one_hit_wonders['sex'] == 'M'].sort_values(by='count', ascending=False)

    if female_ranked.empty or male_ranked.empty:
        # .iloc[0] needs at least one row per sex. Checking explicitly (rather
        # than a bare `except:`) lets genuine errors such as a missing column
        # or a keyboard interrupt surface instead of being hidden.
        print("not enough data")
    else:
        most_common_female = female_ranked.iloc[0]
        most_common_male = male_ranked.iloc[0]

        print(f"Summary of One-Hit Wonders in {input_year}:")
        print(f"Number of female one-hit wonders: {one_hit_wonder_counts.get('F', 0)}")
        print(f"Number of male one-hit wonders: {one_hit_wonder_counts.get('M', 0)}")

        print(f"Most common female one-hit wonder: {most_common_female['name']} with {most_common_female['count']} occurrences")
        print(f"Most common male one-hit wonder: {most_common_male['name']} with {most_common_male['count']} occurrences")

## Sex-balance of names

In [None]:
# name_trend_plot comes from the `my_plots` star import; presumably it charts
# the name 'Taylor' over the years in `data` -- confirm against my_plots.
fig = name_trend_plot(data, 'Taylor')
fig.show()

In [None]:
fig = name_sex_balance_plot(data, 'Taylor')