# In [12]:
import pandas as pd
import os

def read_and_write_babyname_dat(fname, gender):
    """Load one baby-name file and return the rows for a single gender.

    The file is read as a header-less CSV whose three columns are named
    ``types``, ``gender`` and ``counts``. Despite the function's name,
    nothing is written to disk here; the caller decides what to persist.

    Args:
        fname: Path to the data file. The last four characters of the file
            name's stem (the name with its extension removed) must be the
            year, e.g. ``names-boys2023.txt``.
        gender: Value to match against the ``gender`` column.

    Returns:
        A new DataFrame holding only the rows for ``gender``, with the
        original ``types``, ``gender`` and ``counts`` columns plus:
        ``probs`` (each count divided by the sum of the kept counts),
        ``total_unique`` (number of kept rows) and ``year``.

    Raises:
        ValueError: If the last four characters of the stem are not an
            integer.
    """
    d = pd.read_csv(
        fname,
        header=None,
        names=["types", "gender", "counts"],
        dtype={"types": str, "counts": int},
    )

    # Boolean indexing may hand back a view-like slice; take an explicit copy
    # so the column assignments below never trigger SettingWithCopyWarning
    # and never touch the frame that was read.
    d = d[d["gender"] == gender].copy()

    d["probs"] = d["counts"] / d["counts"].sum()
    d["total_unique"] = len(d)

    # Take the year from the stem rather than a fixed [-8:-4] slice so the
    # extension length does not matter (".txt", ".csv", ".data", ...).
    stem = os.path.splitext(os.path.basename(fname))[0]
    d["year"] = int(stem[-4:])

    return d

# Driver: load every per-year name file from `folder_path`, build one combined
# frame per gender, then export the boys' 2022 and 2023 slices as CSV.
folder_path = "data"
boys_dataframes = []
girls_dataframes = []

# Columns produced by read_and_write_babyname_dat. The empty fallback frames
# must carry all of them (notably 'year'), otherwise the year filters below
# raise KeyError when no matching files are found.
_empty_columns = ['types', 'gender', 'counts', 'probs', 'total_unique', 'year']

# os.listdir returns entries in arbitrary order; sort so the row order of the
# concatenated frames is reproducible from run to run.
for filename in sorted(os.listdir(folder_path)):
    if filename.startswith("names-boys") and filename.endswith(".txt"):
        df = read_and_write_babyname_dat(os.path.join(folder_path, filename), "M")
        boys_dataframes.append(df)

    if filename.startswith("names-girls") and filename.endswith(".txt"):
        df = read_and_write_babyname_dat(os.path.join(folder_path, filename), "F")
        girls_dataframes.append(df)

boys_df = (
    pd.concat(boys_dataframes, ignore_index=True)
    if boys_dataframes
    else pd.DataFrame(columns=_empty_columns)
)
girls_df = (
    pd.concat(girls_dataframes, ignore_index=True)
    if girls_dataframes
    else pd.DataFrame(columns=_empty_columns)
)

# Keep one year at a time; 'year' is constant within each slice, so drop it.
boys_2023_df = boys_df[boys_df['year'] == 2023].drop(['year'], axis=1)
boys_2022_df = boys_df[boys_df['year'] == 2022].drop(['year'], axis=1)

boys_2023_df.to_csv('boys_2023_df.csv', index=False)
boys_2022_df.to_csv('boys_2022_df.csv', index=False)
