# Most popular names in North Carolina by decade
Source: My analysis of data from [NC voter registrations](https://www.ncsbe.gov/results-data/voter-registration-data)

In [None]:
# Notebook-wide settings: how many names to show and where the database lives.
from pathlib import Path

# Number of ranked name columns to show (e.g. 10 for the top 10).
NUMBER_OF_BUCKETS = 10

# The SQLite database is expected at `ncvoters.db` in your home directory.
# (To read a copy from the notebook's working directory instead, use
# `Path("ncvoters.db")`.)
dbpath = Path("~", "ncvoters.db").expanduser()



SQL query to be used:

In [None]:
import sqlite3
import pandas as pd
import numpy as np

def query(race_code, gender_code, start_year=1900, end_year=2000, top=NUMBER_OF_BUCKETS):
    """Return the most common first names per birth decade for one voter group.

    Counts rows in the ``voters`` table of the SQLite database at ``dbpath``
    that match ``race_code`` and ``gender_code``, groups them by birth decade
    and first name, and keeps the ``top`` most frequent names in each decade.

    Args:
        race_code: Value compared against the ``race_code`` column.
        gender_code: Value compared against the ``gender_code`` column.
        start_year: First birth year to include (inclusive); must be a
            multiple of 10.
        end_year: Last birth year to include (inclusive); must be a
            multiple of 10.
        top: Number of names to keep per decade.

    Returns:
        A pandas DataFrame with one row per decade, newest decade first. The
        first column is ``decade``; the remaining columns are labelled by rank
        (1 = most common) and hold the first name at that rank.

    Raises:
        ValueError: If ``start_year`` or ``end_year`` is not a multiple of 10.
    """
    # Local import keeps this cell self-contained.
    from contextlib import closing

    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if start_year % 10 != 0 or end_year % 10 != 0:
        raise ValueError("Make sure your year ends in a zero")

    # Values are bound as `?` parameters instead of being formatted into the
    # SQL text, so quotes in an argument cannot break or alter the statement.
    # NOTE: BETWEEN is inclusive on both ends, so the decade starting at
    # `end_year` only contains births in `end_year` itself.
    q = '''
    SELECT *
    FROM (
        SELECT  decade,
                first_name,
                ROW_NUMBER() OVER(PARTITION BY decade ORDER BY n_rows DESC) rank
        FROM    (
            SELECT      10 * CAST(birth_year / 10 AS INT) decade,
                        first_name,
                        COUNT(first_name) n_rows
            FROM        voters
            WHERE       race_code = ?
            AND         gender_code = ?
            AND         birth_year BETWEEN ? AND ?
            GROUP BY    decade, first_name
        ) a
    ) b
    WHERE rank <= ?
    '''
    # Placeholders are positional: the order here must match the SQL above.
    # Plain str/int conversions keep sqlite3 happy with e.g. numpy integers.
    params = (
        str(race_code),
        str(gender_code),
        int(start_year),
        int(end_year),
        int(top),
    )

    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back; it does not close. `closing` guarantees the handle is freed.
    with closing(sqlite3.connect(dbpath)) as con:
        long_df = pd.read_sql(q, con, params=params)

    # Pivot from one row per (decade, rank) to one row per decade with a
    # column for each rank; pivoting in pandas is simpler than in SQL.
    df = (
        long_df
        .pivot(
            index="decade",
            columns="rank",
            values="first_name",
        )
        .sort_index(ascending=False)
    )

    df.columns.name = None  # cosmetic: drop the "rank" label above the columns
    df = df.reset_index()

    return df


In [None]:
# Top first names per decade for voters with race_code "W" and gender_code "M".
query(race_code="W", gender_code="M")


In [None]:
# Top first names per decade for voters with race_code "W" and gender_code "F".
query(race_code="W", gender_code="F")


In [None]:
# Top first names per decade for voters with race_code "B" and gender_code "M".
query(race_code="B", gender_code="M")


In [None]:
# Top first names per decade for voters with race_code "B" and gender_code "F".
query(race_code="B", gender_code="F")
