# Most popular names in North Carolina by decade
Source: My analysis of data from [NC voter registrations](https://www.ncsbe.gov/results-data/voter-registration-data)

In [11]:
# Location of the SQLite database holding the NC voter registrations.
#
# An earlier version of this cell built the location with pathlib and assumed
# `ncvoters.db` had been copied into this directory:
#
#     from pathlib import Path
#     dbfile = Path("ncvoters.db")
#
# EKH: using the Path did not work for me, but a plain string did, so the
# location is kept as a string.
# NOTE(review): this is an absolute path on one machine -- point it at your own
# copy of the database before running the notebook.
dbpath = "/home/saspeh/ncvoters.db"

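The function below builds the SQL query, runs it against the database, and pivots the result so that each row is a decade and each column is a rank: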

In [12]:
import sqlite3
import pandas as pd
import numpy as np

def query(race_code, gender_code, start_year = 1900, end_year = 2000, top = 10, db_path = None):
    """Return the most common first names per birth decade as a wide table.

    Counts `first_name` values in the `voters` table for one race code and one
    gender code, ranks the names within each birth decade by that count, and
    pivots the top-ranked names so that each row is a decade and each column
    is a rank.

    Parameters
    ----------
    race_code : str
        Value matched against `voters.race_code`.
    gender_code : str
        Value matched against `voters.gender_code`.
    start_year, end_year : int
        Inclusive bounds on `birth_year`; both must be multiples of 10.
        Because the upper bound is inclusive, the decade that starts at
        `end_year` only contains people born in exactly `end_year`.
    top : int
        Number of ranks (names) to keep per decade.
    db_path : str or path-like, optional
        SQLite database to read. When omitted, the notebook-level `dbpath`
        variable is used.

    Returns
    -------
    pandas.DataFrame
        A `decade` column followed by one column per rank (1, 2, ...), sorted
        with the most recent decade first.

    Raises
    ------
    AssertionError
        If `start_year` or `end_year` is not a multiple of 10.
    """
    assert start_year % 10 == 0 and end_year % 10 == 0, "Make sure your year ends in a zero"

    # Every user-supplied value is bound as a `?` parameter rather than pasted
    # into the SQL text, so codes containing quotes cannot break or alter the
    # query. `first_name` is a secondary sort key so that names with equal
    # counts are always ranked in the same (alphabetical) order.
    q = '''
    select *
    from (
        select  decade,
                first_name,
                row_number() over(partition by decade order by n_rows desc, first_name) rank
        from    (
            select      10 * cast(birth_year / 10 as int) decade,
                        first_name,
                        count(first_name) n_rows
            from        voters
            where       race_code = ?
            and         gender_code = ?
            and         birth_year between ? and ?
            group by    decade, first_name
        ) a
    ) b
    where rank <= ?
    '''
    # Plain str/int conversions keep numpy scalars and similar inputs bindable.
    params = (str(race_code), str(gender_code), int(start_year), int(end_year), int(top))

    # `with sqlite3.connect(...)` only wraps a transaction; it does not close
    # the connection, so close it explicitly once the rows have been read.
    con = sqlite3.connect(dbpath if db_path is None else db_path)
    try:
        ranked = pd.read_sql(q, con, params = params)
    finally:
        con.close()

    # Pivoting is done in pandas: one row per decade, one column per rank.
    wide = (
        ranked
        .pivot(
            index = "decade",
            columns = "rank",
            values = "first_name"
        )
        .sort_index(ascending = False)
    )
    wide.columns.name = None # cosmetic only: drops the "rank" label above the column headers

    return wide.reset_index()

In [13]:
# Top names per decade for race code "W" and gender code "M", using the default years and top-10 cutoff.
x = query("W","M")

In [14]:
# Display the resulting table: one row per decade, one column per rank.
x

Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,JACOB,WILLIAM,MATTHEW,JOSHUA,MICHAEL,NICHOLAS,JOHN,ANDREW,CHRISTOPHER,JAMES
1,1990,MATTHEW,MICHAEL,WILLIAM,JOSHUA,CHRISTOPHER,JACOB,JOHN,ANDREW,JAMES,ZACHARY
2,1980,MICHAEL,CHRISTOPHER,MATTHEW,JOSHUA,JAMES,DAVID,JOHN,WILLIAM,DANIEL,ROBERT
3,1970,MICHAEL,CHRISTOPHER,JAMES,DAVID,JASON,JOHN,ROBERT,WILLIAM,BRIAN,MATTHEW
4,1960,MICHAEL,JAMES,DAVID,JOHN,ROBERT,WILLIAM,MARK,JEFFREY,RICHARD,TIMOTHY
5,1950,JAMES,MICHAEL,ROBERT,DAVID,JOHN,WILLIAM,RICHARD,THOMAS,CHARLES,MARK
6,1940,JAMES,ROBERT,WILLIAM,JOHN,DAVID,CHARLES,RICHARD,THOMAS,MICHAEL,LARRY
7,1930,JAMES,ROBERT,WILLIAM,JOHN,CHARLES,RICHARD,DONALD,THOMAS,DAVID,GEORGE
8,1920,WILLIAM,JAMES,JOHN,ROBERT,CHARLES,GEORGE,THOMAS,RICHARD,JOSEPH,DONALD
9,1910,JOHN,WILLIAM,JAMES,ROBERT,CHARLES,GEORGE,JOSEPH,THOMAS,PAUL,FRANK
