# Most popular names in North Carolina by decade
Source: My analysis of data from [NC voter registrations](https://www.ncsbe.gov/results-data/voter-registration-data)

In [17]:
# Location of the SQLite database built from the NC voter registration file.
# Set the NCVOTERS_DB environment variable to point at your own copy of
# `ncvoters.db`; otherwise the original author's (EKH) location is used.
import os

# Kept as a plain string rather than a `pathlib.Path`: the Path version raised
# an error for the author -- presumably an older Python, since
# `sqlite3.connect` only accepts path-like objects from 3.7 on (TODO confirm).
dbpath = os.environ.get("NCVOTERS_DB", "/home/saspeh/ncvoters.db")

The function below builds the SQL query, runs it against the database, and pivots the result into one row per decade:

In [18]:
import sqlite3
import pandas as pd
import numpy as np

def query(race_code, gender_code, start_year = 1900, end_year = 2000, top = 10):
    """Return the most common first names per birth decade as a wide table.

    Counts `first_name` values in the `voters` table of the SQLite database at
    `dbpath` for one race/gender combination, ranks the names within each
    birth decade, and pivots the result to one row per decade.

    Parameters
    ----------
    race_code, gender_code
        Values matched exactly against the `race_code` and `gender_code`
        columns.
    start_year, end_year
        Inclusive bounds on `birth_year`; both must be multiples of 10.
        NOTE: `end_year` is an inclusive *year*, so the last decade in the
        result only covers births in `end_year` itself (with the default,
        the 2000 row is built from birth_year == 2000 alone).
    top
        Number of ranked names to keep per decade.

    Returns
    -------
    pandas.DataFrame
        A `decade` column followed by one column per rank that occurs
        (at most 1..top), sorted with the most recent decade first.

    Raises
    ------
    AssertionError
        If `start_year` or `end_year` is not a multiple of 10.
    """
    assert start_year % 10 == 0 and end_year % 10 == 0, "Make sure your year ends in a zero"

    # Values are bound as parameters instead of being formatted into the SQL
    # text, so codes containing quotes cannot break (or inject into) the query.
    sql = '''
    SELECT *
    FROM (
        SELECT  decade,
                first_name,
                ROW_NUMBER() OVER (
                    PARTITION BY decade
                    ORDER BY n_rows DESC, first_name  -- name breaks ties deterministically
                ) AS rank
        FROM    (
            SELECT      10 * CAST(birth_year / 10 AS INT) AS decade,
                        first_name,
                        COUNT(first_name) AS n_rows
            FROM        voters
            WHERE       race_code = :race_code
            AND         gender_code = :gender_code
            AND         birth_year BETWEEN :start_year AND :end_year
            GROUP BY    decade, first_name
        ) AS counts
    ) AS ranked
    WHERE rank <= :top
    '''
    params = {
        "race_code": str(race_code),
        "gender_code": str(gender_code),
        # plain ints so numpy integer inputs can still be bound by sqlite3
        "start_year": int(start_year),
        "end_year": int(end_year),
        "top": int(top),
    }

    # `with sqlite3.connect(...)` only manages the transaction and leaves the
    # connection open, so close it explicitly once the rows are loaded.
    con = sqlite3.connect(dbpath)
    try:
        ranked = pd.read_sql(sql, con, params = params)
    finally:
        con.close()

    # Long -> wide: one row per decade, one column per rank.
    df = (
        ranked
        .pivot(
            index = "decade",
            columns = "rank",
            values = "first_name"
        )
        .sort_index(ascending = False)
    )
    df.columns.name = None # cosmetic: drop the "rank" label above the column headers

    return df.reset_index()

In [19]:
# Top first names per birth decade for race code "W", gender code "M"
query(race_code = "W", gender_code = "M")

Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,JACOB,WILLIAM,MATTHEW,JOSHUA,MICHAEL,NICHOLAS,JOHN,ANDREW,CHRISTOPHER,JAMES
1,1990,MATTHEW,MICHAEL,WILLIAM,JOSHUA,CHRISTOPHER,JACOB,JOHN,ANDREW,JAMES,ZACHARY
2,1980,MICHAEL,CHRISTOPHER,MATTHEW,JOSHUA,JAMES,DAVID,JOHN,WILLIAM,DANIEL,ROBERT
3,1970,MICHAEL,CHRISTOPHER,JAMES,DAVID,JASON,JOHN,ROBERT,WILLIAM,BRIAN,MATTHEW
4,1960,MICHAEL,JAMES,DAVID,JOHN,ROBERT,WILLIAM,MARK,JEFFREY,RICHARD,TIMOTHY
5,1950,JAMES,MICHAEL,ROBERT,DAVID,JOHN,WILLIAM,RICHARD,THOMAS,CHARLES,MARK
6,1940,JAMES,ROBERT,WILLIAM,JOHN,DAVID,CHARLES,RICHARD,THOMAS,MICHAEL,LARRY
7,1930,JAMES,ROBERT,WILLIAM,JOHN,CHARLES,RICHARD,DONALD,THOMAS,DAVID,GEORGE
8,1920,WILLIAM,JAMES,JOHN,ROBERT,CHARLES,GEORGE,THOMAS,RICHARD,JOSEPH,DONALD
9,1910,JOHN,WILLIAM,JAMES,ROBERT,CHARLES,GEORGE,JOSEPH,THOMAS,PAUL,FRANK


In [20]:
# Top first names per birth decade for race code "W", gender code "F"
query(race_code = "W", gender_code = "F")

Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,HANNAH,MADISON,EMILY,SARAH,TAYLOR,LAUREN,ELIZABETH,ANNA,EMMA,ABIGAIL
1,1990,SARAH,EMILY,JESSICA,ASHLEY,HANNAH,BRITTANY,LAUREN,ELIZABETH,TAYLOR,MEGAN
2,1980,JESSICA,JENNIFER,AMANDA,ASHLEY,SARAH,ELIZABETH,HEATHER,LAUREN,STEPHANIE,MELISSA
3,1970,JENNIFER,AMY,MELISSA,KIMBERLY,ANGELA,HEATHER,LISA,MICHELLE,ELIZABETH,STEPHANIE
4,1960,LISA,KIMBERLY,SUSAN,MARY,KAREN,DONNA,ELIZABETH,CYNTHIA,TAMMY,ANGELA
5,1950,DEBORAH,SUSAN,MARY,LINDA,PATRICIA,DEBRA,KAREN,BARBARA,DONNA,NANCY
6,1940,LINDA,MARY,PATRICIA,BARBARA,NANCY,BRENDA,SANDRA,CAROL,CAROLYN,BETTY
7,1930,MARY,BETTY,BARBARA,SHIRLEY,NANCY,PATRICIA,MARGARET,DOROTHY,PEGGY,DORIS
8,1920,MARY,DOROTHY,MARGARET,BETTY,HELEN,RUTH,DORIS,FRANCES,ELIZABETH,VIRGINIA
9,1910,MARY,MARGARET,RUTH,HELEN,DOROTHY,ELIZABETH,VIRGINIA,FRANCES,RUBY,MILDRED


In [21]:
# Top first names per birth decade for race code "B", gender code "M"
query(race_code = "B", gender_code = "M")

Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,JOSHUA,ISAIAH,MICHAEL,JALEN,JORDAN,CHRISTOPHER,ELIJAH,JAMES,CAMERON,CHRISTIAN
1,1990,CHRISTOPHER,MICHAEL,JOSHUA,BRANDON,JAMES,ANTHONY,JUSTIN,WILLIAM,JORDAN,MARCUS
2,1980,MICHAEL,CHRISTOPHER,BRANDON,JAMES,ANTHONY,MARCUS,WILLIAM,ROBERT,DAVID,ANTONIO
3,1970,MICHAEL,JAMES,CHRISTOPHER,ANTHONY,WILLIAM,ROBERT,KEVIN,ERIC,DAVID,JOHN
4,1960,JAMES,MICHAEL,ANTHONY,ROBERT,WILLIAM,JOHN,KENNETH,DAVID,CHARLES,GREGORY
5,1950,JAMES,WILLIAM,MICHAEL,LARRY,ROBERT,JOHN,CHARLES,WILLIE,DAVID,RONALD
6,1940,JAMES,WILLIAM,JOHN,ROBERT,WILLIE,CHARLES,GEORGE,THOMAS,DAVID,JOSEPH
7,1930,JAMES,WILLIAM,JOHN,ROBERT,WILLIE,CHARLES,GEORGE,JOSEPH,THOMAS,DAVID
8,1920,JAMES,JOHN,WILLIAM,WILLIE,ROBERT,GEORGE,CHARLES,JOSEPH,THOMAS,WALTER
9,1910,JAMES,JOHN,WILLIAM,ROBERT,JOSEPH,WILLIE,WALTER,GEORGE,CHARLIE,DAVID


In [22]:
# Top first names per birth decade for race code "B", gender code "F"
query(race_code = "B", gender_code = "F")

Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,DESTINY,KAYLA,JADA,JASMINE,ALEXIS,BRIANNA,TAYLOR,DIAMOND,MAKAYLA,JORDAN
1,1990,JASMINE,BRITTANY,ASHLEY,ALEXIS,JESSICA,BRIANNA,KAYLA,DESTINY,BRIANA,COURTNEY
2,1980,TIFFANY,ASHLEY,JESSICA,LATOYA,CRYSTAL,BRITTANY,KIMBERLY,JASMINE,ERICA,EBONY
3,1970,ANGELA,KIMBERLY,TONYA,NICOLE,TIFFANY,STEPHANIE,MICHELLE,CRYSTAL,YOLANDA,FELICIA
4,1960,ANGELA,SHARON,LISA,CYNTHIA,PAMELA,MARY,JACQUELINE,PATRICIA,LINDA,SANDRA
5,1950,MARY,LINDA,PATRICIA,BRENDA,BARBARA,DEBORAH,CAROLYN,SHIRLEY,CYNTHIA,GLORIA
6,1940,MARY,BARBARA,ANNIE,BETTY,SHIRLEY,DOROTHY,CAROLYN,PATRICIA,LINDA,BRENDA
7,1930,MARY,ANNIE,DOROTHY,SHIRLEY,BETTY,MARGARET,BARBARA,DORIS,HELEN,SARAH
8,1920,MARY,ANNIE,DOROTHY,RUTH,MARGARET,HELEN,ELIZABETH,LILLIE,MILDRED,LOUISE
9,1910,MARY,ANNIE,ELIZABETH,LILLIE,RUTH,SARAH,ROSA,ALICE,ETHEL,MARGARET
