# Most popular names in North Carolina by decade
Source: My analysis of data from [NC voter registrations](https://www.ncsbe.gov/results-data/voter-registration-data)

In [15]:
# Locate the SQLite database file used by the queries below.
from pathlib import Path

# Note: this assumes the `ncvoters.db` database has been copied to your home
# directory (an earlier version looked for it next to this notebook).

# EKH: expanduser() turns the leading `~` into an absolute home-directory path.
# NOTE(review): presumably the earlier failure with a Path object was caused by an unexpanded `~` -- confirm.
dbpath = Path("~/ncvoters.db").expanduser()


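Define `query`, a helper that builds the SQL query, runs it against the database, and pivots the result into one row per decade: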

In [16]:
import sqlite3
import pandas as pd
import numpy as np


def query(race_code, gender_code, start_year=1900, end_year=2000, top=10):
    """Return the most common first names per birth decade as a wide table.

    Counts rows of the ``voters`` table in the SQLite database at the
    module-level ``dbpath`` that match ``race_code`` and ``gender_code``,
    ranks first names within each birth decade by that count, keeps the
    first ``top`` ranks and pivots them to one row per decade.

    Parameters
    ----------
    race_code, gender_code : str
        Compared for equality against the ``race_code`` and ``gender_code``
        columns. They are passed as bound parameters, so values containing
        quotes are handled safely.
    start_year, end_year : int
        Inclusive bounds on ``birth_year``; both must be multiples of 10.
        Because the upper bound is inclusive, the decade that starts at
        ``end_year`` only covers that single birth year.
    top : int
        Number of ranks to keep per decade.

    Returns
    -------
    pandas.DataFrame
        A ``decade`` column followed by one column per rank (``1``, ``2``, ...),
        sorted by decade in descending order. A decade with fewer than ``top``
        distinct names has missing values in the trailing rank columns.

    Raises
    ------
    AssertionError
        If ``start_year`` or ``end_year`` is not a multiple of 10.
    """
    assert start_year % 10 == 0 and end_year % 10 == 0, \
        "Make sure your year ends in a zero"

    # Values are bound with `?` placeholders instead of being formatted into
    # the SQL text. `first_name` is a secondary sort key so that names with
    # equal counts always receive the same rank.
    q = '''
    SELECT  decade, first_name, rank
    FROM (
        SELECT  decade,
                first_name,
                ROW_NUMBER() OVER (
                    PARTITION BY decade
                    ORDER BY n_rows DESC, first_name
                ) rank
        FROM    (
            SELECT      10 * CAST(birth_year / 10 AS INT) decade,
                        first_name,
                        COUNT(first_name) n_rows
            FROM        voters
            WHERE       race_code = ?
            AND         gender_code = ?
            AND         birth_year BETWEEN ? AND ?
            GROUP BY    decade, first_name
        ) a
    ) b
    WHERE rank <= ?
    '''
    # int() keeps NumPy integers (which sqlite3 cannot bind) working.
    params = (race_code, gender_code, int(start_year), int(end_year), int(top))

    # A sqlite3 connection used as a context manager only manages the
    # transaction; it does not close the connection, so close it explicitly.
    con = sqlite3.connect(dbpath)
    try:
        ranked = pd.read_sql(q, con, params=params)
    finally:
        con.close()

    # Pivoting is simpler in pandas than in SQL: one row per decade, one
    # column per rank.
    return (
        ranked
        .pivot(index="decade", columns="rank", values="first_name")
        .sort_index(ascending=False)
        .rename_axis(columns=None)  # cosmetic: drop the "rank" label above the columns
        .reset_index()
    )


In [17]:
query(race_code="W", gender_code="M")


Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,JACOB,WILLIAM,MATTHEW,JOSHUA,MICHAEL,NICHOLAS,JOHN,ANDREW,CHRISTOPHER,JAMES
1,1990,MATTHEW,MICHAEL,WILLIAM,JOSHUA,CHRISTOPHER,JACOB,JOHN,ANDREW,JAMES,ZACHARY
2,1980,MICHAEL,CHRISTOPHER,MATTHEW,JOSHUA,JAMES,DAVID,JOHN,WILLIAM,DANIEL,ROBERT
3,1970,MICHAEL,CHRISTOPHER,JAMES,DAVID,JASON,JOHN,ROBERT,WILLIAM,BRIAN,MATTHEW
4,1960,MICHAEL,JAMES,DAVID,JOHN,ROBERT,WILLIAM,MARK,JEFFREY,RICHARD,TIMOTHY
5,1950,JAMES,MICHAEL,ROBERT,JOHN,DAVID,WILLIAM,RICHARD,THOMAS,CHARLES,MARK
6,1940,JAMES,WILLIAM,ROBERT,JOHN,DAVID,CHARLES,RICHARD,THOMAS,MICHAEL,LARRY
7,1930,JAMES,ROBERT,WILLIAM,JOHN,CHARLES,RICHARD,DONALD,THOMAS,DAVID,GEORGE
8,1920,WILLIAM,JAMES,JOHN,ROBERT,CHARLES,GEORGE,RICHARD,THOMAS,JOSEPH,DONALD
9,1910,JOHN,WILLIAM,JAMES,ROBERT,GEORGE,CHARLES,JOSEPH,THOMAS,PAUL,WALTER


In [18]:
query(race_code="W", gender_code="F")


Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,HANNAH,MADISON,EMILY,SARAH,TAYLOR,LAUREN,ELIZABETH,ANNA,EMMA,ABIGAIL
1,1990,SARAH,EMILY,JESSICA,ASHLEY,HANNAH,LAUREN,ELIZABETH,BRITTANY,TAYLOR,MEGAN
2,1980,JESSICA,JENNIFER,AMANDA,ASHLEY,SARAH,ELIZABETH,HEATHER,LAUREN,STEPHANIE,MELISSA
3,1970,JENNIFER,AMY,MELISSA,KIMBERLY,ANGELA,HEATHER,LISA,MICHELLE,ELIZABETH,STEPHANIE
4,1960,LISA,KIMBERLY,SUSAN,MARY,KAREN,DONNA,ELIZABETH,CYNTHIA,TAMMY,ANGELA
5,1950,DEBORAH,SUSAN,MARY,LINDA,PATRICIA,KAREN,DEBRA,BARBARA,DONNA,NANCY
6,1940,LINDA,MARY,PATRICIA,BARBARA,NANCY,BRENDA,SANDRA,CAROL,CAROLYN,JUDITH
7,1930,MARY,BETTY,BARBARA,SHIRLEY,NANCY,PATRICIA,MARGARET,DOROTHY,PEGGY,DORIS
8,1920,MARY,DOROTHY,BETTY,MARGARET,HELEN,RUTH,DORIS,FRANCES,ELIZABETH,VIRGINIA
9,1910,MARY,HELEN,ELIZABETH,MARGARET,RUTH,DOROTHY,FRANCES,VIRGINIA,EVELYN,MILDRED


In [19]:
query(race_code="B", gender_code="M")


Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,JOSHUA,ISAIAH,MICHAEL,JALEN,JORDAN,CHRISTOPHER,ELIJAH,JAMES,CHRISTIAN,CAMERON
1,1990,CHRISTOPHER,MICHAEL,JOSHUA,BRANDON,JAMES,ANTHONY,JUSTIN,WILLIAM,JORDAN,MARCUS
2,1980,MICHAEL,CHRISTOPHER,BRANDON,JAMES,ANTHONY,MARCUS,WILLIAM,ROBERT,DAVID,ANTONIO
3,1970,MICHAEL,JAMES,CHRISTOPHER,ANTHONY,WILLIAM,ROBERT,ERIC,KEVIN,DAVID,JOHN
4,1960,JAMES,MICHAEL,ANTHONY,ROBERT,WILLIAM,JOHN,KENNETH,DAVID,CHARLES,GREGORY
5,1950,JAMES,WILLIAM,MICHAEL,ROBERT,LARRY,JOHN,CHARLES,WILLIE,DAVID,RONALD
6,1940,JAMES,WILLIAM,JOHN,ROBERT,WILLIE,CHARLES,GEORGE,THOMAS,DAVID,JOSEPH
7,1930,JAMES,WILLIAM,ROBERT,JOHN,WILLIE,CHARLES,GEORGE,JOSEPH,THOMAS,DAVID
8,1920,JAMES,JOHN,WILLIAM,WILLIE,ROBERT,GEORGE,CHARLES,JOSEPH,CHARLIE,WALTER
9,1910,JAMES,WILLIAM,JOHN,WILLIE,GEORGE,ROBERT,JOSEPH,WALTER,ARTHUR,EDWARD


In [20]:
query(race_code="B", gender_code="F")


Unnamed: 0,decade,1,2,3,4,5,6,7,8,9,10
0,2000,DESTINY,KAYLA,JADA,JASMINE,ALEXIS,BRIANNA,TAYLOR,DIAMOND,MAKAYLA,JORDAN
1,1990,JASMINE,BRITTANY,ASHLEY,ALEXIS,BRIANNA,JESSICA,KAYLA,DESTINY,BRIANA,COURTNEY
2,1980,TIFFANY,ASHLEY,JESSICA,LATOYA,CRYSTAL,BRITTANY,KIMBERLY,ERICA,JASMINE,EBONY
3,1970,ANGELA,KIMBERLY,TONYA,NICOLE,TIFFANY,STEPHANIE,MICHELLE,CRYSTAL,YOLANDA,FELICIA
4,1960,ANGELA,SHARON,LISA,CYNTHIA,PAMELA,MARY,JACQUELINE,PATRICIA,LINDA,BRENDA
5,1950,MARY,LINDA,PATRICIA,BRENDA,BARBARA,DEBORAH,CAROLYN,SHIRLEY,GLORIA,CYNTHIA
6,1940,MARY,BARBARA,ANNIE,BETTY,SHIRLEY,DOROTHY,CAROLYN,PATRICIA,LINDA,BRENDA
7,1930,MARY,ANNIE,DOROTHY,SHIRLEY,BETTY,MARGARET,BARBARA,DORIS,HELEN,SARAH
8,1920,MARY,ANNIE,DOROTHY,RUTH,MARGARET,HELEN,ELIZABETH,LILLIE,MILDRED,GLADYS
9,1910,MARY,ANNIE,ELIZABETH,SARAH,MARTHA,MINNIE,MARGARET,GENEVA,LILLIE,MILDRED
