## Get median age county data, 2000-2022

### Concatenating 2000-2009 data

In [1]:
import pandas as pd
pd.options.mode.copy_on_write = True

Data from US Census here: https://www2.census.gov/programs-surveys/popest/datasets/2000-2009/counties/asrh/ \
Unlike the 2010-2019 and 2020-2022 data, a single combined file for the entire United States is not provided.\
Here I create one by downloading each state's file and concatenating them.

In [2]:
# Download one file per state code (01-56) and collect the resulting frames.
# Codes 3, 7, 14, 43 and 52 are reserved and do not currently refer to any existing data, so they are skipped.
# Specifying the encoding is necessary due to a New Mexican county having a non-standard character in its name.
dfs = [
    pd.read_csv(f'https://www2.census.gov/programs-surveys/popest/datasets/2000-2009/counties/asrh/cc-est2009-agesex-{i:02d}.csv',engine='python',encoding='latin1')
    for i in range(1, 57)
    if i not in (3, 7, 14, 43, 52)
]
# Stack the per-state frames into one national frame; each state's own row labels are kept.
df1 = pd.concat(dfs)
df1

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE4564_FEM,AGE65PLUS_TOT,AGE65PLUS_MALE,AGE65PLUS_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
0,50,1,1,Alabama,Autauga County,1,43671,21221,22450,3023,...,5078,4451,1817,2634,428,113,315,35.1,33.9,36.1
1,50,1,1,Alabama,Autauga County,2,43671,21221,22450,3023,...,5078,4451,1817,2634,428,113,315,35.1,33.9,36.1
2,50,1,1,Alabama,Autauga County,3,43872,21318,22554,3027,...,5126,4490,1833,2657,434,115,319,35.2,34.0,36.1
3,50,1,1,Alabama,Autauga County,4,44434,21591,22843,3096,...,5307,4615,1894,2721,431,113,318,35.6,34.3,36.6
4,50,1,1,Alabama,Autauga County,5,45157,21928,23229,3106,...,5453,4723,1937,2786,422,111,311,35.9,34.6,36.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,50,56,45,Wyoming,Weston County,8,6475,3325,3150,326,...,931,1110,501,609,163,52,111,43.7,42.7,44.5
272,50,56,45,Wyoming,Weston County,9,6568,3370,3198,350,...,978,1130,509,621,152,48,104,44.0,42.6,45.0
273,50,56,45,Wyoming,Weston County,10,6845,3553,3292,373,...,996,1135,508,627,157,48,109,42.9,40.8,44.5
274,50,56,45,Wyoming,Weston County,11,6928,3590,3338,395,...,992,1135,503,632,161,47,114,42.1,40.1,44.1


### Cleaning YEAR column

This must be done now, before concatenating with the later dataframes, because each file reuses the same numeric YEAR codes for different years (for example, code 3 means July 2000 here but July 2010 in the next file). The key for the YEAR code can be found here: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2000-2009/cc-est2009-agesex.pdf

In [3]:
# Year codes 1 and 2 refer to the April census data (and an estimate of it) rather than a July figure.
# Keep only code 3 onward so that every remaining row is a July estimate.
df1 = df1.loc[df1['YEAR'].ge(3)]
df1.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE4564_FEM,AGE65PLUS_TOT,AGE65PLUS_MALE,AGE65PLUS_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
2,50,1,1,Alabama,Autauga County,3,43872,21318,22554,3027,...,5126,4490,1833,2657,434,115,319,35.2,34.0,36.1
3,50,1,1,Alabama,Autauga County,4,44434,21591,22843,3096,...,5307,4615,1894,2721,431,113,318,35.6,34.3,36.6
4,50,1,1,Alabama,Autauga County,5,45157,21928,23229,3106,...,5453,4723,1937,2786,422,111,311,35.9,34.6,36.9
5,50,1,1,Alabama,Autauga County,6,45762,22222,23540,3110,...,5635,4866,2053,2813,413,103,310,35.9,34.9,37.0
6,50,1,1,Alabama,Autauga County,7,46933,22765,24168,3237,...,5829,5037,2133,2904,429,113,316,35.8,34.8,36.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,50,56,45,Wyoming,Weston County,8,6475,3325,3150,326,...,931,1110,501,609,163,52,111,43.7,42.7,44.5
272,50,56,45,Wyoming,Weston County,9,6568,3370,3198,350,...,978,1130,509,621,152,48,104,44.0,42.6,45.0
273,50,56,45,Wyoming,Weston County,10,6845,3553,3292,373,...,996,1135,508,627,157,48,109,42.9,40.8,44.5
274,50,56,45,Wyoming,Weston County,11,6928,3590,3338,395,...,992,1135,503,632,161,47,114,42.1,40.1,44.1


In [4]:
# Map each remaining YEAR code (3-12) to the July 1 date it stands for (2000-2009, in order).
year_replacements = {
    code: f'200{i}-07-01'
    for i, code in enumerate(range(3, 13))
}
year_replacements

{3: '2000-07-01',
 4: '2001-07-01',
 5: '2002-07-01',
 6: '2003-07-01',
 7: '2004-07-01',
 8: '2005-07-01',
 9: '2006-07-01',
 10: '2007-07-01',
 11: '2008-07-01',
 12: '2009-07-01'}

In [5]:
# Translate every YEAR code to its date string in a single pass.
# Passing the whole mapping to replace() gives the same result as replacing one code
# at a time: the replacement values are strings, so they can never match another integer key.
df1['YEAR'] = df1['YEAR'].replace(year_replacements)
df1.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE4564_FEM,AGE65PLUS_TOT,AGE65PLUS_MALE,AGE65PLUS_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
2,50,1,1,Alabama,Autauga County,2000-07-01,43872,21318,22554,3027,...,5126,4490,1833,2657,434,115,319,35.2,34.0,36.1
3,50,1,1,Alabama,Autauga County,2001-07-01,44434,21591,22843,3096,...,5307,4615,1894,2721,431,113,318,35.6,34.3,36.6
4,50,1,1,Alabama,Autauga County,2002-07-01,45157,21928,23229,3106,...,5453,4723,1937,2786,422,111,311,35.9,34.6,36.9
5,50,1,1,Alabama,Autauga County,2003-07-01,45762,22222,23540,3110,...,5635,4866,2053,2813,413,103,310,35.9,34.9,37.0
6,50,1,1,Alabama,Autauga County,2004-07-01,46933,22765,24168,3237,...,5829,5037,2133,2904,429,113,316,35.8,34.8,36.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,50,56,45,Wyoming,Weston County,2005-07-01,6475,3325,3150,326,...,931,1110,501,609,163,52,111,43.7,42.7,44.5
272,50,56,45,Wyoming,Weston County,2006-07-01,6568,3370,3198,350,...,978,1130,509,621,152,48,104,44.0,42.6,45.0
273,50,56,45,Wyoming,Weston County,2007-07-01,6845,3553,3292,373,...,996,1135,508,627,157,48,109,42.9,40.8,44.5
274,50,56,45,Wyoming,Weston County,2008-07-01,6928,3590,3338,395,...,992,1135,503,632,161,47,114,42.1,40.1,44.1


## Importing and cleaning YEAR column for 2010-2019 data

Data from: https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/counties/asrh/ \
\
Year codes from: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2020/cc-est2020-agesex.pdf

In [6]:
# Load the combined national file for 2010-2020 from the working directory,
# using the same parser engine and encoding as the 2000-2009 state files.
df2 = pd.read_csv(
    'CC-EST2020-AGESEX-ALL.csv',
    encoding='latin1',
    engine='python',
)
df2

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
0,50,1,1,Alabama,Autauga County,1,54571,26569,28002,3579,...,705,731,295,436,551,159,392,37,35.9,37.9
1,50,1,1,Alabama,Autauga County,2,54582,26576,28006,3582,...,704,730,294,436,551,159,392,37,35.9,37.8
2,50,1,1,Alabama,Autauga County,3,54761,26667,28094,3575,...,707,743,299,444,556,164,392,37.1,36,37.9
3,50,1,1,Alabama,Autauga County,4,55229,26980,28249,3552,...,727,782,315,467,601,191,410,37.3,36.2,38.4
4,50,1,1,Alabama,Autauga County,5,54970,26830,28140,3405,...,751,834,340,494,632,206,426,37.7,36.4,38.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43997,50,56,45,Wyoming,Weston County,10,6962,3665,3297,373,...,123,182,91,91,195,66,129,43.6,42.9,44.5
43998,50,56,45,Wyoming,Weston County,11,6895,3624,3271,351,...,116,195,86,109,188,65,123,43.8,43.2,44.6
43999,50,56,45,Wyoming,Weston County,12,6880,3615,3265,333,...,112,193,81,112,195,73,122,44.3,43.8,44.9
44000,50,56,45,Wyoming,Weston County,13,6752,3551,3201,318,...,115,184,74,110,202,80,122,44.6,43.9,45.3


In [7]:
# Keep only year codes 3 through 12 (both ends included).
# Codes 1 and 2 are dropped just as they were for the 2000-2009 data
# (presumably the April figures again; confirm against the year-code key).
# Codes 13 and 14 overlap with data in the next dataframe, and so are unneeded here.
df2 = df2.loc[df2['YEAR'].between(3, 12)]
df2.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
2,50,1,1,Alabama,Autauga County,3,54761,26667,28094,3575,...,707,743,299,444,556,164,392,37.1,36,37.9
3,50,1,1,Alabama,Autauga County,4,55229,26980,28249,3552,...,727,782,315,467,601,191,410,37.3,36.2,38.4
4,50,1,1,Alabama,Autauga County,5,54970,26830,28140,3405,...,751,834,340,494,632,206,426,37.7,36.4,38.8
5,50,1,1,Alabama,Autauga County,6,54747,26588,28159,3217,...,794,890,378,512,636,209,427,38.1,36.8,39.1
6,50,1,1,Alabama,Autauga County,7,54922,26804,28118,3183,...,822,944,400,544,668,232,436,38.2,36.8,39.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43995,50,56,45,Wyoming,Weston County,8,7202,3796,3406,440,...,138,164,74,90,187,71,116,42.3,41.3,43.6
43996,50,56,45,Wyoming,Weston County,9,7228,3794,3434,448,...,131,172,86,86,193,71,122,42.3,41.7,43
43997,50,56,45,Wyoming,Weston County,10,6962,3665,3297,373,...,123,182,91,91,195,66,129,43.6,42.9,44.5
43998,50,56,45,Wyoming,Weston County,11,6895,3624,3271,351,...,116,195,86,109,188,65,123,43.8,43.2,44.6


In [8]:
# Map each remaining YEAR code (3-12) to the July 1 date it stands for (2010-2019, in order).
year_replacements2 = {
    code: f'201{i}-07-01'
    for i, code in enumerate(range(3, 13))
}
year_replacements2

{3: '2010-07-01',
 4: '2011-07-01',
 5: '2012-07-01',
 6: '2013-07-01',
 7: '2014-07-01',
 8: '2015-07-01',
 9: '2016-07-01',
 10: '2017-07-01',
 11: '2018-07-01',
 12: '2019-07-01'}

In [9]:
# Translate every YEAR code to its date string in a single pass.
# Passing the whole mapping to replace() gives the same result as replacing one code
# at a time: the replacement values are strings, so they can never match another integer key.
df2['YEAR'] = df2['YEAR'].replace(year_replacements2)
df2.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
2,50,1,1,Alabama,Autauga County,2010-07-01,54761,26667,28094,3575,...,707,743,299,444,556,164,392,37.1,36,37.9
3,50,1,1,Alabama,Autauga County,2011-07-01,55229,26980,28249,3552,...,727,782,315,467,601,191,410,37.3,36.2,38.4
4,50,1,1,Alabama,Autauga County,2012-07-01,54970,26830,28140,3405,...,751,834,340,494,632,206,426,37.7,36.4,38.8
5,50,1,1,Alabama,Autauga County,2013-07-01,54747,26588,28159,3217,...,794,890,378,512,636,209,427,38.1,36.8,39.1
6,50,1,1,Alabama,Autauga County,2014-07-01,54922,26804,28118,3183,...,822,944,400,544,668,232,436,38.2,36.8,39.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43995,50,56,45,Wyoming,Weston County,2015-07-01,7202,3796,3406,440,...,138,164,74,90,187,71,116,42.3,41.3,43.6
43996,50,56,45,Wyoming,Weston County,2016-07-01,7228,3794,3434,448,...,131,172,86,86,193,71,122,42.3,41.7,43
43997,50,56,45,Wyoming,Weston County,2017-07-01,6962,3665,3297,373,...,123,182,91,91,195,66,129,43.6,42.9,44.5
43998,50,56,45,Wyoming,Weston County,2018-07-01,6895,3624,3271,351,...,116,195,86,109,188,65,123,43.8,43.2,44.6


In [10]:
# Summarise the column dtypes and non-null counts of the 2010-2019 frame after cleaning YEAR.
# NOTE(review): the population columns are reported as object rather than int64 here, so the
# file presumably contains some non-numeric entries; confirm before doing arithmetic on them.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31430 entries, 2 to 43999
Data columns (total 96 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   SUMLEV           31430 non-null  int64 
 1   STATE            31430 non-null  int64 
 2   COUNTY           31430 non-null  int64 
 3   STNAME           31430 non-null  object
 4   CTYNAME          31430 non-null  object
 5   YEAR             31430 non-null  object
 6   POPESTIMATE      31430 non-null  object
 7   POPEST_MALE      31430 non-null  object
 8   POPEST_FEM       31430 non-null  object
 9   UNDER5_TOT       31430 non-null  object
 10  UNDER5_MALE      31430 non-null  object
 11  UNDER5_FEM       31430 non-null  object
 12  AGE513_TOT       31430 non-null  object
 13  AGE513_MALE      31430 non-null  object
 14  AGE513_FEM       31430 non-null  object
 15  AGE1417_TOT      31430 non-null  object
 16  AGE1417_MALE     31430 non-null  object
 17  AGE1417_FEM      31430 non-null  obj

## Importing and cleaning YEAR column for 2020-2022 data

Data from: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html \
\
Year codes from: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2020-2022/cc-est2022-agesex.pdf

In [11]:
# Load the combined national file for 2020-2022 from the working directory,
# using the same parser engine and encoding as the earlier files.
df3 = pd.read_csv(
    'cc-est2022-agesex-all.csv',
    encoding='latin1',
    engine='python',
)
df3

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
0,50,1,1,Alabama,Autauga County,1,58802,28761,30041,3443,...,1055,1165,501,664,955,345,610,39.2,37.9,40.3
1,50,1,1,Alabama,Autauga County,2,58902,28819,30083,3457,...,1064,1170,501,669,958,350,608,39.1,37.9,40.3
2,50,1,1,Alabama,Autauga County,3,59210,28851,30359,3416,...,1074,1190,492,698,972,362,610,39.2,38.0,40.3
3,50,1,1,Alabama,Autauga County,4,59759,29105,30654,3460,...,1152,1225,495,730,1019,385,634,39.2,38.0,40.4
4,50,1,3,Alabama,Baldwin County,1,231761,113388,118373,12046,...,4861,5601,2542,3059,4610,1812,2798,43.7,42.5,44.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12571,50,56,43,Wyoming,Washakie County,4,7719,3973,3746,378,...,187,250,125,125,212,73,139,44.8,43.3,46.6
12572,50,56,45,Wyoming,Weston County,1,6840,3724,3116,308,...,109,175,70,105,191,73,118,43.5,42.7,44.8
12573,50,56,45,Wyoming,Weston County,2,6818,3724,3094,310,...,110,171,70,101,193,73,120,43.5,42.6,44.8
12574,50,56,45,Wyoming,Weston County,3,6766,3729,3037,302,...,115,151,66,85,184,69,115,43.9,43.1,45.3


In [12]:
# Unlike the previous two dataframes, only the first year code needs to be removed for consistency.
df3 = df3.loc[df3['YEAR'].ne(1)]
df3.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
1,50,1,1,Alabama,Autauga County,2,58902,28819,30083,3457,...,1064,1170,501,669,958,350,608,39.1,37.9,40.3
2,50,1,1,Alabama,Autauga County,3,59210,28851,30359,3416,...,1074,1190,492,698,972,362,610,39.2,38.0,40.3
3,50,1,1,Alabama,Autauga County,4,59759,29105,30654,3460,...,1152,1225,495,730,1019,385,634,39.2,38.0,40.4
5,50,1,3,Alabama,Baldwin County,2,233219,114050,119169,12046,...,4934,5653,2561,3092,4667,1844,2823,43.8,42.6,45.0
6,50,1,3,Alabama,Baldwin County,3,239361,116853,122508,12273,...,5105,5836,2653,3183,4725,1875,2850,43.9,42.7,45.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12570,50,56,43,Wyoming,Washakie County,3,7712,3968,3744,370,...,171,241,117,124,218,78,140,44.7,43.4,46.1
12571,50,56,43,Wyoming,Washakie County,4,7719,3973,3746,378,...,187,250,125,125,212,73,139,44.8,43.3,46.6
12573,50,56,45,Wyoming,Weston County,2,6818,3724,3094,310,...,110,171,70,101,193,73,120,43.5,42.6,44.8
12574,50,56,45,Wyoming,Weston County,3,6766,3729,3037,302,...,115,151,66,85,184,69,115,43.9,43.1,45.3


In [13]:
# Map each remaining YEAR code (2-4) to the July 1 date it stands for (2020-2022, in order).
# Exactly one date is generated per code, so nothing is left for zip() to silently discard.
year_replacements3 = {
    code: f'202{i}-07-01'
    for i, code in enumerate(range(2, 5))
}
year_replacements3

{2: '2020-07-01', 3: '2021-07-01', 4: '2022-07-01'}

In [14]:
# Translate every YEAR code to its date string in a single pass.
# Passing the whole mapping to replace() gives the same result as replacing one code
# at a time: the replacement values are strings, so they can never match another integer key.
df3['YEAR'] = df3['YEAR'].replace(year_replacements3)
df3.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM,AGE85PLUS_TOT,AGE85PLUS_MALE,AGE85PLUS_FEM,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM
1,50,1,1,Alabama,Autauga County,2020-07-01,58902,28819,30083,3457,...,1064,1170,501,669,958,350,608,39.1,37.9,40.3
2,50,1,1,Alabama,Autauga County,2021-07-01,59210,28851,30359,3416,...,1074,1190,492,698,972,362,610,39.2,38.0,40.3
3,50,1,1,Alabama,Autauga County,2022-07-01,59759,29105,30654,3460,...,1152,1225,495,730,1019,385,634,39.2,38.0,40.4
5,50,1,3,Alabama,Baldwin County,2020-07-01,233219,114050,119169,12046,...,4934,5653,2561,3092,4667,1844,2823,43.8,42.6,45.0
6,50,1,3,Alabama,Baldwin County,2021-07-01,239361,116853,122508,12273,...,5105,5836,2653,3183,4725,1875,2850,43.9,42.7,45.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12570,50,56,43,Wyoming,Washakie County,2021-07-01,7712,3968,3744,370,...,171,241,117,124,218,78,140,44.7,43.4,46.1
12571,50,56,43,Wyoming,Washakie County,2022-07-01,7719,3973,3746,378,...,187,250,125,125,212,73,139,44.8,43.3,46.6
12573,50,56,45,Wyoming,Weston County,2020-07-01,6818,3724,3094,310,...,110,171,70,101,193,73,120,43.5,42.6,44.8
12574,50,56,45,Wyoming,Weston County,2021-07-01,6766,3729,3037,302,...,115,151,66,85,184,69,115,43.9,43.1,45.3


In [15]:
# Summarise the column dtypes and non-null counts of the 2020-2022 frame after cleaning YEAR.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9432 entries, 1 to 12575
Data columns (total 96 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SUMLEV           9432 non-null   int64  
 1   STATE            9432 non-null   int64  
 2   COUNTY           9432 non-null   int64  
 3   STNAME           9432 non-null   object 
 4   CTYNAME          9432 non-null   object 
 5   YEAR             9432 non-null   object 
 6   POPESTIMATE      9432 non-null   int64  
 7   POPEST_MALE      9432 non-null   int64  
 8   POPEST_FEM       9432 non-null   int64  
 9   UNDER5_TOT       9432 non-null   int64  
 10  UNDER5_MALE      9432 non-null   int64  
 11  UNDER5_FEM       9432 non-null   int64  
 12  AGE513_TOT       9432 non-null   int64  
 13  AGE513_MALE      9432 non-null   int64  
 14  AGE513_FEM       9432 non-null   int64  
 15  AGE1417_TOT      9432 non-null   int64  
 16  AGE1417_MALE     9432 non-null   int64  
 17  AGE1417_FEM      9

## Concatenating 2000-2009, 2010-2019, and 2020-2022 data

In [16]:
# The 2000-2009 data has less specificity and therefore fewer columns.
# An outer join keeps the union of all columns and fills the ones a frame lacks with NaN.
df = pd.concat([df1, df2, df3], axis=0, join='outer')
df

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE6569_FEM,AGE7074_TOT,AGE7074_MALE,AGE7074_FEM,AGE7579_TOT,AGE7579_MALE,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM
2,50,1,1,Alabama,Autauga County,2000-07-01,43872,21318,22554,3027,...,,,,,,,,,,
3,50,1,1,Alabama,Autauga County,2001-07-01,44434,21591,22843,3096,...,,,,,,,,,,
4,50,1,1,Alabama,Autauga County,2002-07-01,45157,21928,23229,3106,...,,,,,,,,,,
5,50,1,1,Alabama,Autauga County,2003-07-01,45762,22222,23540,3110,...,,,,,,,,,,
6,50,1,1,Alabama,Autauga County,2004-07-01,46933,22765,24168,3237,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12570,50,56,43,Wyoming,Washakie County,2021-07-01,7712,3968,3744,370,...,270,446,220,226,339,168,171,241,117,124
12571,50,56,43,Wyoming,Washakie County,2022-07-01,7719,3973,3746,378,...,280,441,215,226,356,169,187,250,125,125
12573,50,56,45,Wyoming,Weston County,2020-07-01,6818,3724,3094,310,...,237,355,190,165,216,106,110,171,70,101
12574,50,56,45,Wyoming,Weston County,2021-07-01,6766,3729,3037,302,...,251,378,209,169,225,110,115,151,66,85


In [17]:
# The concatenated frame still carries the row labels of its source frames.
# drop=True discards them in favour of a fresh sequential index instead of keeping them as a column.
df = df.reset_index(drop=True)
df.tail()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE6569_FEM,AGE7074_TOT,AGE7074_MALE,AGE7074_FEM,AGE7579_TOT,AGE7579_MALE,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM
72287,50,56,43,Wyoming,Washakie County,2021-07-01,7712,3968,3744,370,...,270,446,220,226,339,168,171,241,117,124
72288,50,56,43,Wyoming,Washakie County,2022-07-01,7719,3973,3746,378,...,280,441,215,226,356,169,187,250,125,125
72289,50,56,45,Wyoming,Weston County,2020-07-01,6818,3724,3094,310,...,237,355,190,165,216,106,110,171,70,101
72290,50,56,45,Wyoming,Weston County,2021-07-01,6766,3729,3037,302,...,251,378,209,169,225,110,115,151,66,85
72291,50,56,45,Wyoming,Weston County,2022-07-01,6860,3800,3060,296,...,267,415,231,184,254,127,127,141,71,70


### Sorting by year, then state, then county

In [18]:
# YEAR holds 'YYYY-07-01' strings at this point, so sorting it as text is also chronological.
# Ties within a year are ordered by state name, then county name.
df = df.sort_values(['YEAR','STNAME','CTYNAME'])
df

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,POPESTIMATE,POPEST_MALE,POPEST_FEM,UNDER5_TOT,...,AGE6569_FEM,AGE7074_TOT,AGE7074_MALE,AGE7074_FEM,AGE7579_TOT,AGE7579_MALE,AGE7579_FEM,AGE8084_TOT,AGE8084_MALE,AGE8084_FEM
0,50,1,1,Alabama,Autauga County,2000-07-01,43872,21318,22554,3027,...,,,,,,,,,,
10,50,1,3,Alabama,Baldwin County,2000-07-01,141358,69323,72035,8653,...,,,,,,,,,,
20,50,1,5,Alabama,Barbour County,2000-07-01,29035,14969,14066,1781,...,,,,,,,,,,
30,50,1,7,Alabama,Bibb County,2000-07-01,19936,9834,10102,1447,...,,,,,,,,,,
40,50,1,9,Alabama,Blount County,2000-07-01,51181,25559,25622,3533,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72279,50,56,37,Wyoming,Sweetwater County,2022-07-01,41345,21469,19876,2319,...,1099,1686,866,820,1014,511,503,590,287,303
72282,50,56,39,Wyoming,Teton County,2022-07-01,23287,12206,11081,1069,...,674,1131,628,503,781,387,394,395,183,212
72285,50,56,41,Wyoming,Uinta County,2022-07-01,20712,10598,10114,1226,...,647,965,506,459,561,306,255,316,147,169
72288,50,56,43,Wyoming,Washakie County,2022-07-01,7719,3973,3746,378,...,280,441,215,226,356,169,187,250,125,125


### Export to csv

In [19]:
# Write the combined frame to disk. index=False leaves out the row index,
# which is no longer sequential after the sort above.
df.to_csv('county_median_age_2000-2022_raw.csv', index=False)