In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
%matplotlib inline
import requests
import json
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
from collections import defaultdict

In [2]:
# Load the Tennessee county population CSV, skipping the 3 lines above the
# header row and the 5 trailing lines at the bottom of the file.
tn_pop = pd.read_csv(
    '../data/us_census_tn_pop_by_county_2010_2019.csv',
    skiprows=3,
    skipfooter=5,
    engine='python',  # skipfooter is not supported by the default C parser
)
tn_pop.head(10)

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Tennessee,6346105,6346276,6355311,6399291,6453898,6494340,6541223,6591170,6646010,6708799,6771631,6829174
1,".Anderson County, Tennessee",75129,75082,75098,75209,75225,75299,75157,75456,75528,76056,76287,76978
2,".Bedford County, Tennessee",45058,45057,45078,45247,45254,45565,46251,46948,47442,48211,49146,49713
3,".Benton County, Tennessee",16489,16491,16511,16529,16445,16360,16197,16220,16082,15993,16246,16160
4,".Bledsoe County, Tennessee",12876,12874,12884,12992,12926,13910,14501,14601,14738,14895,14883,15064
5,".Blount County, Tennessee",123010,123098,123199,123664,124024,124874,125847,126954,128264,129999,131331,133088
6,".Bradley County, Tennessee",98963,98926,99083,99798,101049,101778,102771,103774,104390,105421,107050,108110
7,".Campbell County, Tennessee",40716,40723,40735,40688,40532,40247,39893,39772,39784,39791,39795,39842
8,".Cannon County, Tennessee",13801,13813,13797,13724,13789,13700,13572,13740,13876,14157,14440,14678
9,".Carroll County, Tennessee",28522,28486,28454,28547,28667,28606,28411,27982,27901,27807,27975,27767


In [3]:
# Preview the last 15 rows to confirm the footer lines were excluded on load.
tn_pop.iloc[-15:]

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
81,".Stewart County, Tennessee",13324,13313,13348,13223,13309,13277,13210,13187,13190,13422,13619,13715
82,".Sullivan County, Tennessee",156823,156800,156749,156876,156322,156185,156404,156199,156182,156924,157599,158348
83,".Sumner County, Tennessee",160645,160634,161214,163680,165967,168754,172269,175326,179332,183756,187490,191283
84,".Tipton County, Tennessee",61081,61006,61065,61305,61524,61572,61596,61533,61223,61305,61576,61599
85,".Trousdale County, Tennessee",7870,7864,7874,7815,7794,7810,7998,8051,9955,10831,11033,11284
86,".Unicoi County, Tennessee",18313,18311,18284,18302,18253,18095,17951,17819,17737,17796,17822,17883
87,".Union County, Tennessee",19109,19107,19116,19191,19105,19063,18991,19159,19219,19399,19689,19972
88,".Van Buren County, Tennessee",5548,5558,5569,5555,5653,5574,5696,5695,5721,5747,5763,5872
89,".Warren County, Tennessee",39839,39824,39858,39907,39744,39875,39983,40262,40428,40717,40826,41277
90,".Washington County, Tennessee",122979,123063,123384,123887,124873,125397,125745,126137,127247,127603,128661,129375


In [4]:
# Drop the yearly estimate columns for 2010-2017. The county name, Census,
# Estimates Base, 2018 and 2019 columns are kept.
# errors='ignore' lets this cell be re-run after the columns are already gone
# (a plain drop would raise KeyError on the second run).
dropped_years = [str(year) for year in range(2010, 2018)]
tn_pop = tn_pop.drop(columns=dropped_years, errors='ignore')

In [5]:
# Show the first five rows to check which columns remain after the drop.
tn_pop.iloc[:5]

Unnamed: 0.1,Unnamed: 0,Census,Estimates Base,2018,2019
0,Tennessee,6346105,6346276,6771631,6829174
1,".Anderson County, Tennessee",75129,75082,76287,76978
2,".Bedford County, Tennessee",45058,45057,49146,49713
3,".Benton County, Tennessee",16489,16491,16246,16160
4,".Bledsoe County, Tennessee",12876,12874,14883,15064


In [6]:
# Give the five remaining columns clean names. The assignment is positional:
# the unnamed first column becomes 'County' and 'Estimates Base' becomes
# 'Estimates_Base'; the other names are unchanged.
tn_pop.columns = ['County', 'Census', 'Estimates_Base', '2018', '2019']
tn_pop.head()

Unnamed: 0,County,Census,Estimates_Base,2018,2019
0,Tennessee,6346105,6346276,6771631,6829174
1,".Anderson County, Tennessee",75129,75082,76287,76978
2,".Bedford County, Tennessee",45058,45057,49146,49713
3,".Benton County, Tennessee",16489,16491,16246,16160
4,".Bledsoe County, Tennessee",12876,12874,14883,15064


In [7]:
# Print a summary of the frame (index range, column names, non-null counts and
# dtypes) to check the column types before any numeric conversion.
tn_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   County          96 non-null     object
 1   Census          96 non-null     object
 2   Estimates_Base  96 non-null     object
 3   2018            96 non-null     object
 4   2019            96 non-null     object
dtypes: object(5)
memory usage: 3.9+ KB


In [8]:
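# Remove the '.' before each county name. The code below has no lasting effect
# because Series.str.replace returns a new Series and the result is never assigned
# back to tn_pop['County']. Also tried .strip and a raw-string (r'') pattern.
# An assigned form such as tn_pop['County'] = tn_pop['County'].str.lstrip('.')
# would persist the change.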
# tn_pop['County'].str.replace('\.', '')
# tn_pop.head(10)

In [14]:
# Strip thousands separators from the 2019 column so it can be cast to int.
# astype(str) first keeps this cell re-runnable: once the column holds ints,
# the .str accessor would otherwise raise AttributeError.
# regex=False treats ',' as a literal character rather than a pattern.
tn_pop['2019'] = tn_pop['2019'].astype(str).str.replace(',', '', regex=False)
tn_pop

Unnamed: 0,County,Census,Estimates_Base,2018,2019
0,Tennessee,6346105,6346276,6771631,6829174
1,".Anderson County, Tennessee",75129,75082,76287,76978
2,".Bedford County, Tennessee",45058,45057,49146,49713
3,".Benton County, Tennessee",16489,16491,16246,16160
4,".Bledsoe County, Tennessee",12876,12874,14883,15064
...,...,...,...,...,...
91,".Wayne County, Tennessee",17021,17025,16635,16673
92,".Weakley County, Tennessee",35021,35015,33413,33328
93,".White County, Tennessee",25841,25844,27086,27345
94,".Williamson County, Tennessee",183182,183277,231978,238412


In [15]:
# Convert the cleaned 2019 strings to integers so the column sorts numerically.
pop_2019_as_int = tn_pop['2019'].astype(int)
tn_pop['2019'] = pop_2019_as_int
tn_pop

Unnamed: 0,County,Census,Estimates_Base,2018,2019
0,Tennessee,6346105,6346276,6771631,6829174
1,".Anderson County, Tennessee",75129,75082,76287,76978
2,".Bedford County, Tennessee",45058,45057,49146,49713
3,".Benton County, Tennessee",16489,16491,16246,16160
4,".Bledsoe County, Tennessee",12876,12874,14883,15064
...,...,...,...,...,...
91,".Wayne County, Tennessee",17021,17025,16635,16673
92,".Weakley County, Tennessee",35021,35015,33413,33328
93,".White County, Tennessee",25841,25844,27086,27345
94,".Williamson County, Tennessee",183182,183277,231978,238412


In [17]:
# Display the rows ordered from largest to smallest 2019 population.
# The result is only shown, not assigned, so tn_pop itself keeps its order.
tn_pop.sort_values(by='2019', ascending=False)

Unnamed: 0,County,Census,Estimates_Base,2018,2019
0,Tennessee,6346105,6346276,6771631,6829174
79,".Shelby County, Tennessee",927644,927682,936365,937166
19,".Davidson County, Tennessee",626681,626558,690516,694144
47,".Knox County, Tennessee",432226,432260,466258,470313
33,".Hamilton County, Tennessee",336463,336477,364293,367804
...,...,...,...,...,...
48,".Lake County, Tennessee",7832,7832,7407,7016
34,".Hancock County, Tennessee",6819,6811,6561,6620
64,".Moore County, Tennessee",6362,6342,6457,6488
88,".Van Buren County, Tennessee",5548,5558,5763,5872
