In [27]:
#dependencies and setup
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress

## Population Statistics

In [28]:
#csv files to read
# The folder holding the project CSVs. It defaults to the original location,
# but can be redirected by setting the HOUSING_DATA_DIR environment variable so
# the notebook is not tied to one machine's directory layout.
data_dir = os.environ.get(
    "HOUSING_DATA_DIR",
    r"C:\Users\marie\Documents\KU_Bootcamp\Housing_Project",
)
population_path = os.path.join(data_dir, "data.csv")
# employment_path is only consumed later, by the employment-loading cell.
employment_path = os.path.join(data_dir, "Current-Employment-Statistics-December'22.csv")
#read the population file (explicitly decoded as UTF-8) and display it
population_data = pd.read_csv(population_path, encoding="UTF-8")
population_data

Unnamed: 0,year,fips,pop2010,pop2023,state,stateCode,growthSince2010,name,ctyname,slug,landArea,density,StatePercentage,StateRank,densityMi,area
0,2023,29189,998954,1005676,Missouri,MO,0.006729,St. Louis County,St. Louis County,st-louis-county-mo,508,1979.677165,0.162083,1,1979.677165,508
1,2023,29095,674158,730119,Missouri,MO,0.083009,Jackson County,Jackson County,jackson-county-mo,604,1208.806291,0.117672,2,1208.806291,604
2,2023,29183,360485,418696,Missouri,MO,0.161480,St. Charles County,St. Charles County,st-charles-county-mo,560,747.671429,0.067480,3,747.671429,560
3,2023,29077,275174,306037,Missouri,MO,0.112158,Greene County,Greene County,greene-county-mo,675,453.388148,0.049323,4,453.388148,675
4,2023,29510,319294,296262,Missouri,MO,-0.072134,St. Louis City,St. Louis City,st-louis-city-mo,62,4778.419355,0.047748,5,4778.419355,62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,2023,29087,4912,4016,Missouri,MO,-0.182410,Holt County,Holt County,holt-county-mo,463,8.673866,0.000647,111,8.673866,463
111,2023,29197,4431,3912,Missouri,MO,-0.117129,Schuyler County,Schuyler County,schuyler-county-mo,307,12.742671,0.000630,112,12.742671,307
112,2023,29103,4131,3627,Missouri,MO,-0.122004,Knox County,Knox County,knox-county-mo,504,7.196429,0.000585,113,7.196429,504
113,2023,29129,3785,3463,Missouri,MO,-0.085073,Mercer County,Mercer County,mercer-county-mo,454,7.627753,0.000558,114,7.627753,454


In [29]:
#list of all the column names
# Displays the column Index of the raw population table; the names shown here
# are the ones referenced by the column-dropping cell that follows.
population_data.columns

Index(['year', 'fips', 'pop2010', 'pop2023', 'state', 'stateCode',
       'growthSince2010', 'name', 'ctyname', 'slug', 'landArea', 'density',
       'StatePercentage', 'StateRank', 'densityMi', 'area'],
      dtype='object')

In [30]:
# Discard every field except the two population counts and the county name.
# drop() returns a new frame, so population_data itself is left untouched.
cleaned_population = population_data.drop(
    columns=[
        "year", "fips", "state", "stateCode", "growthSince2010",
        "ctyname", "slug", "landArea", "density", "StatePercentage",
        "StateRank", "densityMi", "area",
    ]
)
cleaned_population

Unnamed: 0,pop2010,pop2023,name
0,998954,1005676,St. Louis County
1,674158,730119,Jackson County
2,360485,418696,St. Charles County
3,275174,306037,Greene County
4,319294,296262,St. Louis City
...,...,...,...
110,4912,4016,Holt County
111,4431,3912,Schuyler County
112,4131,3627,Knox County
113,3785,3463,Mercer County


In [31]:
# Put the county name first, followed by the 2010 and 2023 population counts.
organized_population = cleaned_population.loc[:, ["name", "pop2010", "pop2023"]]
organized_population

Unnamed: 0,name,pop2010,pop2023
0,St. Louis County,998954,1005676
1,Jackson County,674158,730119
2,St. Charles County,360485,418696
3,Greene County,275174,306037
4,St. Louis City,319294,296262
...,...,...,...
110,Holt County,4912,4016
111,Schuyler County,4431,3912
112,Knox County,4131,3627
113,Mercer County,3785,3463


In [32]:
# Replace the raw field names with presentation-friendly headings.
# NOTE(review): the output saved with this cell shows the first heading as
# "Missouri County", which this code does not produce - re-run the cell so the
# stored output matches.
final_population = organized_population.rename(
    {
        "name": "County",
        "pop2010": "2010 Population",
        "pop2023": "2023 Population",
    },
    axis="columns",
)
final_population

Unnamed: 0,Missouri County,2010 Population,2023 Population
0,St. Louis County,998954,1005676
1,Jackson County,674158,730119
2,St. Charles County,360485,418696
3,Greene County,275174,306037
4,St. Louis City,319294,296262
...,...,...,...
110,Holt County,4912,4016
111,Schuyler County,4431,3912
112,Knox County,4131,3627
113,Mercer County,3785,3463


## Employment Rate

In [34]:
# Load the employment statistics CSV; employment_path is defined in the
# population-loading cell near the top of the notebook.
employment_data = pd.read_csv(filepath_or_buffer=employment_path)
employment_data

Unnamed: 0,Month,Year,AreaCode,SeriesCode,Industry,JobsCurrentMonth,JobsLastMonth,JobsLastyear,NetChangeFromLastMonth,NetChangeFromLastyear,NaicsCode,SeriesLevel,PercentChangeFromLastYear
0,12,2022,2901000000,0,Total nonfarm,2957000,2938300,2885700,18700,71300,Total nonfarm,1,2.47%
1,12,2022,2901000000,5000000,Total private,2529700,2511400,2459300,18300,70400,Total private,2,2.86%
2,12,2022,2901000000,6000000,Goods-producing,431300,426100,411600,5200,19700,Goods-producing,3,4.79%
3,12,2022,2901000000,7000000,Service-providing,2525700,2512200,2474100,13500,51600,Service-providing,3,2.09%
4,12,2022,2901000000,8000000,Private service-providing,2098400,2085300,2047700,13100,50700,Private service-prov,3,2.48%
5,12,2022,2901000000,15000000,"Mining, Logging and Construction",141000,149600,141800,-8600,-800,-,4,-0.56%
6,12,2022,2901000000,10000000,Mining and logging,4400,4700,4600,-300,-200,Mining and logging,4,-4.35%
7,12,2022,2901000000,20000000,Construction,136600,144900,137200,-8300,-600,23,4,-0.44%
8,12,2022,2901000000,30000000,Manufacturing,290300,276500,269800,13800,20500,-,3,7.60%
9,12,2022,2901000000,31000000,Durable goods,174300,168000,159800,6300,14500,-,4,9.07%


In [35]:
#list all of the column names
# Displays the column Index of the raw employment table; the names shown here
# are the ones referenced by the clean-up cell that follows.
employment_data.columns

Index(['Month', 'Year', 'AreaCode', 'SeriesCode', 'Industry',
       'JobsCurrentMonth', 'JobsLastMonth', 'JobsLastyear',
       'NetChangeFromLastMonth', 'NetChangeFromLastyear', 'NaicsCode',
       'SeriesLevel', 'PercentChangeFromLastYear'],
      dtype='object')

In [38]:
# Trim the employment table down to Month, Year, Industry and the three
# year-over-year fields (JobsLastyear, NetChangeFromLastyear,
# PercentChangeFromLastYear). drop() returns a new frame, so employment_data
# itself is left untouched.
cleaned_rates = employment_data.drop(
    columns=[
        "AreaCode", "SeriesCode", "JobsCurrentMonth", "JobsLastMonth",
        "NetChangeFromLastMonth", "NaicsCode", "SeriesLevel",
    ]
)
cleaned_rates

Unnamed: 0,Month,Year,Industry,JobsLastyear,NetChangeFromLastyear,PercentChangeFromLastYear
0,12,2022,Total nonfarm,2885700,71300,2.47%
1,12,2022,Total private,2459300,70400,2.86%
2,12,2022,Goods-producing,411600,19700,4.79%
3,12,2022,Service-providing,2474100,51600,2.09%
4,12,2022,Private service-providing,2047700,50700,2.48%
5,12,2022,"Mining, Logging and Construction",141800,-800,-0.56%
6,12,2022,Mining and logging,4600,-200,-4.35%
7,12,2022,Construction,137200,-600,-0.44%
8,12,2022,Manufacturing,269800,20500,7.60%
9,12,2022,Durable goods,159800,14500,9.07%


In [39]:
# Replace the raw year-over-year field names with shorter headings.
# NOTE(review): "Total Jobs" is the renamed JobsLastyear column, while
# JobsCurrentMonth was dropped in the clean-up cell - confirm that the
# prior-year count is really what should be presented as the total.
final_employment = cleaned_rates.rename(
    {
        "JobsLastyear": "Total Jobs",
        "NetChangeFromLastyear": "Net Change",
        "PercentChangeFromLastYear": "Percent Change",
    },
    axis="columns",
)
final_employment

Unnamed: 0,Month,Year,Industry,Total Jobs,Net Change,Percent Change
0,12,2022,Total nonfarm,2885700,71300,2.47%
1,12,2022,Total private,2459300,70400,2.86%
2,12,2022,Goods-producing,411600,19700,4.79%
3,12,2022,Service-providing,2474100,51600,2.09%
4,12,2022,Private service-providing,2047700,50700,2.48%
5,12,2022,"Mining, Logging and Construction",141800,-800,-0.56%
6,12,2022,Mining and logging,4600,-200,-4.35%
7,12,2022,Construction,137200,-600,-0.44%
8,12,2022,Manufacturing,269800,20500,7.60%
9,12,2022,Durable goods,159800,14500,9.07%


## Income Statistics

## House Prices Statistics