In [1]:
# import dependencies
import pandas as pd

In [2]:
# FK - 2023-02-18
# Python SQL toolkit and Object Relational Mapper
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine #, func, extract

# ######################
# DB Connect
# ######################
from utils_common import get_db_engine
db_engine = get_db_engine()

In [3]:
# read in data file as DataFrame
file = 'Resources/Veterans.csv'
veterans_df = pd.read_csv(file)
veterans_df.head()

Unnamed: 0,FIPS,State,County,Vets18OPct,GulfWar2VetsPct,GulfWar1VetsPct,VietnamEraVetsPct,KoreanWarVetsPct,WW2VetsPct,MaleVetsPct,...,PctVetsDisabilty,PctNonVetsDisabilty,CivPopVets18to64Num,CivPop18ONum,Vets18ONum,NonVetsDisabilty,NonVetsPoor,VetsDisabilty,VetsPoor,CLFVets18to64Num
0,0,US,United States,7.286424,19.312637,20.719749,35.710834,8.118271,3.504486,91.118977,...,18.294605,9.935824,9143042.0,250195726.0,18230322.0,18404982.0,23793332.0,1648352.0,734648.0,7003778.0
1,1000,AL,Alabama,8.76678,21.520743,24.649992,36.22667,7.214868,2.170154,89.794281,...,22.349785,13.814824,177164.0,3766571.0,330207.0,373473.0,441454.0,38993.0,16418.0,126372.0
2,1001,AL,Autauga,12.635111,37.708649,37.5,31.885432,7.169954,4.191958,87.253414,...,20.056854,15.398961,3182.0,41725.0,5272.0,4624.0,4432.0,635.0,146.0,2754.0
3,1003,AL,Baldwin,11.8155,19.030667,22.331282,42.516401,7.87774,2.380105,92.147689,...,17.060425,11.288564,9337.0,166417.0,19663.0,12737.0,11811.0,1567.0,705.0,6996.0
4,1005,AL,Barbour,6.62212,15.737952,16.942771,48.493976,7.605422,3.388554,92.093373,...,26.706827,17.370544,605.0,20054.0,1328.0,2110.0,3408.0,133.0,77.0,301.0


In [4]:
# FK - 2023-02-18
# Put it in the Database
veterans_df.to_sql(name='veterans', con=db_engine, if_exists='replace', index=False)

In [5]:
veterans_df.columns

Index(['FIPS', 'State', 'County', 'Vets18OPct', 'GulfWar2VetsPct',
       'GulfWar1VetsPct', 'VietnamEraVetsPct', 'KoreanWarVetsPct',
       'WW2VetsPct', 'MaleVetsPct', 'FemaleVetsPct', 'WhiteNonHispVetsPct',
       'BlackVetsPct', 'HispanicVetsPct', 'OtherRaceVetsPct', 'MedianVetsInc',
       'MedianNonVetsInc', 'LessThanHSVetsPct', 'HighSchOnlyVetsPct',
       'SomeCollegeVetsPct', 'CollegeDegreeVetsPct', 'LFPVetsRate',
       'UEVetsRate', 'PctVetsPoor', 'PctNonVetsPoor', 'PctVetsDisabilty',
       'PctNonVetsDisabilty', 'CivPopVets18to64Num', 'CivPop18ONum',
       'Vets18ONum', 'NonVetsDisabilty', 'NonVetsPoor', 'VetsDisabilty',
       'VetsPoor', 'CLFVets18to64Num'],
      dtype='object')

In [11]:
# create DataFrame for data for project
sorted_veterans_df = veterans_df[['State','County','Vets18ONum','Vets18OPct','MaleVetsPct','FemaleVetsPct','WhiteNonHispVetsPct',
                                 'BlackVetsPct','HispanicVetsPct','OtherRaceVetsPct','LessThanHSVetsPct',
                                 'HighSchOnlyVetsPct','SomeCollegeVetsPct','CollegeDegreeVetsPct','LFPVetsRate',
                                 'UEVetsRate']]
sorted_veterans_df.head()

Unnamed: 0,State,County,Vets18ONum,Vets18OPct,MaleVetsPct,FemaleVetsPct,WhiteNonHispVetsPct,BlackVetsPct,HispanicVetsPct,OtherRaceVetsPct,LessThanHSVetsPct,HighSchOnlyVetsPct,SomeCollegeVetsPct,CollegeDegreeVetsPct,LFPVetsRate,UEVetsRate
0,US,United States,18230322.0,7.286424,91.118977,8.881023,77.169196,11.886855,6.933229,5.9686,5.897847,27.971309,37.316451,28.814393,76.602273,4.379651
1,AL,Alabama,330207.0,8.76678,89.794281,10.205719,72.534198,22.945607,2.022065,3.059596,6.396034,28.257189,37.38537,27.961408,71.330519,4.319786
2,AL,Autauga,5272.0,12.635111,87.253414,12.746586,84.028832,9.673748,5.576631,1.11912,3.007812,20.859375,31.621094,44.511719,86.54934,3.55846
3,AL,Baldwin,19663.0,11.8155,92.147689,7.852311,90.62198,5.360321,1.439251,2.92936,3.612916,27.659792,36.676731,32.05056,74.927707,3.430532
4,AL,Barbour,1328.0,6.62212,92.093373,7.906627,59.563253,37.349398,1.054217,3.087349,12.349398,39.834337,36.746988,11.069277,49.752066,9.966777


In [12]:
# check for null values
sorted_veterans_df.isnull()

Unnamed: 0,State,County,Vets18ONum,Vets18OPct,MaleVetsPct,FemaleVetsPct,WhiteNonHispVetsPct,BlackVetsPct,HispanicVetsPct,OtherRaceVetsPct,LessThanHSVetsPct,HighSchOnlyVetsPct,SomeCollegeVetsPct,CollegeDegreeVetsPct,LFPVetsRate,UEVetsRate
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3273,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3274,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3275,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3276,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [13]:
# fill in missing values (NaN as 0)
sorted_veterans_df.fillna(0)

Unnamed: 0,State,County,Vets18ONum,Vets18OPct,MaleVetsPct,FemaleVetsPct,WhiteNonHispVetsPct,BlackVetsPct,HispanicVetsPct,OtherRaceVetsPct,LessThanHSVetsPct,HighSchOnlyVetsPct,SomeCollegeVetsPct,CollegeDegreeVetsPct,LFPVetsRate,UEVetsRate
0,US,United States,18230322.0,7.286424,91.118977,8.881023,77.169196,11.886855,6.933229,5.968600,5.897847,27.971309,37.316451,28.814393,76.602273,4.379651
1,AL,Alabama,330207.0,8.766780,89.794281,10.205719,72.534198,22.945607,2.022065,3.059596,6.396034,28.257189,37.385370,27.961408,71.330519,4.319786
2,AL,Autauga,5272.0,12.635111,87.253414,12.746586,84.028832,9.673748,5.576631,1.119120,3.007812,20.859375,31.621094,44.511719,86.549340,3.558460
3,AL,Baldwin,19663.0,11.815500,92.147689,7.852311,90.621980,5.360321,1.439251,2.929360,3.612916,27.659792,36.676731,32.050560,74.927707,3.430532
4,AL,Barbour,1328.0,6.622120,92.093373,7.906627,59.563253,37.349398,1.054217,3.087349,12.349398,39.834337,36.746988,11.069277,49.752066,9.966777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3273,PR,Vega Baja,1010.0,2.414881,93.861386,6.138614,0.000000,10.594059,100.000000,12.673267,22.155689,28.243513,24.051896,25.548902,46.012270,28.666667
3274,PR,Vieques,387.0,5.571552,100.000000,0.000000,17.829457,2.325581,82.170543,47.286822,9.043928,48.062016,4.134367,38.759690,42.857143,0.000000
3275,PR,Villalba,431.0,2.459344,97.679814,2.320186,0.000000,3.944316,100.000000,45.243619,12.470588,28.705882,24.941176,33.882353,65.354331,7.228916
3276,PR,Yabucoa,605.0,2.240824,98.512397,1.487603,0.000000,74.545455,100.000000,4.132231,7.933884,25.123967,36.033058,30.909091,68.907563,60.975610


In [14]:
# rename sorted_veterans_df columns
renamed_df = sorted_veterans_df.rename(columns={"Vets18ONum":"TotalVets","Vets18OPct":"VeteranPopulationPct","WhiteNonHispVetsPct":"WhiteVetsPct",
                                  "LFPVetsRate":"EmployeedVetsPct","UEVetsRate":"UnemployeedVetsPct"})
renamed_df.head()

Unnamed: 0,State,County,TotalVets,VeteranPopulationPct,MaleVetsPct,FemaleVetsPct,WhiteVetsPct,BlackVetsPct,HispanicVetsPct,OtherRaceVetsPct,LessThanHSVetsPct,HighSchOnlyVetsPct,SomeCollegeVetsPct,CollegeDegreeVetsPct,EmployeedVetsPct,UnemployeedVetsPct
0,US,United States,18230322.0,7.286424,91.118977,8.881023,77.169196,11.886855,6.933229,5.9686,5.897847,27.971309,37.316451,28.814393,76.602273,4.379651
1,AL,Alabama,330207.0,8.76678,89.794281,10.205719,72.534198,22.945607,2.022065,3.059596,6.396034,28.257189,37.38537,27.961408,71.330519,4.319786
2,AL,Autauga,5272.0,12.635111,87.253414,12.746586,84.028832,9.673748,5.576631,1.11912,3.007812,20.859375,31.621094,44.511719,86.54934,3.55846
3,AL,Baldwin,19663.0,11.8155,92.147689,7.852311,90.62198,5.360321,1.439251,2.92936,3.612916,27.659792,36.676731,32.05056,74.927707,3.430532
4,AL,Barbour,1328.0,6.62212,92.093373,7.906627,59.563253,37.349398,1.054217,3.087349,12.349398,39.834337,36.746988,11.069277,49.752066,9.966777


In [16]:
renamed_df = renamed_df.drop(index=0, axis=0)
renamed_df.head()

Unnamed: 0,State,County,TotalVets,VeteranPopulationPct,MaleVetsPct,FemaleVetsPct,WhiteVetsPct,BlackVetsPct,HispanicVetsPct,OtherRaceVetsPct,LessThanHSVetsPct,HighSchOnlyVetsPct,SomeCollegeVetsPct,CollegeDegreeVetsPct,EmployeedVetsPct,UnemployeedVetsPct
1,AL,Alabama,330207.0,8.76678,89.794281,10.205719,72.534198,22.945607,2.022065,3.059596,6.396034,28.257189,37.38537,27.961408,71.330519,4.319786
2,AL,Autauga,5272.0,12.635111,87.253414,12.746586,84.028832,9.673748,5.576631,1.11912,3.007812,20.859375,31.621094,44.511719,86.54934,3.55846
3,AL,Baldwin,19663.0,11.8155,92.147689,7.852311,90.62198,5.360321,1.439251,2.92936,3.612916,27.659792,36.676731,32.05056,74.927707,3.430532
4,AL,Barbour,1328.0,6.62212,92.093373,7.906627,59.563253,37.349398,1.054217,3.087349,12.349398,39.834337,36.746988,11.069277,49.752066,9.966777
5,AL,Bibb,1433.0,8.022618,95.882763,4.117237,79.134682,18.492673,2.372645,0.0,6.90151,54.708843,22.286125,16.103523,56.901408,0.0


In [14]:
# export new data frame to csv
renamed_df.to_csv('Resources/sorted_veterans_data.csv',encoding='utf-8')

In [15]:
# FK - 2023-02-18
# Put it in the Database
renamed_df.to_sql(name='sorted_veterans_data', con=db_engine, if_exists='replace', index=False)