In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

In [2]:
# Load the global/regional hospital-beds CSV into a DataFrame.
hospital_df = pd.read_csv(
    'Resources/hospital_beds_global_regional_v1.csv'
)

In [3]:
# Preview the first five rows of the raw data.
hospital_df.head(5)

Unnamed: 0,country,state,county,lat,lng,type,measure,beds,population,year,source,source_url
0,AD,,,42.5,1.5,ICU,1000HAB,0.071,83747,2011,icm-journal,https://link.springer.com/article/10.1007/s001...
1,AD,,,42.5,1.5,TOTAL,1000HAB,2.5,84463,2009,wdi,https://data.worldbank.org/indicator/SH.MED.BE...
2,AE,,,24.0,54.0,TOTAL,1000HAB,1.2,9197910,2013,wdi,https://data.worldbank.org/indicator/SH.MED.BE...
3,AF,,,33.0,65.0,TOTAL,1000HAB,0.5,34413603,2015,wdi,https://data.worldbank.org/indicator/SH.MED.BE...
4,AG,,,17.05,-61.8,TOTAL,1000HAB,3.8,92562,2014,wdi,https://data.worldbank.org/indicator/SH.MED.BE...


In [4]:
# Keep only the columns used further down (lat/lng and source_url are dropped).
reduced_hospital = hospital_df.loc[
    :,
    [
        'country',
        'state',
        'county',
        'type',
        'beds',
        'measure',
        'population',
        'year',
        'source',
    ],
]
reduced_hospital.head(5)

Unnamed: 0,country,state,county,type,beds,measure,population,year,source
0,AD,,,ICU,0.071,1000HAB,83747,2011,icm-journal
1,AD,,,TOTAL,2.5,1000HAB,84463,2009,wdi
2,AE,,,TOTAL,1.2,1000HAB,9197910,2013,wdi
3,AF,,,TOTAL,0.5,1000HAB,34413603,2015,wdi
4,AG,,,TOTAL,3.8,1000HAB,92562,2014,wdi


In [5]:
# Restrict to rows reported for the US in 2019.
us_hospital = reduced_hospital[
    reduced_hospital['year'].eq(2019) & reduced_hospital['country'].eq('US')
]

us_hospital

Unnamed: 0,country,state,county,type,beds,measure,population,year,source
469,US,AK,aleutians east,ICU,0.000000,1000HAB,3338,2019,khn
470,US,AK,aleutians west,ICU,0.000000,1000HAB,5784,2019,khn
472,US,AK,anchorage,ICU,0.244782,1000HAB,298225,2019,khn
473,US,AK,anchorage,OTHER,0.191131,1000HAB,298225,2019,arcgis
476,US,AK,bristol bay,ICU,0.000000,1000HAB,917,2019,khn
...,...,...,...,...,...,...,...,...,...
6178,US,WY,uinta,ICU,0.289045,1000HAB,20758,2019,khn
6182,US,,,ACUTE,2.487640,1000HAB,328004407,2019,argis
6183,US,,,ICU,0.302904,1000HAB,328004407,2019,argis
6184,US,,,OTHER,0.242003,1000HAB,328004407,2019,argis


In [6]:
# Print the row count, dtypes and per-column non-null counts of the US subset.
us_hospital.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2332 entries, 469 to 6185
Data columns (total 9 columns):
country       2332 non-null object
state         2328 non-null object
county        2328 non-null object
type          2332 non-null object
beds          2332 non-null float64
measure       2332 non-null object
population    2332 non-null int64
year          2332 non-null int64
source        2332 non-null object
dtypes: float64(1), int64(2), object(6)
memory usage: 182.2+ KB


In [7]:
# Drop every row that has a missing value in any column (the dropna default).
# In the info() output only `state` and `county` contain nulls, so this removes
# the rows that carry no state/county.
us_hospital = us_hospital.dropna()

In [8]:
# Print the summary again to confirm that no null values remain.
us_hospital.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2328 entries, 469 to 6178
Data columns (total 9 columns):
country       2328 non-null object
state         2328 non-null object
county        2328 non-null object
type          2328 non-null object
beds          2328 non-null float64
measure       2328 non-null object
population    2328 non-null int64
year          2328 non-null int64
source        2328 non-null object
dtypes: float64(1), int64(2), object(6)
memory usage: 181.9+ KB


In [9]:
# Rows that repeat an earlier (state, type, beds, source) combination;
# the first occurrence of each combination is not flagged.
duplicate_h = us_hospital.loc[
    us_hospital.duplicated(subset=['state', 'type', 'beds', 'source'], keep='first')
]
duplicate_h

Unnamed: 0,country,state,county,type,beds,measure,population,year,source
470,US,AK,aleutians west,ICU,0.0,1000HAB,5784,2019,khn
476,US,AK,bristol bay,ICU,0.0,1000HAB,917,2019,khn
477,US,AK,denali,ICU,0.0,1000HAB,2303,2019,khn
481,US,AK,haines,ICU,0.0,1000HAB,2537,2019,khn
482,US,AK,hoonah-angoon,ICU,0.0,1000HAB,2146,2019,khn
...,...,...,...,...,...,...,...,...,...
6130,US,WV,ritchie,ICU,0.0,1000HAB,10005,2019,khn
6134,US,WV,tucker,ICU,0.0,1000HAB,7035,2019,khn
6137,US,WV,wayne,ICU,0.0,1000HAB,41063,2019,khn
6141,US,WV,wirt,ICU,0.0,1000HAB,5800,2019,khn


In [10]:
# Narrow the frame to the three columns needed for the per-state aggregation.
final_hospital = us_hospital.loc[:, ['state', 'type', 'beds']]

In [11]:
# Preview the first five rows of the narrowed frame.
final_hospital.head(5)

Unnamed: 0,state,type,beds
469,AK,ICU,0.0
470,AK,ICU,0.0
472,AK,ICU,0.244782
473,AK,OTHER,0.191131
476,AK,ICU,0.0


In [12]:
# Total the `beds` values per state.
# `beds` is selected explicitly: `final_hospital` also holds the string column
# `type`, and a bare `.sum()` would either silently drop it (older pandas) or
# concatenate its strings into a junk column (pandas >= 2.0), which would then
# be written to the database. The result is a one-column frame indexed by state.
# NOTE(review): the displayed rows have measure == '1000HAB' (a per-1000-inhabitant
# figure), so adding values across counties and bed types does not give a
# state-level rate — confirm whether a population-weighted figure is intended.
hospitals = final_hospital.groupby('state')[['beds']].sum()
hospitals.head()

Unnamed: 0_level_0,beds
state,Unnamed: 1_level_1
AK,4.789014
AL,11.967593
AR,8.403947
AZ,2.050268
CA,8.182707


In [13]:
# Connecting to the DB.
# The "user:password@host:port/dbname" part is read from the COVID19_DB_CONN
# environment variable so real credentials do not have to live in the notebook;
# when the variable is unset, the original local-development value is used.
connection_string = os.environ.get(
    "COVID19_DB_CONN",
    "postgres:postgres@localhost:5432/covid19_db",
)
engine = create_engine(f'postgresql://{connection_string}')


In [14]:
# Retrieve existing tables.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API provides the list of table names instead.
inspect(engine).get_table_names()

['state',
 'country',
 'index_prices',
 'country_cases',
 'us_unemployment_stats',
 'hospital_beds',
 'gas_price',
 'us_states_cases']

In [15]:
# Write the per-state frame to the `hospital_beds` table.
# if_exists='replace' replaces any existing table of that name; index=True
# stores the `state` index as a regular column.
hospitals.to_sql(
    'hospital_beds',
    con=engine,
    index=True,
    if_exists='replace',
)

In [16]:
# Read the table back from the database and show its first five rows.
pd.read_sql_query(sql='select * from hospital_beds', con=engine).head(5)

Unnamed: 0,state,beds
0,AK,4.789014
1,AL,11.967593
2,AR,8.403947
3,AZ,2.050268
4,CA,8.182707
