In [1]:
# Importing Dependencies
import pandas as pd
import requests
import sqlalchemy
from sqlalchemy import create_engine, Column, Integer, String, Float, PrimaryKeyConstraint, func,inspect,text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session
import sqlite3
from sqlalchemy.ext.automap import automap_base
import config
from config import geoapify_key

In [2]:
# Base endpoint of the Census API dataset used below (2019, pep/charagegroups).
# It ends in "?" on purpose: the query string is appended to it directly.
url = "https://api.census.gov/data/2019/pep/charagegroups?"

In [3]:
# AGEGROUP codes as listed on the Census API website: one selecting the whole
# population and one selecting residents aged 65 and over (per the names).
all_population = "000"
age_65_up = "026"

# Shared query prefix: NAME and POP for every county (county:*) in state code 12.
# It deliberately ends in "AGEGROUP=" so that an age-group code can be appended.
query_url = f"{url}get=NAME,POP&for=county:*&in=state:12&AGEGROUP="
all_pop_query = f"{query_url}{all_population}"
age_65_up_query = f"{query_url}{age_65_up}"

# Descriptive labels for the five fields carried by each returned row.
results_cols = [
    "County, State",
    "Population",
    "Age Group",
    "State Code",
    "County Code",
]

In [4]:
def _fetch_json(query, timeout=30):
    """Send a GET request to ``query`` and return the decoded JSON body.

    Parameters
    ----------
    query : str
        Fully built request URL.
    timeout : int or float, default 30
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Failing here gives a clear
        message instead of a confusing JSON-decoding or column-count error in a
        later cell.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    response = requests.get(query, timeout=timeout)
    response.raise_for_status()
    return response.json()


# Pull both result sets (total population and 65+ population) from the Census API.
all_pop_results = _fetch_json(all_pop_query)
age_65_up_results = _fetch_json(age_65_up_query)

In [5]:
# The API response is a list of rows: the first row holds the field names and
# the remaining rows hold the data records.
age_65_up_header, *age_65_up_rows = age_65_up_results

# Build the frame directly from the records, labelled with the API's own field
# names. This avoids loading the header as a data row under placeholder labels
# and then patching the columns, it no longer depends on the response having
# exactly five fields, and it leaves a clean 0-based row index.
age_65_up_df = pd.DataFrame(age_65_up_rows, columns=age_65_up_header)

In [6]:
# The API response is a list of rows: the first row holds the field names and
# the remaining rows hold the data records.
all_pop_header, *all_pop_rows = all_pop_results

# Build the frame directly from the records, labelled with the API's own field
# names. This avoids loading the header as a data row under placeholder labels
# and then patching the columns, it no longer depends on the response having
# exactly five fields, and it leaves a clean 0-based row index.
all_pop_df = pd.DataFrame(all_pop_rows, columns=all_pop_header)

In [7]:
# Join the total-population frame with the 65+ frame on the county code.
# Only 'county' is used as the key, so every other shared column (NAME, POP,
# AGEGROUP, state) appears twice with pandas' default suffixes: _x for
# all_pop_df and _y for age_65_up_df. The outer join keeps counties that are
# present in either frame.
pop_percent_df = all_pop_df.merge(age_65_up_df, how='outer', on='county')

In [8]:
# NAME_x is split on ', ' into a county part and a state part, which become
# the new 'County' and 'State' columns.
name_parts = pop_percent_df['NAME_x'].str.split(', ', expand=True)
pop_percent_df[['County', 'State']] = name_parts

# Columns that are duplicated by the merge or no longer needed once the
# county name has been extracted.
unneeded_columns = [
    'NAME_x',
    'NAME_y',
    'AGEGROUP_x',
    'AGEGROUP_y',
    'state_x',
    'state_y',
    'State',
]
pop_percent_df = pop_percent_df.drop(columns=unneeded_columns)

In [9]:
# Final column labels. POP_x came from the total-population frame and POP_y
# from the 65+ frame.
# NOTE(review): no column named 'state' appears to survive the earlier drop of
# state_x/state_y, so that entry likely matches nothing (rename skips labels it
# cannot find) - confirm.
column_labels = {
    'county': 'County_ID',
    'POP_x': 'Total_Population',
    'POP_y': 'Elderly_Population',
    'state': 'State_ID',
}

# Target dtypes: the API values arrive as strings, so the counts and the county
# code are cast to integers here.
data_types = {
    'Total_Population': 'int',
    'County_ID': 'int',
    'Elderly_Population': 'int',
    'County': 'str',
}

# Apply the new labels first, then the casts (the dtype mapping uses the new names).
pop_percent_df = pop_percent_df.rename(columns=column_labels).astype(data_types)

In [10]:
# Share of each county's population that is in the elderly group, expressed as
# a percentage and rounded to two decimal places.
pop_percent_df['Percent_Elderly'] = (
    pop_percent_df['Elderly_Population']
    .div(pop_percent_df['Total_Population'])
    .mul(100)
    .round(2)
)

# Strip the text ' County' from the county names so they follow the same
# convention as the hospital table's County column.
# regex=False pins literal matching: the default for `regex` differs between
# pandas versions, so relying on it makes the result version-dependent.
pop_percent_df['County'] = pop_percent_df['County'].str.replace(' County', '', regex=False)

In [19]:
# Geoapify Places request for the healthcare.hospital category, restricted by
# a place filter (Florida, per the original notebook) and capped at 300 results.
hospital_url = "https://api.geoapify.com/v2/places?categories=healthcare.hospital&filter=place:512bfac038c5a254c059f90f01cee96b3c40f00101f9010279020000000000920307466c6f72696461&limit=300&apiKey=" + geoapify_key

# Pull the hospital information from the API. The timeout keeps a stalled
# connection from hanging the notebook.
hospital_api_response = requests.get(hospital_url, timeout=30)

# Fail fast on an error status instead of hitting KeyError: 'features' below.
# The status is checked by hand rather than with raise_for_status() because
# that exception message echoes the full URL, which contains the API key and
# would end up in the saved notebook output.
if not hospital_api_response.ok:
    raise RuntimeError(
        "Geoapify request failed with HTTP status "
        + str(hospital_api_response.status_code)
    )
hospital_response = hospital_api_response.json()

# Collect one record per usable hospital; the list becomes a dataframe below.
hospital_columns = ['Name', 'City', 'County', 'Latitude', 'Longitude']
hospital_information = []
features = hospital_response["features"]

for feature in features:
    properties = feature['properties']
    hospital_name = properties.get('name')
    hospital_county = properties.get('county')
    hospital_city = properties.get('city')
    hospital_lat = properties.get('lat')
    hospital_lon = properties.get('lon')

    # Keep only entries that have both a name and a county; city, latitude and
    # longitude may be missing and are stored as None in that case.
    if hospital_name is not None and hospital_county is not None:
        hospital_information.append({
            'Name': hospital_name,
            'City': hospital_city,
            'County': hospital_county,
            'Latitude': hospital_lat,
            'Longitude': hospital_lon,
        })

# Passing the column list explicitly guarantees the expected columns exist even
# when no hospital passed the filter, so the steps below cannot raise KeyError.
hospital_df = pd.DataFrame(hospital_information, columns=hospital_columns)

# Sequential 1-based 'ID' column, used as the primary key of the SQLite table.
hospital_df['ID'] = range(1, len(hospital_df) + 1)

# Strip the text ' County' from the county column for uniformity with the
# population table. regex=False pins literal matching across pandas versions.
hospital_df['County'] = hospital_df['County'].str.replace(' County', '', regex=False)

Unnamed: 0,Name,City,County,Latitude,Longitude,ID
0,Johns Hopkins All Children's Hospital,Saint Petersburg,Pinellas,27.762944,-82.639751,1
1,Tampa General Hospital,Tampa,Hillsborough,27.937501,-82.4589,2
2,Morton Plant North Bay Hospital,New Port Richey,Pasco,28.254698,-82.714225,3
3,AdventHealth Tampa,Tampa,Hillsborough,28.071491,-82.421897,4
4,Saint Joseph's Hospital,Tampa,Hillsborough,27.98275,-82.491464,5


In [15]:
# Open (or create) the SQLite database file; later cells reuse this connection.
conn = sqlite3.connect('florida_info_db.sqlite')

# Write the population frame to the 'elderly_people' table, replacing any
# existing table of that name. The dataframe index is not stored, and County_ID
# is declared as the table's integer primary key.
pop_percent_df.to_sql(
    name='elderly_people',
    con=conn,
    if_exists='replace',
    index=False,
    dtype={'County_ID': 'INTEGER PRIMARY KEY'},
)


67

In [20]:
# Write the hospital frame to the 'hospitals' table over the connection opened
# earlier, replacing any existing table of that name. The dataframe index is
# not stored, and ID is declared as the table's integer primary key.
hospital_df.to_sql(
    name='hospitals',
    con=conn,
    if_exists='replace',
    index=False,
    dtype={'ID': 'INTEGER PRIMARY KEY'},
)

85