In [27]:
import csv
import json
import os

import pandas as pd
import pymongo
import requests
from pandas.io.json import json_normalize
from pymongo import MongoClient
from sqlalchemy import create_engine, inspect

In [28]:
# Read the raw living-wage extract, keeping only the six columns used below.
hci_low_income = pd.read_csv("Data/hci_low_income.csv")[
    ["race_eth_name", "geoname", "county_name", "fam_lt_lw", "families", "livingwage"]
]

# Drop every row with a missing value, then switch to readable column names.
hci_low_income_clean = (
    hci_low_income
    .dropna(axis=0, how="any")
    .rename(
        columns={
            "race_eth_name": "race",
            "county_name": "county",
            "fam_lt_lw": "families_below_the_living_wage",
            "families": "number_of_families",
            "livingwage": "living_wage",
        }
    )
)
hci_low_income_clean.head()

Unnamed: 0,race,geoname,county,families_below_the_living_wage,number_of_families,living_wage
9,AIAN,Alameda,Alameda,71.0,320.0,22.64
10,Asian,Alameda,Alameda,5735.0,41863.0,22.64
11,AfricanAm,Alameda,Alameda,1149.0,6431.0,22.64
12,Latino,Alameda,Alameda,9281.0,25776.0,22.64
13,NHOPI,Alameda,Alameda,118.0,1046.0,22.64


In [35]:
# Average the numeric measures per county.
# numeric_only=True keeps the remaining string columns (race, geoname) out of
# the aggregation; without it pandas 2.x raises TypeError instead of silently
# dropping them.
hci_low_income_clean_grouped = (
    hci_low_income_clean
    .groupby(["county"])
    .mean(numeric_only=True)
    .reset_index()
)

# Share of families below the living wage, as a percentage.
hci_low_income_clean_grouped["percent_of_families_below_the_living_wage"] = (
    hci_low_income_clean_grouped["families_below_the_living_wage"]
    / hci_low_income_clean_grouped["number_of_families"]
    * 100
)

# Rank counties from highest to lowest share, renumber the rows 0..n-1 and
# round every measure to two decimals.
hci_low_income_clean_grouped = (
    hci_low_income_clean_grouped
    .sort_values("percent_of_families_below_the_living_wage", ascending=False)
    .reset_index(drop=True)
    .round({
        'families_below_the_living_wage': 2,
        'number_of_families': 2,
        'living_wage': 2,
        'percent_of_families_below_the_living_wage': 2,
    })
)
hci_low_income_clean_grouped

Unnamed: 0,county,families_below_the_living_wage,number_of_families,living_wage,percent_of_families_below_the_living_wage
0,Colusa,266.08,524.83,20.8,50.7
1,Lake,227.06,457.14,21.13,49.67
2,Modoc,117.5,237.62,20.55,49.45
3,Plumas,156.31,317.75,21.14,49.19
4,Tehama,355.64,727.0,20.34,48.92
5,Imperial,752.53,1569.94,20.83,47.93
6,Trinity,73.07,153.14,20.37,47.71
7,Mendocino,358.65,761.73,21.74,47.08
8,Siskiyou,151.66,325.75,20.27,46.56
9,Del Norte,124.6,268.9,20.71,46.34


In [36]:
# Save the per-county summary (header row included, no index column).
hci_low_income_clean_grouped.to_csv(
    "Data/cleaned_lowincome.csv",
    encoding="utf-8",
    index=False,
    header=True,
)

In [37]:
# Average the numeric measures for every (race, county) pair.
# numeric_only=True keeps the string column geoname out of the aggregation;
# without it pandas 2.x raises TypeError instead of silently dropping it.
hci_low_income_clean_race = (
    hci_low_income_clean
    .groupby(["race", "county"])
    .mean(numeric_only=True)
)

# Share of families below the living wage, as a percentage.
hci_low_income_clean_race["percent_of_families_below_the_living_wage"] = (
    hci_low_income_clean_race["families_below_the_living_wage"]
    / hci_low_income_clean_race["number_of_families"]
) * 100

# Turn the (race, county) index back into columns and round the two derived
# measures; the family counts are left unrounded.
hci_low_income_clean_race = (
    hci_low_income_clean_race
    .reset_index()
    .round({'living_wage': 2, 'percent_of_families_below_the_living_wage': 2})
)
hci_low_income_clean_race.head()

Unnamed: 0,race,county,families_below_the_living_wage,number_of_families,living_wage,percent_of_families_below_the_living_wage
0,AIAN,Alameda,158.5,285.0,24.74,55.61
1,AIAN,Butte,56.0,91.5,20.88,61.2
2,AIAN,Contra Costa,23.0,125.5,24.74,18.33
3,AIAN,Del Norte,0.0,22.0,20.71,0.0
4,AIAN,El Dorado,3.5,66.0,22.26,5.3


In [38]:
# Save the per-race/county summary (header row included, no index column).
hci_low_income_clean_race.to_csv(
    "Data/cleaned_lowincomegrouped.csv",
    encoding="utf-8",
    index=False,
    header=True,
)

In [39]:
#Load the dataframes to Mongo DB using a new function

def write_df_to_mongoDB(my_df,
                        database_name='mydatabasename',
                        collection_name='mycollectionname',
                        server='localhost',
                        mongodb_port=27017,
                        chunk_size=100):
    """Replace the contents of a MongoDB collection with the rows of a DataFrame.

    Every existing document in the target collection is deleted first, then
    each row of ``my_df`` is inserted as one document, ``chunk_size`` rows per
    ``insert_many`` call.

    Parameters
    ----------
    my_df : pandas.DataFrame
        Frame to store; converted with ``my_df.to_dict('records')``.
    database_name : str
        Name of the target database.
    collection_name : str
        Name of the target collection.
    server : str
        Host name passed to ``MongoClient``.
    mongodb_port : int or str
        Port passed to ``MongoClient`` (converted with ``int``).
    chunk_size : int
        Maximum number of documents per ``insert_many`` call; must be positive.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``chunk_size`` is not a positive integer.
    """
    chunk_size = int(chunk_size)
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    records = my_df.to_dict('records')

    client = MongoClient(server, int(mongodb_port))
    try:
        collection = client[database_name][collection_name]
        collection.delete_many({})  # start from an empty collection

        # Walk the records in fixed-size windows. The last window may be
        # shorter than chunk_size and must still be written. An empty frame
        # yields no windows, so insert_many is never called with an empty list.
        for start in range(0, len(records), chunk_size):
            collection.insert_many(records[start:start + chunk_size])
    finally:
        client.close()

    print('Done')
    return

In [40]:
# County GeoJSON loaded straight into a frame; the file is read as ISO-8859-1.
ca_counties = pd.read_json(
    'Data/ca_counties.json',
    encoding="ISO-8859-1",
)
ca_counties.head()

Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'properties': {'GEO_ID': '..."
1,FeatureCollection,"{'type': 'Feature', 'properties': {'GEO_ID': '..."
2,FeatureCollection,"{'type': 'Feature', 'properties': {'GEO_ID': '..."
3,FeatureCollection,"{'type': 'Feature', 'properties': {'GEO_ID': '..."
4,FeatureCollection,"{'type': 'Feature', 'properties': {'GEO_ID': '..."


In [9]:
# Parse the GeoJSON inside a context manager so the file handle is closed as
# soon as the document has been read.
with open('Data/Medically_Underserved_Areas.geojson') as geojson_file:
    medically_underserved_data = json.load(geojson_file)

# One row per entry of the top-level "features" list.
medically_underserved_df = pd.DataFrame(medically_underserved_data["features"])
medically_underserved_df.head()

Unnamed: 0,type,properties,geometry
0,Feature,"{'OBJECTID': 1, 'OBJECTID_1': 198, 'MuaSrcID':...","{'type': 'Polygon', 'coordinates': [[[-121.495..."
1,Feature,"{'OBJECTID': 2, 'OBJECTID_1': 199, 'MuaSrcID':...","{'type': 'Polygon', 'coordinates': [[[-121.571..."
2,Feature,"{'OBJECTID': 3, 'OBJECTID_1': 200, 'MuaSrcID':...","{'type': 'Polygon', 'coordinates': [[[-121.621..."
3,Feature,"{'OBJECTID': 4, 'OBJECTID_1': 201, 'MuaSrcID':...","{'type': 'Polygon', 'coordinates': [[[-124.316..."
4,Feature,"{'OBJECTID': 5, 'OBJECTID_1': 202, 'MuaSrcID':...","{'type': 'Polygon', 'coordinates': [[[-119.731..."


In [10]:
with open('Data/medical_county.json') as json_data:
    data = json.load(json_data)

# The frame built from data['records'] has positional column labels 0..13;
# map each position to its name, in order. The spelling 'Latitute' is kept
# as-is because it is the column name the rest of the notebook stores.
medical_county = pd.DataFrame(data['records']).rename(
    columns=dict(enumerate([
        'County', 'Provider', 'Address', 'City', 'State', 'Zip', 'Phone',
        'Website', 'Category', 'Type', 'Services', 'Latitute', 'Longitude',
        'Location',
    ]))
)
medical_county.head()

Unnamed: 0,County,Provider,Address,City,State,Zip,Phone,Website,Category,Type,Services,Latitute,Longitude,Location
0,Alameda,Oakland Community Action Partnership,"150 Frank H. Ogawa Plaza 4th Floor, Suite 4340",Oakland,CA,94612,(510) 238-2362,www.AC-OCAP.com,Community Services,Community Services,Child/Youth Services,37.8052939,-122.2711085,"(37.805293900000002, -122.2711085)"
1,Alameda,Oakland Community Action Partnership,"150 Frank H. Ogawa Plaza 4th Floor, Suite 4340",Oakland,CA,94612,(510) 238-2362,www.AC-OCAP.com,Community Services,Community Services,Education Services,37.8052939,-122.2711085,"(37.805293900000002, -122.2711085)"
2,Alameda,Oakland Community Action Partnership,"150 Frank H. Ogawa Plaza 4th Floor, Suite 4340",Oakland,CA,94612,(510) 238-2362,www.AC-OCAP.com,Community Services,Community Services,Emergency Services,37.8052939,-122.2711085,"(37.805293900000002, -122.2711085)"
3,Alameda,Oakland Community Action Partnership,"150 Frank H. Ogawa Plaza 4th Floor, Suite 4340",Oakland,CA,94612,(510) 238-2362,www.AC-OCAP.com,Community Services,Community Services,Employment Services,37.8052939,-122.2711085,"(37.805293900000002, -122.2711085)"
4,Alameda,Oakland Community Action Partnership,"150 Frank H. Ogawa Plaza 4th Floor, Suite 4340",Oakland,CA,94612,(510) 238-2362,www.AC-OCAP.com,Community Services,Community Services,Health Services,37.8052939,-122.2711085,"(37.805293900000002, -122.2711085)"


In [11]:
#Use above function to create a collection to MongoDB

# Store the county frame in HealthcareProject.ca_counties.
write_df_to_mongoDB(
    ca_counties,
    database_name='HealthcareProject',
    collection_name='ca_counties',
    server='localhost',
    mongodb_port=27017,
    chunk_size=100,
)

Done


In [12]:
# Store the provider frame in HealthcareProject.medical_county, 10 rows per batch.
write_df_to_mongoDB(
    medical_county,
    database_name='HealthcareProject',
    collection_name='medical_county',
    server='localhost',
    mongodb_port=27017,
    chunk_size=10,
)

Done


In [13]:
# Store the underserved-area features in HealthcareProject.medically_underserved_df.
write_df_to_mongoDB(
    medically_underserved_df,
    database_name='HealthcareProject',
    collection_name='medically_underserved_df',
    server='localhost',
    mongodb_port=27017,
    chunk_size=100,
)

Done


In [18]:
# Upload CSV files to SQL
# Build the PostgreSQL connection string from environment variables so that no
# credentials are stored in the notebook. PGUSER and PGPASSWORD are required
# (a KeyError is raised if either is unset); host, port and database fall back
# to the local development values.
rds_connection_string = (
    f"{os.environ['PGUSER']}:{os.environ['PGPASSWORD']}"
    f"@{os.environ.get('PGHOST', 'localhost')}:{os.environ.get('PGPORT', '5432')}"
    f"/{os.environ.get('PGDATABASE', 'Healthcare_LowIncome')}"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [19]:
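# Schema of the `low_income` table. This is SQL, not Python; the load cell below
# appends to the table (if_exists='append'), so it has to exist beforehand.
# NOTE(review): every measure is declared INT, so the two-decimal values computed
# above (e.g. 266.08, 20.8, 50.7) read back as whole numbers (266, 21, 51).
# Consider NUMERIC(12, 2) for these columns if the decimals should be kept.
# CREATE TABLE low_income(
# 	County VARCHAR PRIMARY KEY,
# 	Families_Below_the_Living_Wage INT,
# 	Number_of_Families INT,
# 	Living_Wage INT,
# 	Percent_of_Families_Below_the_Living_Wage INT
# );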

In [20]:
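# Schema of the `low_income_race` table. This is SQL, not Python; the load cell
# below appends to the table (if_exists='append'), so it has to exist beforehand.
# NOTE(review): every measure is declared INT, so the fractional values computed
# above (e.g. 158.5, 24.74, 55.61) read back as whole numbers (159, 25, 56).
# Consider NUMERIC(12, 2) for these columns if the decimals should be kept.
# CREATE TABLE low_income_race(
# 	Race VARCHAR,
# 	County VARCHAR,
# 	Families_Below_the_Living_Wage INT,
# 	Number_of_Families INT,
# 	Living_Wage INT,
# 	Percent_of_Families_Below_the_Living_Wage INT,
# 	FOREIGN KEY (County) REFERENCES low_income(County)
# );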

In [21]:
# List the tables visible through the engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the inspector call works on
# both old and new releases.
inspect(engine).get_table_names()

['low_income', 'low_income_race']

In [22]:
# Append the per-county summary to the existing low_income table.
# NOTE(review): with if_exists='append', re-running this cell sends the same
# rows again; confirm how the table's key constraint handles that.
hci_low_income_clean_grouped.to_sql(
    name='low_income',
    con=engine,
    if_exists='append',
    index=False,
)

In [23]:
# Append the per-race/county summary to the existing low_income_race table.
hci_low_income_clean_race.to_sql(
    name='low_income_race',
    con=engine,
    if_exists='append',
    index=False,
)

In [24]:
# Read the table back and show its first rows to confirm the load.
pd.read_sql_query(
    'select * from low_income',
    con=engine,
).head()

Unnamed: 0,county,families_below_the_living_wage,number_of_families,living_wage,percent_of_families_below_the_living_wage
0,Colusa,266,525,21,51
1,Lake,227,457,21,50
2,Modoc,118,238,21,49
3,Plumas,156,318,21,49
4,Tehama,356,727,20,49


In [25]:
# Read the table back and show its first rows to confirm the load.
pd.read_sql_query(
    'select * from low_income_race',
    con=engine,
).head()

Unnamed: 0,race,county,families_below_the_living_wage,number_of_families,living_wage,percent_of_families_below_the_living_wage
0,AIAN,Alameda,159,285,25,56.0
1,AIAN,Butte,56,92,21,61.0
2,AIAN,Contra Costa,23,126,25,18.0
3,AIAN,Del Norte,0,22,21,0.0
4,AIAN,El Dorado,4,66,22,5.0
