In [5]:
# Dependencies
import pandas as pd
from pathlib import Path
import pprint
# Shared pretty-printer for nested structures; indent=4 sets four spaces per nesting level
pp = pprint.PrettyPrinter(indent=4)

In [6]:
# Store filepath in a variable
# (relative path, so it is resolved against the notebook's working directory)
file = Path("us_hospital_locations.csv")
# Load the whole CSV into a DataFrame using pandas' default parsing options
hospitals_df = pd.read_csv(file)

# Preview the first five rows
hospitals_df.head()

Unnamed: 0,X,Y,FID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,VAL_DATE,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD
0,-13318890.0,4346975.0,1,5793230,CENTRAL VALLEY GENERAL HOSPITAL,1025 NORTH DOUTY STREET,HANFORD,CA,93230,NOT AVAILABLE,...,2014/02/10 00:00:00,http://www.hanfordhealth.com,NOT AVAILABLE,NOT AVAILABLE,6,PROPRIETARY,-999,49,NOT AVAILABLE,N
1,-13226510.0,4049626.0,2,53391362,LOS ROBLES HOSPITAL & MEDICAL CENTER - EAST CA...,150 VIA MERIDA,WESTLAKE VILAGE,CA,91362,NOT AVAILABLE,...,2014/02/10 00:00:00,http://www.losrobleshospital.com,NOT AVAILABLE,NOT AVAILABLE,6,PROPRIETARY,-999,62,NOT AVAILABLE,N
2,-13156200.0,4031978.0,3,11190023,EAST LOS ANGELES DOCTORS HOSPITAL,4060 WHITTIER BOULEVARD,LOS ANGELES,CA,90023,NOT AVAILABLE,...,2014/02/10 00:00:00,http://www.elalax.com,NOT AVAILABLE,NOT AVAILABLE,6,PROPRIETARY,-999,127,NOT AVAILABLE,N
3,-13171900.0,4041752.0,4,17090028,SOUTHERN CALIFORNIA HOSPITAL AT HOLLYWOOD,6245 DE LONGPRE AVENUE,HOLLYWOOD,CA,90028,NOT AVAILABLE,...,2014/02/10 00:00:00,http://sch-hollywood.com/,NOT AVAILABLE,HOLLYWOOD COMMUNITY HOSPITAL OF HOLLYWOOD,6,PROPRIETARY,-999,100,NOT AVAILABLE,N
4,-13132080.0,4037270.0,5,23691706,KINDRED HOSPITAL BALDWIN PARK,14148 FRANCISQUITO AVENUE,BALDWIN PARK,CA,91706,NOT AVAILABLE,...,2014/02/10 00:00:00,http://www.khbaldwinpark.com,NOT AVAILABLE,NOT AVAILABLE,6,PROPRIETARY,-999,95,NOT AVAILABLE,N


In [7]:
# List every column name in the raw data (the head() preview elides the middle columns with "...")
hospitals_df.columns

Index(['X', 'Y', 'FID', 'ID', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP',
       'ZIP4', 'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY',
       'COUNTYFIPS', 'COUNTRY', 'LATITUDE', 'LONGITUDE', 'NAICS_CODE',
       'NAICS_DESC', 'SOURCE', 'SOURCEDATE', 'VAL_METHOD', 'VAL_DATE',
       'WEBSITE', 'STATE_ID', 'ALT_NAME', 'ST_FIPS', 'OWNER', 'TTL_STAFF',
       'BEDS', 'TRAUMA', 'HELIPAD'],
      dtype='object')

In [8]:
# Narrow the raw data down to the identifier, location, name and status
# fields that this project works with.
cleaned_hospitals_df = hospitals_df.loc[
    :,
    [
        'ID',
        'LATITUDE',
        'LONGITUDE',
        'NAME',
        'CITY',
        'STATE',
        'COUNTY',
        'COUNTRY',
        'STATUS',
    ],
]
cleaned_hospitals_df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,NAME,CITY,STATE,COUNTY,COUNTRY,STATUS
0,5793230,36.336159,-119.645667,CENTRAL VALLEY GENERAL HOSPITAL,HANFORD,CA,KINGS,USA,CLOSED
1,53391362,34.154939,-118.815736,LOS ROBLES HOSPITAL & MEDICAL CENTER - EAST CA...,WESTLAKE VILAGE,CA,VENTURA,USA,OPEN
2,11190023,34.023647,-118.184165,EAST LOS ANGELES DOCTORS HOSPITAL,LOS ANGELES,CA,LOS ANGELES,USA,OPEN
3,17090028,34.096391,-118.325235,SOUTHERN CALIFORNIA HOSPITAL AT HOLLYWOOD,HOLLYWOOD,CA,LOS ANGELES,USA,OPEN
4,23691706,34.063039,-117.967438,KINDRED HOSPITAL BALDWIN PARK,BALDWIN PARK,CA,LOS ANGELES,USA,OPEN


In [9]:
# Summarise the selected columns: row count, per-column non-null counts and dtypes
cleaned_hospitals_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7596 entries, 0 to 7595
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         7596 non-null   int64  
 1   LATITUDE   7596 non-null   float64
 2   LONGITUDE  7596 non-null   float64
 3   NAME       7596 non-null   object 
 4   CITY       7596 non-null   object 
 5   STATE      7596 non-null   object 
 6   COUNTY     7596 non-null   object 
 7   COUNTRY    7596 non-null   object 
 8   STATUS     7596 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 534.2+ KB


In [10]:
# Check for missing values in the 'COUNTY' column.
# isnull() only catches real NaN/None. The raw file also encodes missing text
# fields as the literal string 'NOT AVAILABLE' (seen in ZIP4, STATE_ID and
# TRAUMA in the preview of the raw data), so test for that placeholder too.
# NOTE(review): whether COUNTY actually contains the placeholder is not known
# from the preview; this check makes it visible if it does.
null_county_values = cleaned_hospitals_df.loc[
    cleaned_hospitals_df['COUNTY'].isnull()
    | cleaned_hospitals_df['COUNTY'].eq('NOT AVAILABLE')
]

# Display the rows with missing COUNTY values, if any.
print(null_county_values)

Empty DataFrame
Columns: [ID, LATITUDE, LONGITUDE, NAME, CITY, STATE, COUNTY, COUNTRY, STATUS]
Index: []


In [13]:
# Keep every hospital whose STATUS is anything other than "CLOSED".
# Row labels are not reset, so the result keeps the original index values.
filtered_hospitals_df = cleaned_hospitals_df.loc[
    cleaned_hospitals_df['STATUS'].ne('CLOSED')
]
filtered_hospitals_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7277 entries, 1 to 7595
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         7277 non-null   int64  
 1   LATITUDE   7277 non-null   float64
 2   LONGITUDE  7277 non-null   float64
 3   NAME       7277 non-null   object 
 4   CITY       7277 non-null   object 
 5   STATE      7277 non-null   object 
 6   COUNTY     7277 non-null   object 
 7   COUNTRY    7277 non-null   object 
 8   STATUS     7277 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 568.5+ KB


In [14]:
# Preview the filtered frame; row labels are carried over from the unfiltered frame (no index reset)
filtered_hospitals_df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,NAME,CITY,STATE,COUNTY,COUNTRY,STATUS
1,53391362,34.154939,-118.815736,LOS ROBLES HOSPITAL & MEDICAL CENTER - EAST CA...,WESTLAKE VILAGE,CA,VENTURA,USA,OPEN
2,11190023,34.023647,-118.184165,EAST LOS ANGELES DOCTORS HOSPITAL,LOS ANGELES,CA,LOS ANGELES,USA,OPEN
3,17090028,34.096391,-118.325235,SOUTHERN CALIFORNIA HOSPITAL AT HOLLYWOOD,HOLLYWOOD,CA,LOS ANGELES,USA,OPEN
4,23691706,34.063039,-117.967438,KINDRED HOSPITAL BALDWIN PARK,BALDWIN PARK,CA,LOS ANGELES,USA,OPEN
5,25190712,33.859707,-118.148403,LAKEWOOD REGIONAL MEDICAL CENTER,LAKEWOOD,CA,LOS ANGELES,USA,OPEN


In [15]:
# Convert each hospital row into {'name': <NAME>, 'info': {<all other columns>}}.
# to_dict(orient='records') produces one plain dict per row in a single pass,
# which avoids the per-row Series that iterrows() builds and keeps column order.
hospital_list = [
    {
        'name': record['NAME'],
        'info': {column: value for column, value in record.items() if column != 'NAME'},
    }
    for record in filtered_hospitals_df.to_dict(orient='records')
]

# Sanity check: exactly one entry per row of the filtered frame.
assert len(hospital_list) == len(filtered_hospitals_df)

# Display only the first few entries; printing the whole list would flood the
# cell output with one entry per hospital.
pp.pprint(hospital_list[:3])

[   {   'info': {   'CITY': 'WESTLAKE VILAGE',
                    'COUNTRY': 'USA',
                    'COUNTY': 'VENTURA',
                    'ID': 53391362,
                    'LATITUDE': 34.1549388720001,
                    'LONGITUDE': -118.815736391,
                    'STATE': 'CA',
                    'STATUS': 'OPEN'},
        'name': 'LOS ROBLES HOSPITAL & MEDICAL CENTER - EAST CAMPUS'},
    {   'info': {   'CITY': 'LOS ANGELES',
                    'COUNTRY': 'USA',
                    'COUNTY': 'LOS ANGELES',
                    'ID': 11190023,
                    'LATITUDE': 34.023647302,
                    'LONGITUDE': -118.184164805,
                    'STATE': 'CA',
                    'STATUS': 'OPEN'},
        'name': 'EAST LOS ANGELES DOCTORS HOSPITAL'},
    {   'info': {   'CITY': 'HOLLYWOOD',
                    'COUNTRY': 'USA',
                    'COUNTY': 'LOS ANGELES',
                    'ID': 17090028,
                    'LATITUDE': 34.0963913570001,

In [16]:
# Export the DataFrame to a CSV file
# (relative path, so the file is written to the notebook's working directory).
# index=False leaves the row labels, which are non-contiguous after filtering, out of the file.
filtered_hospitals_df.to_csv('cleaned_hospitals_data.csv', index=False)