## health_care_facilities.json

## Data loading

### Subtask:
Load the "health_care_facilities.json" file into a GeoDataFrame using GeoPandas.


In [11]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import json
from IPython.display import display

try:
    # Load the GeoJSON file
    gdf = gpd.read_file("/content/health_care_facilities.json")

    # Ensure the CRS is set correctly
    if gdf.crs is None:
        gdf.set_crs(epsg=4326, inplace=True)
    else:
        gdf = gdf.to_crs(epsg=4326)

    # Extract latitude and longitude
    gdf["longitude"] = gdf.geometry.x
    gdf["latitude"] = gdf.geometry.y

    # Display sample data
    display(gdf.head())
except Exception as e:
    print(f"An error occurred: {e}")
    gdf = None

Unnamed: 0,OBJECTID,TYPE,FLU_CASES,ADDRESS,CITY,ZIP,NAME,x,y,geometry,longitude,latitude
0,1,Nursing homes,50,506 s jackson,Ritzville,99169,Life care center of ritzville,"118° 22' 38.81477376"" W","47° 07' 17.98509752"" N",POINT (-118.37745 47.12166),-118.377449,47.121663
1,2,Nursing homes,39,495 north 13th street,Othello,99344,"Avalon care center - othello, llc","119° 09' 22.35944555"" W","46° 49' 51.91884130"" N",POINT (-119.15621 46.83109),-119.156211,46.831089
2,3,Nursing homes,90,1242 11th st,Clarkston,99403,Prestige care & rehabilitation - clarkston,"117° 03' 07.61724119"" W","46° 24' 23.24267987"" N",POINT (-117.05212 46.40646),-117.052116,46.406456
3,4,Nursing homes,104,44 goethals drive,Richland,99352,Life care center of richland,"119° 16' 53.98656280"" W","46° 15' 41.92346282"" N",POINT (-119.28166 46.26165),-119.281663,46.261645
4,5,Nursing homes,136,1508 west 7th avenue,Kennewick,99336,Life care center of kennewick,"119° 08' 21.45593089"" W","46° 12' 08.00065524"" N",POINT (-119.13929 46.20222),-119.139293,46.202222


## Data exploration

In [12]:
# Print the shape of the GeoDataFrame
print("Shape of the GeoDataFrame:", gdf.shape)

# Print the data types of each column
print("\nData types of each column:")
print(gdf.dtypes)

# Calculate and print descriptive statistics for relevant numerical columns
print("\nDescriptive statistics for numerical columns:")
numerical_cols = ['FLU_CASES', 'ZIP', 'x', 'y']  # Specify numerical columns
display(gdf[numerical_cols].describe())

# Print the names of columns relevant for creating maps
print("\nColumns relevant for mapping:")
relevant_cols = ['geometry', 'TYPE', 'FLU_CASES', 'CITY', 'ZIP'] # Example relevant columns
print(relevant_cols)

Shape of the GeoDataFrame: (2726, 12)

Data types of each column:
OBJECTID        int32
TYPE           object
FLU_CASES       int16
ADDRESS        object
CITY           object
ZIP            object
NAME           object
x              object
y              object
geometry     geometry
longitude     float64
latitude      float64
dtype: object

Descriptive statistics for numerical columns:


Unnamed: 0,FLU_CASES
count,2726.0
mean,189.169112
std,143.436122
min,0.0
25%,70.0
50%,148.5
75%,286.0
max,552.0



Columns relevant for mapping:
['geometry', 'TYPE', 'FLU_CASES', 'CITY', 'ZIP']


## Data wrangling

### Subtask:
 Handle missing values in the 'TYPE' column.


In [13]:

# Identify and count missing values in the 'TYPE' column
missing_type_count_before = gdf['TYPE'].isnull().sum()
print(f"Number of missing values in 'TYPE' before handling: {missing_type_count_before}")

Number of missing values in 'TYPE' before handling: 0


## Data visualization



In [14]:
import plotly.express as px

# Ensure gdf is valid before plotting
if gdf is not None and "longitude" in gdf.columns and "latitude" in gdf.columns:
    fig = px.scatter_mapbox(gdf,
                            lat="latitude",
                            lon="longitude",
                            color="TYPE",
                            hover_name="NAME",
                            hover_data=["ADDRESS", "CITY", "FLU_CASES"],
                            zoom=4,
                            height=600,
                            title="Healthcare Facilities in Washington",
                            mapbox_style="carto-positron")

    # Customize the map layout
    fig.update_layout(
        title_font_size=20,
        title_x=0.5,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        font=dict(size=12),
        mapbox=dict(
            style="carto-positron",
            center=dict(lat=47.0, lon=-120.5),
            zoom=5.5,
        ),
    )

    fig.show()
else:
    print("Error: GeoDataFrame is empty or missing coordinate columns.")

In [15]:
import plotly.express as px

# Ensure gdf is valid before plotting
if gdf is not None and "longitude" in gdf.columns and "latitude" in gdf.columns:
    fig = px.density_mapbox(gdf, lat="latitude", lon="longitude", z="FLU_CASES", radius=10,
                            center=dict(lat=47.0, lon=-120.5), zoom=5,
                            mapbox_style="carto-positron", color_continuous_scale="Viridis")

    # Customize the map layout
    fig.update_layout(
        title="Density of Healthcare Facilities by Flu Cases",
        title_x=0.5,
        title_font_size=20,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        font=dict(size=12),
        mapbox=dict(
            style="carto-positron",
            center=dict(lat=47.0, lon=-120.5),
            zoom=5.5,
        ),
    )

    fig.show()
else:
    print("Error: GeoDataFrame is empty or missing coordinate columns.")