<a href="https://colab.research.google.com/github/sonleh96/wb-gpbp-ldt/blob/dev-ghinwa/Diversity_Health_Services_District.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Measures the diversity of health services at the district level


In [None]:
!pip install google-cloud-storage
!pip install gcsfs
import pandas as pd
import gcsfs
import geopandas as gpd
from collections import Counter
import numpy as np



In [None]:
# Colab-only step: authenticate the current user.
# NOTE(review): presumably this provides the credentials that the Google Cloud
# Storage calls in later cells rely on - confirm when running outside Colab.
from google.colab import auth
auth.authenticate_user()

In [None]:
from google.cloud import storage

# Bucket that holds both the input shapefiles and the processed outputs
bucket_name = 'wb-ldt'

# Storage client, and a handle on the bucket looked up through it
client = storage.Client()
bucket = client.get_bucket(bucket_name)

**Shape File for Serbia**

In [None]:
# Location of the Serbia shapefile inside the bucket
file_path = "shapefiles/gadm41_SRB_2.json"
gcs_file_path = 'gs://' + bucket_name + '/' + file_path

# Read the shapefile --> the geometry column holds the (Multi)Polygon boundaries
gdf = gpd.read_file(gcs_file_path)

# Centroids computed directly on longitude/latitude degrees are distorted (and
# geopandas warns about it), so compute them in a projected, metre-based CRS
# and convert the resulting points back to the shapefile's own CRS.
projected_crs = gdf.estimate_utm_crs()
centroids = gdf.geometry.to_crs(projected_crs).centroid.to_crs(gdf.crs)

# One row per shapefile row: identifiers plus the centroid point.
# .copy() keeps the new columns from being written onto a slice of gdf.
center = gpd.GeoDataFrame(gdf[['GID_1', 'NAME_1']].copy(), geometry=centroids)
center['lat'] = center.geometry.y
center['lon'] = center.geometry.x
#center


  center['geometry'] = gdf.centroid
  center['geometry'] = gdf.centroid


**Extract Healthcare Facilities Data using the Overpass API**

In [None]:
import requests

# Overpass API endpoint
overpass_url = "https://overpass-api.de/api/interpreter"

# Overpass Query: Fetching all healthcare-related facilities in Serbia
overpass_query = """
[out:json];
area["ISO3166-1"="RS"]->.searchArea;
(
  node["healthcare"](area.searchArea);
  way["healthcare"](area.searchArea);
  relation["healthcare"](area.searchArea);

  node["amenity"="hospital"](area.searchArea);
  way["amenity"="hospital"](area.searchArea);
  relation["amenity"="hospital"](area.searchArea);

  node["amenity"="clinic"](area.searchArea);
  way["amenity"="clinic"](area.searchArea);
  relation["amenity"="clinic"](area.searchArea);

  node["amenity"="pharmacy"](area.searchArea);
  way["amenity"="pharmacy"](area.searchArea);
  relation["amenity"="pharmacy"](area.searchArea);

  node["amenity"="dentist"](area.searchArea);
  way["amenity"="dentist"](area.searchArea);
  relation["amenity"="dentist"](area.searchArea);
);
out center;
"""

# Make the request to Overpass API. Fail loudly on an HTTP error (rate limit,
# server timeout, ...) instead of trying to parse an error page as JSON, and
# bound the wait so the cell cannot hang forever.
response = requests.get(overpass_url, params={'data': overpass_query}, timeout=300)
response.raise_for_status()
data = response.json()

# Flatten each element into one record with 'id', 'lat', 'lon', 'name' and the
# facility type. With `out center`, nodes carry 'lat'/'lon' at the top level,
# whereas ways and relations carry them inside a nested 'center' object; read
# both so that ways/relations are not left without coordinates.
facility_records = []
for element in data['elements']:
    tags = element.get('tags') or {}
    center_point = element.get('center') or {}
    facility_records.append({
        'id': element['id'],
        'lat': element.get('lat', center_point.get('lat')),
        'lon': element.get('lon', center_point.get('lon')),
        'name': tags.get('name', None),
        # 'amenity' takes precedence; fall back to the 'healthcare' tag
        'amenity_or_healthcare': tags.get('amenity', tags.get('healthcare', None)),
    })

# Explicit columns keep the frame well-formed even when no element is returned
df_facilities = pd.DataFrame(
    facility_records,
    columns=['id', 'lat', 'lon', 'name', 'amenity_or_healthcare'],
).drop_duplicates()

**Connect Healthcare Data to Shape File**

In [None]:
# Build one point per facility from its longitude/latitude and label the points
# with the shapefile's CRS (set_crs only tags the data, it does not reproject;
# this assumes the shapefile uses the same lon/lat CRS as the Overpass output).
facility_points = gpd.points_from_xy(df_facilities.lon, df_facilities.lat)
df_facilities = gpd.GeoDataFrame(df_facilities, geometry=facility_points).set_crs(gdf.crs)

# Keep each facility that lies within a shapefile polygon, together with that
# polygon's attributes
serbia_facilities = gpd.sjoin(df_facilities, gdf, predicate='within')

In [None]:
# Number of facility rows that were matched to a polygon
serbia_facilities.shape[0]

1812

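**Calculate the Shannon Entropy Index for Each District**

For each district, $H = -\sum_i p_i \ln p_i$, where $p_i$ is the share of the district's facilities that are of type $i$.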

In [None]:
# Accumulator for [district, year, diversity] rows
diversity_healthcare = []

# Year label written next to every diversity value
year = '2022'

In [None]:
def shannon_entropy(categories):
    """Return the Shannon entropy (natural log) of the category labels.

    Missing labels are ignored. A series with no labels, or with a single
    distinct label, yields 0.0.
    """
    type_counts = categories.value_counts()
    if type_counts.empty:
        return 0.0
    proportions = type_counts / type_counts.sum()
    entropy = float(-np.sum(proportions * np.log(proportions)))
    # A single category gives -(1 * log 1) = -0.0; report a plain 0.0 instead
    return 0.0 if entropy == 0 else entropy


districts = gdf['GID_1'].unique()

# One pass over the joined facilities instead of re-filtering per district.
# Districts with no facilities are absent from the groupby result, so reindex
# over every district and give them a diversity of 0.0.
entropy_by_district = (
    serbia_facilities
    .groupby('GID_1')['amenity_or_healthcare']
    .apply(shannon_entropy)
    .reindex(districts, fill_value=0.0)
)

# Rebuild the list from scratch so re-running this cell never duplicates rows
diversity_healthcare = [
    [district, year, shannon_diversity]
    for district, shannon_diversity in entropy_by_district.items()
]

In [None]:
# Column names for the [district, year, diversity] rows
columns = ['GID_1', 'Year', 'healthcare-facilities-diversity']

# Create DataFrame
df = pd.DataFrame(diversity_healthcare, columns=columns)

# `center` holds one row per shapefile row, so a district's GID_1 can appear
# many times there. Reduce it to one name per district before merging,
# otherwise the left join multiplies every district row in the result.
district_names = center[['GID_1', 'NAME_1']].drop_duplicates(subset='GID_1')
df = df.merge(district_names,
              on='GID_1',
              how='left',
              validate='many_to_one')

# Define new column order
new_column_order = ['GID_1', 'NAME_1', 'Year', 'healthcare-facilities-diversity']

# Reorder columns
df = df[new_column_order]

In [None]:
# Number of distinct district identifiers in the result table
df['GID_1'].nunique(dropna=False)

25

In [None]:
# Write the result table to the bucket as a CSV file
from io import BytesIO


def upload_csv_to_gcs(bucket_name, destination_blob_name, file_buffer):
    """Upload the contents of a file-like buffer to Google Cloud Storage.

    bucket_name: name of the target bucket.
    destination_blob_name: object path inside the bucket.
    file_buffer: file-like object to upload; sent with content type text/csv.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_file(file_buffer, content_type='text/csv')
    print(f"File uploaded to {destination_blob_name}.")


# Object path of the CSV inside the bucket
destination_blob_name = 'RS/processed-data-district/SRB_healthcarefacilities_diversity.csv'

# Serialise the DataFrame to CSV in memory, then rewind the buffer so the
# upload reads it from the start
csv_buffer = BytesIO()
df.to_csv(csv_buffer, index=False)
csv_buffer.seek(0)

# Upload the in-memory CSV directly
upload_csv_to_gcs(bucket_name, destination_blob_name, csv_buffer)

File uploaded to RS/processed-data-district/SRB_healthcarefacilities_diversity.csv.
