## Environment Setup for Genetic Algorithm

In [1]:
# installing libraries
# %%capture --no-stderr
# %pip install shapely pyproj pyogrio geopandas[all]

In [2]:
# connect to goolge colab
from google.colab import drive
drive.mount('/content/drive')

# import pandas to read data
import pandas as pd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# read data
data = pd.read_csv('/content/drive/MyDrive/SLU/2025 Spring/Week 6/Mo_pop_Sim.csv')

# 100 random samples from data - UR: Uraral/Urban , long: longitude, lat: latitude
data.sample(n = 100, random_state = 1)

Unnamed: 0,UR,long,lat
4570878,U,-90.344156,38.721628
3476120,R,-91.302490,38.856925
4994615,U,-90.313552,38.489079
5813344,R,-92.000587,37.406058
700996,R,-90.705070,36.921746
...,...,...,...
5736948,R,-93.314691,36.809588
2060904,U,-94.531707,38.920738
4177675,U,-90.499447,38.766409
2966350,U,-90.567699,38.229303


In [4]:
# check the directory
import os
path = '/content/drive/MyDrive/SLU/2025 Spring/Week 6/'
os.listdir(path)

['Mo_pop_Sim.csv',
 'MO_2018_Federally_Qualified_Health_Center_Locations.shp',
 'MO_2018_Federally_Qualified_Health_Center_Locations.prj',
 'MO_2018_Federally_Qualified_Health_Center_Locations.xml',
 'MO_2018_Federally_Qualified_Health_Center_Locations.cpg',
 'MO_2018_Federally_Qualified_Health_Center_Locations.dbf',
 'MO_2018_Federally_Qualified_Health_Center_Locations.shx',
 'assignment-starter.ipynb',
 'Untitled11.ipynb',
 'assignment-starter1.ipynb',
 'genetic-moth-1.ipynb',
 'genetic-moth-2.ipynb',
 'Week6_assignment.ipynb']

In [5]:
from shapely.geometry import Point
import geopandas as gpd

# read the shapefile with geopandas library
shapefile_path = '/content/drive/MyDrive/SLU/2025 Spring/Week 6/MO_2018_Federally_Qualified_Health_Center_Locations.shp'
gdf = gpd.read_file(shapefile_path)

# check the type of the data, ensuring its not pandas dataframe
print(type(gdf))

<class 'geopandas.geodataframe.GeoDataFrame'>


In [6]:
# check the coordinate reference system
print(gdf.crs)
# assign crs
crs = gdf.crs
# set the variable "OBJECTID" as index
gdf = gdf.set_index("OBJECTID")

EPSG:4326


In [7]:
# check the columns
print(gdf.columns)
# subset the geometry column which has latitude and longitude
fqhc_df = gdf['geometry']
fqhc_df

Index(['Group_Name', 'Facility', 'Address', 'City', 'County', 'State', 'Zip',
       'Phone', 'Latitude', 'Longitude', 'Loc_Code', 'geometry'],
      dtype='object')


Unnamed: 0_level_0,geometry
OBJECTID,Unnamed: 1_level_1
1,POINT (-90.55472 38.43597)
2,POINT (-91.13403 37.71462)
3,POINT (-92.60144 38.16025)
4,POINT (-90.45724 36.77261)
5,POINT (-94.49886 38.9629)
...,...
193,POINT (-91.77391 37.94594)
194,POINT (-93.29166 37.2243)
195,POINT (-91.56258 36.56534)
196,POINT (-90.23025 38.67777)


In [8]:
# In Missouri it is roughly 56 miles per degree of longitude and 69 miles per degree of latitude. We'll use an approximation of 63 miles per unit for our circle.

# calculate how many degrees of lat or longitude correspond to 1 mile in Missouri
DEGREES_PER_MILE = 1/ 63
# create buffer (circular area) of 30 miles around each FQHC location.
circles_geom = gdf.buffer( 30 * DEGREES_PER_MILE)
# create new Geo dataframe combining the original gdf data with new bugger geometry
fqhc_circles_df = gpd.GeoDataFrame(gdf, geometry=circles_geom, crs=crs)
fqhc_circles_df


  circles_geom = gdf.buffer( 30 * DEGREES_PER_MILE)


Unnamed: 0_level_0,Group_Name,Facility,Address,City,County,State,Zip,Phone,Latitude,Longitude,Loc_Code,geometry
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,COMTREA,COMTREA Byrnes Mill Health Center,100 Osage Executive Circle,House Springs,Jefferson,MO,63051,6367893372,38.435946,-90.554678,MAP,"POLYGON ((-90.07853 38.43597, -90.08082 38.389..."
2,Missouri Highlands Health Care,Viburnum Medical Clinic,18 Viburnum Center Road,Viburnum,Iron,MO,65566,5732445406,37.714620,-91.133983,MAP,"POLYGON ((-90.65784 37.71462, -90.66013 37.667..."
3,Central Ozarks Medical Center,Central Ozarks Medical Center At The Lake,3870 Columbia Avenue,Osage Beach,Miller,MO,65065,5733027490,38.160258,-92.601463,MAP,"POLYGON ((-92.12525 38.16025, -92.12754 38.113..."
4,Missouri Highlands Health Care,Missouri Highland Medical Clinic - Poplar Bluf...,"225 Physicians Park Drive, Suite 303",Poplar Bluff,Butler,MO,63901,5737856536,36.772568,-90.457206,MAP,"POLYGON ((-89.98105 36.77261, -89.98334 36.725..."
5,Swope Health Services,Swope Health Hickman Mills,8800 Blue Ridge Boulevard,Kansas City,Jackson,MO,64138,8163213200,38.962882,-94.498847,MAP,"POLYGON ((-94.02267 38.9629, -94.02496 38.9162..."
...,...,...,...,...,...,...,...,...,...,...,...,...
193,Your Community Health Center,Your Community Health Center - Health Department,"200 North Main, Suite G51",Rolla,Phelps,MO,65401,5734586950,37.945925,-91.773948,MAP,"POLYGON ((-91.29772 37.94594, -91.30001 37.899..."
194,Jordan Valley Community Health Center,Jordan Valley Community Health Center - Medica...,"1443 N. Roberson, Suite 505",Springfield,Greene,MO,65802,4178511554,37.224321,-93.291591,MAP,"POLYGON ((-92.81547 37.2243, -92.81776 37.1776..."
195,Southern Missouri Community Health Center,Oregon County Community Health Center,"US Highway 63 North (RR 3, Box 3703)",Thayer,Oregon,MO,65791,4172642990,36.565336,-91.562618,MAP,"POLYGON ((-91.08639 36.56534, -91.08869 36.518..."
196,CareSTL Health,Pope Avenue Health Center,4500 Pope Ave,St. Louis,St. Louis City,MO,63115,3143853990,38.677759,-90.230247,MAP,"POLYGON ((-89.75406 38.67777, -89.75636 38.631..."


In [9]:
def create_point_column(df, lon_col, lat_col, crs="EPSG:4326"):
    """
    Creates a geometry column of Point objects from longitude and latitude columns in a DataFrame.

    Args:
        df (pandas.DataFrame): The DataFrame containing longitude and latitude columns.
        lon_col (str): The name of the longitude column.
        lat_col (str): The name of the latitude column.
        crs (str, optional): Coordinate Reference System. Defaults to "EPSG:4326".

    Returns:
        geopandas.GeoDataFrame: A GeoDataFrame with the added geometry column.
    """
    # create a list of Point objects, pairing the lingitude and latitude values
    geometry = [Point(xy) for xy in zip(df[lon_col], df[lat_col])]
    # create a new Geo dataframe with updated geometry points
    gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
    return gdf

In [10]:
# copy the original pandas dataframe
df = data.copy()
big_pop_df = df
# get random 1 percent of the rows
pop_df = big_pop_df.sample(frac=0.01)

In [11]:
pop_df

Unnamed: 0,UR,long,lat
979152,U,-94.575700,39.383144
703333,R,-90.715613,36.897098
4107195,U,-90.858473,38.783082
5137509,U,-90.297655,38.748177
982305,U,-94.415457,39.249146
...,...,...,...
1015842,R,-94.456253,39.338660
249152,U,-92.333448,38.982641
2945526,R,-90.365751,38.133654
5115721,U,-90.352043,38.541184


In [12]:
# apply the create_pount_column function defiend earlier
pop_gdf = create_point_column(pop_df, 'long', 'lat', crs)
pop_gdf

Unnamed: 0,UR,long,lat,geometry
979152,U,-94.575700,39.383144,POINT (-94.5757 39.38314)
703333,R,-90.715613,36.897098,POINT (-90.71561 36.8971)
4107195,U,-90.858473,38.783082,POINT (-90.85847 38.78308)
5137509,U,-90.297655,38.748177,POINT (-90.29766 38.74818)
982305,U,-94.415457,39.249146,POINT (-94.41546 39.24915)
...,...,...,...,...
1015842,R,-94.456253,39.338660,POINT (-94.45625 39.33866)
249152,U,-92.333448,38.982641,POINT (-92.33345 38.98264)
2945526,R,-90.365751,38.133654,POINT (-90.36575 38.13365)
5115721,U,-90.352043,38.541184,POINT (-90.35204 38.54118)


In [13]:
# check which population points are inside the buffer and group the points by OBJECTID
pop_in_circles = gpd.sjoin(pop_gdf, fqhc_circles_df, predicate='within').groupby('OBJECTID').count()
# dataframe that has the result of how many points are located inside the 30 mile radius of each health center
pop_in_circles

Unnamed: 0_level_0,UR,long,lat,geometry,Group_Name,Facility,Address,City,County,State,Zip,Phone,Latitude,Longitude,Loc_Code
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628,19628
2,640,640,640,640,640,640,640,640,640,640,640,640,640,640,640
3,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332,1332
4,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905
5,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664,10664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473,1473
194,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646,4646
195,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542
196,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271,17271


In [14]:
# locate the nearest health center for each point
closest_fqhc = gpd.sjoin_nearest(pop_gdf, gdf)
closest_fqhc




Unnamed: 0,UR,long,lat,geometry,OBJECTID,Group_Name,Facility,Address,City,County,State,Zip,Phone,Latitude,Longitude,Loc_Code
979152,U,-94.575700,39.383144,POINT (-94.5757 39.38314),129,Samuel U. Rodgers Health Center,Samuel U. Rodgers Health Center-Clay County Fa...,800 Haines Drive,Liberty,Clay,MO,64068,8164130662,39.257031,-94.451666,MAP
703333,R,-90.715613,36.897098,POINT (-90.71561 36.8971),4,Missouri Highlands Health Care,Missouri Highland Medical Clinic - Poplar Bluf...,"225 Physicians Park Drive, Suite 303",Poplar Bluff,Butler,MO,63901,5737856536,36.772568,-90.457206,MAP
4107195,U,-90.858473,38.783082,POINT (-90.85847 38.78308),50,Compass Health,Compass Health/Crider Health Center (Behaviora...,1032 Crosswinds Court,Wentzville,St. Charles,MO,63385,6363326000,38.803048,-90.817251,MAP
5137509,U,-90.297655,38.748177,POINT (-90.29766 38.74818),35,Betty Jean Kerr People's Health Center,Betty Jean Kerr People's Health Centers North ...,11642 West Florissant Avenue,Florissant,St. Louis County-North,MO,63033,3148388220,38.777407,-90.280733,MAP
982305,U,-94.415457,39.249146,POINT (-94.41546 39.24915),129,Samuel U. Rodgers Health Center,Samuel U. Rodgers Health Center-Clay County Fa...,800 Haines Drive,Liberty,Clay,MO,64068,8164130662,39.257031,-94.451666,MAP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249152,U,-92.333448,38.982641,POINT (-92.33345 38.98264),67,Family Health Center,Family Dental Center,1101 North Providence Road,Columbia,Boone,MO,65203,5737778997,38.963038,-92.333919,MAP
2945526,R,-90.365751,38.133654,POINT (-90.36575 38.13365),53,COMTREA Comprehensive Health Center,COMTREA Comprehensive Health Center Administra...,227 East Main Street,Festus,Jefferson,MO,63028,6366779977,38.219992,-90.392666,MAP
5115721,U,-90.352043,38.541184,POINT (-90.35204 38.54118),36,Betty Jean Kerr People's Health Center,Betty Jean Kerr People's Health Centers West Site,7200 Manchester Road,Maplewood,St. Louis,MO,63143,3147819162,38.614255,-90.313396,MAP
3274979,R,-93.351547,39.816965,POINT (-93.35155 39.81697),161,Family Health Center,Family Dental Center @ Marceline,1600 N. Missouri Avenue,Marceline,Linn,MO,64658,6603768000,39.728467,-92.941267,MAP


In [15]:
# counts how many closest points are assigned to each points
count = closest_fqhc.groupby('OBJECTID').count()
count

Unnamed: 0_level_0,UR,long,lat,geometry,Group_Name,Facility,Address,City,County,State,Zip,Phone,Latitude,Longitude,Loc_Code
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,297,297,297,297,297,297,297,297,297,297,297,297,297,297,297
2,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222
3,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378
4,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119
5,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330,1330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,535,535,535,535,535,535,535,535,535,535,535,535,535,535,535
194,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786
195,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147
196,156,156,156,156,156,156,156,156,156,156,156,156,156,156,156


## Approach selection and Rationale

My choice is the second approach: selecting locations with the highest densities of residences within a 30-mile radius. This approach aims to focus on the FQHCs capable of maximum number of patients within specific range.

* Focusing on the areas with higher population densities can provide services in mental health to maximum number of patients in Missouri. This aspect is crucial as there are only 8 centers available (limited resources).

* There are higher chances of helping patients who are at mental risk when we are capable of having more patients.

* Efficient deployment of resources is capable when we can predict the location that needs resources.

## Assumptions
Here are some basic assumptions for computing the fitness of the location

* A higher number of people indicates better accessibility to the healthcare center.
* FHQCs with higher population densities are considered better clinics as they serve more patients.
* There must be no overalp of the healthcare centers.
* The random 1 percent of the population is enough to accuratley represent the whole population.

In [16]:
# importing necessary libraries
import random
import numpy as np

In [17]:
# convert the count data to dictionary for better data access
fqhc_coverage = {}
for i, r in pop_in_circles.iterrows():
  # population points
    fqhc_coverage[i] = r['geometry']

In [18]:
# get fqhc ids
all_fqhc_ids = list(fqhc_coverage.keys())
len(all_fqhc_ids)

197

In [19]:
# define parameters
pop_size = 20 # population size
num_gens = 50 # number of generations
mut_rate = 0.1 # mutation rate

## Metric
* This fitness function calculates the total number of population points within 30-mile radius of the selected FQHCs. One set of the FQHC sums up the coverage for each center.
* Maximizing the total value is the key of this metric, aligning with our selected approach.

In [20]:
# define fitness function
def fitness_function(selected_fqhcs):

  """
  Args: list of 8 FQHCs

  Return: the sum of population coverage for all FQHCs
   - If the FQHC id is not in dictionary, it makes the value to 0
  """
  return sum(fqhc_coverage.get(fqhc_id, 0) for fqhc_id in selected_fqhcs)

In [21]:
# create empty list
population = []

# randomly selects 8 fqhc ids
for _ in range(pop_size):
# 20 different solutsions
    population.append(random.sample(all_fqhc_ids, 8))

In [22]:
# genetic algorithm

# set the initial values of number of fqhcs, best solution, and fitness
num_fqhcs = 8
best_solution = None
best_fitness = 0

# 50 loops of generation
for gen in range(num_gens):
    # calculate fitness for each selected_fqhcs
    fitness_scores = [fitness_function(selected_fqhcs) for selected_fqhcs in population]

    # find best solution in this generation
    max_fitness = max(fitness_scores)
    max_index = fitness_scores.index(max_fitness)

    # update best solution
    if max_fitness > best_fitness:
        best_fitness = max_fitness
        best_solution = population[max_index]

    # create new population with updates
    updated_population = [population[max_index]]

    # create new solution for every generation
    while len(updated_population) < pop_size:
        # tournament selection getting the highest fitness socres for parents
        parent1 = max(random.sample(population, 4), key = fitness_function)
        parent2 = max(random.sample(population, 4), key = fitness_function)

        # crossover (random selection btw 1 to 7)
        crossover_point = random.randint(1, num_fqhcs - 1)

        # creating new solution (child)

        # takes the elements from parent1
        child = parent1[:crossover_point]

        # takes the remaining elements from parent2 that are not duplicated
        remainings = num_fqhcs - len(child)
        remainings_to_add = [fqhc for fqhc in parent2 if fqhc not in child][:remainings]
        child.extend(remainings_to_add)

        # mutation

        # 10 percent chance of mutation
        if random.random() < mut_rate:
            # selects one random position
            index_change = random.randint(0, num_fqhcs - 1)
            # list of remaining ids not in child
            remaining = [i for i in all_fqhc_ids if i not in child]
            # replace the ids in selected position if available
            if remaining:
                child[index_change] = random.choice(remaining)

        updated_population.append(child)

In [23]:
# print results
print(f'Best fitness: {best_fitness}')
print(f'Best solution: {best_solution}')

Best fitness: 76188
Best solution: [190, 147, 87, 26, 78, 36, 81, 27]


* The best fitness score is 76188, representing the number of population points within 30 miles that can access to 8 healthcare centers.
* This figures seems impressive as approximately 9500 population points have access per clinic in 30-mile radius.
* While there would be some overlap of populations, this is still an impressive figure.

In [24]:
# information about 8 selected FQHCs
selected_fqhcs = gdf.loc[best_solution]
selected_fqhcs

Unnamed: 0_level_0,Group_Name,Facility,Address,City,County,State,Zip,Phone,Latitude,Longitude,Loc_Code,geometry
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
190,Ozarks Community Health Center,Ozarks Community Health Center - Miles for Smiles,"800 E. Aldrich Rd, Ste E",Bolivar,Polk,MO,65613,4173286334,37.596697,-93.40397,MAP,POINT (-93.40401 37.59671)
147,Swope Health Services,Swope Health Northland,4443 Northwest Gateway,Riverside,Platte,MO,64150,8166272050,39.176425,-94.612609,MAP,POINT (-94.61262 39.17643)
87,Southeast Missouri Health Network,Hayti Medical Center,223 South Third Street,Hayti,Pemiscot,MO,63851,5733599803,36.232233,-89.749385,MAP,POINT (-89.7494 36.23224)
26,Affinia Healthcare,Affinia Healthcare (Program Mgmt Office),2524 Hadley Street,St. Louis,St. Louis City,MO,63106,3148148700,38.650362,-90.19394,MAP,POINT (-90.19396 38.65034)
78,Jordan Valley Community Health Center,Jordan Valley Community Health - Mobile Dental...,618 North Benton Avenue,Springfield,Greene,MO,65806,4178310150,37.214148,-93.287538,MAP,POINT (-93.28747 37.21445)
36,Betty Jean Kerr People's Health Center,Betty Jean Kerr People's Health Centers West Site,7200 Manchester Road,Maplewood,St. Louis,MO,63143,3147819162,38.614255,-90.313396,MAP,POINT (-90.3134 38.61426)
81,Jordan Valley Community Health Center,Jordan Valley Community Health Center Medical,440 E. Tampa Street,Springfield,Greene,MO,65806,4178310150,37.213117,-93.288386,MAP,POINT (-93.28839 37.21313)
27,Affinia Healthcare,Affinia Healthcare at Biddle,1717 Biddle St.,St. Louis,St. Louis City,MO,63106,3148148700,38.638804,-90.201061,MAP,POINT (-90.20161 38.63924)


## Final Obsevations:
* It is notieable that the selected centers are located throughout various regions in Missouri, indicating no heavy concentration of clinics.

* Coverage of major cities such as Saint Louis and Springfield may indicate the assumptions were met.

* Two Jordan Valley centers are very close to each other, causing overlap in patients. However, it could also indicate that this region demand more resources in the future.

