## Exploratory Data Analysis

In [1]:
#Import libraries
import ast

import pandas as pd
import geopandas
from matplotlib import pyplot as plt
import folium
from folium import plugins

# Load the raw restaurant locations
geodata = pd.read_csv('geodata.csv')

# Build a GeoDataFrame whose point geometry comes from the longitude/latitude
# columns, tagged with the EPSG:4326 coordinate reference system
gdf = geopandas.GeoDataFrame(
    geodata,
    geometry=geopandas.points_from_xy(geodata.longitude, geodata.latitude),
    crs="EPSG:4326",
)

# Number of restaurants (all values of 'restaurant' combined) in each state
state_counts = gdf['state'].value_counts()

# Contingency table: one row per state, one column per 'restaurant' value
tbl = pd.crosstab(index=gdf['state'], columns=gdf['restaurant'])

# Five states with the most CJ and HD locations respectively
cj_by_state = tbl[['CJ']].sort_values(by=['CJ'], ascending=False).head()
cj_total = tbl[['CJ']].sum()
hd_by_state = tbl[['HD']].sort_values(by=['HD'], ascending=False).head()
hd_total = tbl[['HD']].sum()

# cj_total / hd_total are one-element Series (sum over a one-column DataFrame).
# Pull the scalar out by label when printing; formatting the Series itself
# dumps its index name and dtype into the message.
print(f"Total Hardees: {hd_total['HD']}\nTotal Carls Juniors: {cj_total['CJ']}")
print("\nStates with largest number of Carl's Jr. and Hardee's Restaurants:")
print("Carl's Jr.\n", cj_by_state, "\n", sep="")
print("Hardee's\n", hd_by_state, sep="")


Total Hardees: restaurant
HD    1639
dtype: int64
Total Carls Juniors: restaurant
CJ    1074
dtype: int64

States with largest number of Carl's Jr. and Hardee's Restaurants:
Carl's Jr.
restaurant   CJ
state          
CA          662
AZ           72
OR           58
CO           46
NV           44

Hardee's
restaurant   HD
state          
NC          215
VA          172
TN          149
SC          122
IL           98


In [2]:
# Create an interactive map of every location, centred on the mean coordinate
m = folium.Map(location=[gdf.geometry.y.mean(), gdf.geometry.x.mean()], zoom_start=3)

# Marker colour for each value of the 'restaurant' column
color_mapping = {
    'HD': 'red',
    'CJ': 'yellow',
}

# NOTE(review): this cluster layer is attached to the map, but the markers
# below are added to the map itself, so the layer stays empty. Either add the
# markers to `marker_cluster` (to get clustering) or drop this line - confirm
# which was intended.
marker_cluster = plugins.MarkerCluster().add_to(m)

# Draw one circle per restaurant, coloured by its 'restaurant' value
for index, row in gdf.iterrows():
    type_name = row['restaurant']
    color = color_mapping.get(type_name, 'gray')  # 'gray' for any unmapped value
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],  # folium expects [lat, lon]
        radius=1.5,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.5,
        # Popup text is rendered as HTML, where "\n" does not break the line;
        # use <br> to put the address on its own line.
        popup=f"ID: {row['location_id']}<br>Address: {row['full_address']}"
    ).add_to(m)
m


### Calculating Distance

We can use geopy's distance function to determine the distance (in miles) between each Hardee's and each Carl's Junior and store the results in a matrix.

In [5]:
from geopy.distance import distance 
import numpy as np
# Split the locations into one DataFrame per brand
hds = gdf[gdf['restaurant'] == 'HD']
cjs = gdf[gdf['restaurant'] == 'CJ']

# The 'coordinates' column holds text such as "(35.461142, -94.7789646)".
# Parse it with ast.literal_eval instead of eval: literal_eval only accepts
# Python literals, so a malformed or malicious value in the CSV cannot run code.
hd_coords = [ast.literal_eval(text) for text in hds['coordinates']]
cj_coords = [ast.literal_eval(text) for text in cjs['coordinates']]

# Rows index Hardee's locations, columns index Carl's Jr. locations
n_hd = len(hd_coords)
n_cj = len(cj_coords)
distance_matrix = np.zeros((n_hd, n_cj))

# Fill in the distance in miles for every (Hardee's, Carl's Jr.) pair.
# This makes one geopy call per pair, so it is slow (look to improve later).
for i, hd_coord in enumerate(hd_coords):
    for j, cj_coord in enumerate(cj_coords):
        distance_matrix[i, j] = distance(hd_coord, cj_coord).miles




Now we have a matrix corresponding to the distances between every Hardee's (i) and every Carl's Jr. (j). This will be helpful later when solving our optimization model.

For now, let's search for the shortest straight-line distance between a Hardee's and a Carl's Junior (ignoring any geographical features that may limit travel, such as lakes, mountains, etc.).

In [11]:

# Smallest entry of the pairwise distance matrix, and a mask of where it occurs
shortest = distance_matrix.min()
is_shortest = distance_matrix == shortest

# Row (Hardee's) and column (Carl's Jr.) positions of the minimum
hd_min, cj_min = np.nonzero(is_shortest)

min_dist = distance_matrix[is_shortest]
print(min_dist)

# Look up the restaurant at each position of the first minimum found.
# Indexing with a list keeps each selection a one-row DataFrame.
x = hds.iloc[[int(hd_min[0])]]
y = cjs.iloc[[int(cj_min[0])]]
minimums = pd.concat([x, y])
minimums

[31.7262445]


Unnamed: 0,location_id,street_address,city,state,postal_code,restaurant,full_address,coordinates,latitude,longitude,geometry
1102,HD-1103,2102 E Cherokee Ave,Sallisaw,OK,74955,HD,"2102 E Cherokee Ave, Sallisaw, OK 74955","(35.461142, -94.7789646)",35.461142,-94.778965,POINT (-94.77896 35.46114)
2537,CJ-899,1850 S MUSKOGEE AVE,TAHLEQUAH,OK,74464,CJ,"1850 S MUSKOGEE AVE, TAHLEQUAH, OK 74464","(35.8919233, -94.977321)",35.891923,-94.977321,POINT (-94.97732 35.89192)


In [None]:

# Show the closest pair of locations on a map centred between them
m = folium.Map(location=[minimums.geometry.y.mean(), minimums.geometry.x.mean()], zoom_start=7)

# Marker colour for each value of the 'restaurant' column
color_mapping = {
    'HD': 'green',
    'CJ': 'blue',
}

# Draw one circle per row of `minimums`, coloured by its 'restaurant' value
for index, row in minimums.iterrows():
    type_name = row['restaurant']
    color = color_mapping.get(type_name, 'gray')  # 'gray' for any unmapped value
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],  # folium expects [lat, lon]
        radius=2,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.5,
        # Popup text is rendered as HTML, where "\n" does not break the line;
        # use <br> to put the address on its own line.
        popup=f"ID: {row['location_id']}<br>Address: {row['full_address']}"
    ).add_to(m)
m