### This code is designed to take the CSTDM OD data from Madhu and site early stations
It uses a weighted k-means cluster to take the origin data and cluster them into centroids which represent station locations. Makes a map and js file of the results

Weight (i.e. trips) that is assigned to clusters would be used to partition statewide fuel demands to different stations
- could change the weight of some clusters by eliminating some of the origins that are far from the centroid (i.e. trucks that wouldn't be captured by the station bc too far away) in order to more realistically partition demands.
- could take really large cluster locations with alot of captured truck trips and split into multiple smaller locations (or maybe that's just combining outputs of runs with smaller and larger k values)
- 


In [80]:
import folium
import pandas as pd
import numpy as np
import json

#ChatGPT helped me write this weighted kmeans function
def weighted_kmeans(points, weights, k, max_iters=1000):
    # Randomly initialize centroids
    centroids = points[np.random.choice(points.shape[0], k, replace=False)]
    
    for _ in range(max_iters):
        # Assignment step
        distances = np.linalg.norm(points[:, np.newaxis] - centroids, axis=2)
        weighted_distances = distances / weights[:, np.newaxis]
        cluster_assignments = np.argmin(weighted_distances, axis=1)

        # Update step
        new_centroids = np.zeros((k, points.shape[1]))
        for i in range(k):
            assigned_points = points[cluster_assignments == i]
            assigned_weights = weights[cluster_assignments == i]
            if len(assigned_points) > 0:
                new_centroids[i] = np.average(assigned_points, weights=assigned_weights, axis=0)
        
        centroids = new_centroids

    return centroids, cluster_assignments
    
#TAZ coordinates
tazdf=pd.read_csv("tazListI.csv")

#Truck Origins
truckorigindf=pd.read_csv("TAZ_Origin.csv")

# merge dataframes to add TAZ coordinates
truckorigindf=truckorigindf.merge(tazdf[['TAZ', 'Long', 'Lat']], left_on='TAZ_Zone', right_on='TAZ', how='left')
truckorigindf

#This parameter adds weighting to the kmeans, i.e. 
# TAZs with more trips will pull cluster centroids closer
weighting_factor='Trip_Heavy'

# Convert DataFrame to NumPy array for processing
points = truckorigindf[['Long', 'Lat']].values
weights = truckorigindf[weighting_factor].values

# Number of clusters
k = 20

centroids, assignments = weighted_kmeans(points, weights, k,10000)

# Add cluster assignments to the original DataFrame
truckorigindf['cluster'] = assignments

k_meansdf = pd.DataFrame(centroids, columns=['Long', 'Lat'])
k_meansdf['cluster'] = k_meansdf.index 
# display(truckorigindf.head())

# sum weights for each cluster to mark size of cluster into new dataframe
cluster_weights = truckorigindf.groupby('cluster')[weighting_factor].sum().reset_index()

# Merge the summed weights into the centroids DataFrame
k_meansdf = k_meansdf.merge(cluster_weights, on='cluster', how='left')
display(k_meansdf)

#folium make map
mapcenter={'lat':36.8268747428773, 'lng': -120.12451171875001}
map = folium.Map(location=[mapcenter['lat'], mapcenter['lng']], zoom_start=6)

# Add circle markers for all truck origins 
# (size and opacity of circle is a function of # of trips)
for _, row in truckorigindf.iterrows():
    folium.CircleMarker(
        location=[row['Lat'], row['Long']],
        radius=row['Trip_Heavy']**.4,  
        popup=f"TAZ: {row['TAZ_Zone']}<br>Long: {row['Long']}<br>Lat: {row['Lat']}<br>Cluster: {row['cluster']}",
        color=None,  
        fill=True,
        fill_color='blue',  
        fill_opacity=row['Trip_Heavy']**.2/10  
    ).add_to(map)

# Add circle markers for clusters (i.e. stations)
# radius of circle is function of number of trips closest to that station
for _, row in k_meansdf.iterrows():
    folium.CircleMarker(
        location=[row['Lat'], row['Long']],
        radius=row['Trip_Heavy']**.5/10,  
        popup=f"Cluster: {row['cluster']}<br>Long: {row['Long']}<br>Lat: {row['Lat']}<br>Weight:{row['Trip_Heavy']}",
        color='black',  
        weight=2,
        fill=True,
        fill_color='red',  
        fill_opacity=0.8  
    ).add_to(map)

# Save the map to an HTML file
map.save('taz_map_'+str(k)+'clusters.html')

#write output of json file
truckorigindf.drop(columns=['County','TAZ_Zone','TAZ_Int', 'Trip_Light','Trip_Medium1','Trip_Medium2'], inplace=True)
jsontruckstring=json.dumps(truckorigindf.to_dict(orient='records'))
jsonclusterstring=json.dumps(k_meansdf.to_dict(orient='records'))

with open('tazdata_'+str(k)+'clusters.js', 'w') as file:
    file.write("tazdata="+jsontruckstring+";  "+"clusterdata="+jsonclusterstring+";")


  weighted_distances = distances / weights[:, np.newaxis]


Unnamed: 0,Long,Lat,cluster,Trip_Heavy
0,-118.395815,34.178039,0,16517.32
1,-117.983688,34.025306,1,11616.79
2,-117.138367,33.217166,2,5612.28
3,-117.819747,33.712021,3,16113.47
4,-119.093346,34.29151,4,3835.73
5,-120.521196,35.12997,5,1817.13
6,-117.071866,32.785992,6,6584.91
7,-121.140041,37.297972,7,11874.59
8,-117.46764,34.042682,8,15911.05
9,-122.184955,37.635769,9,30666.46


In [71]:
truckorigindf.head()

Unnamed: 0,County,TAZ_Zone,TAZ_Int,Trip_Heavy,Trip_Light,Trip_Medium1,Trip_Medium2,TAZ,Long,Lat,cluster
0,Del Norte,100,100,9.16,0.16,1.03,1.27,100,-124.218478,41.788927,3
1,Del Norte,101,101,10.78,0.18,1.12,1.38,101,-124.16879,41.77183,3
2,Del Norte,102,102,38.37,0.99,4.99,6.09,102,-124.156931,41.840055,3
3,Del Norte,103,103,79.62,1.53,9.74,12.61,103,-123.880232,41.898154,3
4,Del Norte,104,104,1.93,0.0,0.2,0.2,104,-123.876849,41.615444,3


In [79]:
k_meansdf

Unnamed: 0,Long,Lat,cluster,Trip_Heavy
0,-117.459364,34.034932,0,16243.9
1,-119.461422,35.382063,1,7420.34
2,-117.823681,33.713087,2,16382.11
3,-119.443917,36.601843,3,5989.39
4,-122.65778,38.803247,4,3403.07
5,-115.553008,33.732091,5,4127.03
6,-122.057529,38.10614,6,6257.95
7,-121.467152,36.654373,7,3748.62
8,-121.334727,38.60623,8,12676.97
9,-121.532425,39.60518,9,4492.61
