In [None]:
# Step 4: Optimize New Charging Station Placement

## 4.1  Solve ILP for Optimal Placement
"""

import numpy as np
import folium
!pip install pulp
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Define a function that optimizes new charging station placement using Integer Linear Programming (ILP) and ensures stations are placed in high-demand and low-coverage areas.
def optimize_station_placement(df, num_new_stations=3):
    """Select rows of ``df`` as new charging-station sites with a binary ILP.

    One binary decision variable is created per row. The objective maximizes
    the summed ``usage_stats_avg_users_per_day`` of the selected rows,
    subject to:

    * at most ``num_new_stations`` selected rows per ``cluster`` group;
    * rows whose demand is below the median demand are never selected;
    * rows with ``distance_to_city_km`` greater than 10 are never selected.

    Args:
        df: DataFrame providing the columns ``cluster``,
            ``usage_stats_avg_users_per_day``, ``distance_to_city_km``,
            ``city``, ``latitude`` and ``longitude``. Modified in place.
        num_new_stations: Maximum number of rows selected within one cluster.

    Returns:
        The same ``df`` object with two columns added or overwritten:
        ``New_Station_ILP`` (the PuLP decision variables) and
        ``Optimized_Station`` (1 for selected rows, 0 otherwise).
    """
    # Imported locally: the file-level import only brings in the modelling names.
    from pulp import LpStatus, LpStatusOptimal

    demand_col = "usage_stats_avg_users_per_day"

    # Defining the Integer Linear Programming (ILP) problem
    prob = LpProblem("Charging_Station_Optimization", LpMaximize)

    # PuLP expects distinct variable names, so fall back to row positions
    # when the index carries duplicate labels.
    name_keys = df.index if df.index.is_unique else range(len(df))
    station_vars = [LpVariable(f"station_{key}", cat="Binary") for key in name_keys]
    # The variables stay on the frame because this column has always been
    # part of the returned DataFrame.
    df["New_Station_ILP"] = station_vars

    # Read each column once instead of doing a label lookup per row.
    demand = df[demand_col].tolist()
    distance_km = df["distance_to_city_km"].tolist()

    # Objective: maximize the total demand covered by the selected rows
    prob += lpSum(var * users for var, users in zip(station_vars, demand))

    # Limiting new stations per cluster to avoid over-concentration
    for _, members in df.groupby("cluster"):
        prob += lpSum(members["New_Station_ILP"].tolist()) <= num_new_stations

    # Exclude rows below the median demand and rows farther than 10 km from
    # the city center by pinning their variables to zero.
    demand_threshold = df[demand_col].median()
    for var, users, dist in zip(station_vars, demand, distance_km):
        if users < demand_threshold or dist > 10:
            prob += var == 0

    # Solving the optimization problem
    prob.solve()
    if prob.status != LpStatusOptimal:
        print(
            f"WARNING: solver finished with status '{LpStatus[prob.status]}'; "
            "unset variables are reported as not selected."
        )

    # A variable the solver never assigned has varValue None; rounding also
    # removes solver float noise so the `== 1` filters stay exact.
    df["Optimized_Station"] = [
        0 if var.varValue is None else int(round(var.varValue))
        for var in station_vars
    ]

    print(f"Total new stations recommended: {df['Optimized_Station'].sum()}")
    print("Optimized station locations:\n", df[df["Optimized_Station"] == 1][["city", "latitude", "longitude"]])

    return df

"""## 4.2 Generate Completely New Stations (If Needed)
If the ILP does not allocate enough stations, generate new stations in high-demand clusters.
"""

# This function generates new locations near existing stations and ensures they are unique.

def generate_new_locations(existing_df, cluster_id, num_new_stations=3, max_distance=5):
    """Generate unique candidate coordinates near the stations of one cluster.

    Each candidate is a randomly chosen station of the cluster, offset by an
    independent uniform shift in latitude and in longitude. The reference
    station is picked uniformly; demand is not taken into account.

    Args:
        existing_df: DataFrame with ``cluster``, ``latitude`` and
            ``longitude`` columns.
        cluster_id: Value of ``cluster`` whose stations serve as references.
        num_new_stations: Number of candidate locations to generate.
        max_distance: Maximum shift per axis in kilometres. It is converted
            to degrees with the calibration 5 km ~ 0.045 degrees, so the
            default reproduces the previously hard-coded 0.045-degree shift.

    Returns:
        A list of ``(latitude, longitude)`` tuples, none of which coincides
        with an existing station of the cluster or with another candidate.
        The list is empty when the cluster has no stations.

    Raises:
        ValueError: If ``max_distance`` is not positive (a zero shift could
            never produce a location distinct from the reference station).
    """
    if max_distance <= 0:
        raise ValueError("max_distance must be positive")

    cluster_df = existing_df[existing_df["cluster"] == cluster_id]
    if cluster_df.empty:
        # No reference station to offset from.
        return []

    # A private, fixed-seed generator keeps the output reproducible without
    # resetting NumPy's global random state for the rest of the program.
    rng = np.random.RandomState(42)

    # Scale the 0.045-degree / 5 km calibration linearly with max_distance.
    max_shift_deg = 0.045 * (max_distance / 5)

    new_stations = []
    existing_locations = set(zip(cluster_df["latitude"], cluster_df["longitude"]))

    while len(new_stations) < num_new_stations:
        lat_shift = rng.uniform(-max_shift_deg, max_shift_deg)
        lon_shift = rng.uniform(-max_shift_deg, max_shift_deg)

        # Picking a random station of the cluster as a reference point
        reference = cluster_df.sample(1, random_state=rng)
        candidate = (
            reference["latitude"].values[0] + lat_shift,
            reference["longitude"].values[0] + lon_shift,
        )

        # Ensuring the new location is not already used
        if candidate in existing_locations:
            continue
        existing_locations.add(candidate)
        new_stations.append(candidate)

    return new_stations

"""## 4.3 Visualize Optimized & New Stations"""

# This function plots the existing stations, the ILP-optimized stations, and the newly generated locations.
def plot_optimized_stations(df, new_locations):
    """Draw existing, ILP-selected and generated stations on a folium map.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``city`` and
            ``Optimized_Station`` columns.
        new_locations: Iterable of ``(latitude, longitude)`` pairs for the
            generated stations.

    Returns:
        The ``folium.Map`` holding all three marker layers. The map is also
        passed to ``display`` before being returned.
    """
    # Center the map on the mean coordinate of all stations.
    map_center = [df['latitude'].mean(), df['longitude'].mean()]
    station_map = folium.Map(location=map_center, zoom_start=6)

    # Layer 1: every existing station as a small blue dot.
    for _, station in df.iterrows():
        dot = folium.CircleMarker(
            location=[station['latitude'], station['longitude']],
            radius=3,
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.7,
            popup=f"Existing Station - {station['city']}"
        )
        dot.add_to(station_map)

    # Layer 2: rows selected by the ILP as red markers.
    is_selected = df["Optimized_Station"] == 1
    for _, station in df[is_selected].iterrows():
        red_icon = folium.Icon(color="red", icon="bolt")
        pin = folium.Marker(
            location=[station['latitude'], station['longitude']],
            popup=f"Optimized Station - {station['city']}",
            icon=red_icon
        )
        pin.add_to(station_map)

    # Layer 3: generated locations as green markers.
    for lat, lon in new_locations:
        green_icon = folium.Icon(color="green", icon="cloud")
        pin = folium.Marker(
            location=[lat, lon],
            popup="New Generated Station",
            icon=green_icon
        )
        pin.add_to(station_map)

    print("Displaying optimized station placement map with new locations:")
    display(station_map)

    return station_map

"""## 4.4 Confirm df_cleaned Has Clustering Data"""

# List the available columns so the clustering prerequisite can be checked by eye.
print("Columns in df_cleaned:\n", df_cleaned.columns.tolist())

# Check whether the 'cluster' column exists (the success message previously
# had an unbalanced quote around the column name).
if 'cluster' in df_cleaned.columns:
    print("'cluster' column exists. Proceeding with optimization.")
else:
    print("ERROR: 'cluster' column is missing! Clustering step was skipped.")

"""## 4.5 Ensure Clustering is Done Before Optimization"""

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Extract and scale the geospatial data, using only rows that have both coordinates
has_coords = df_cleaned[['latitude', 'longitude']].notna().all(axis=1)
geo_data = df_cleaned.loc[has_coords, ['latitude', 'longitude']]
scaler = StandardScaler()
geo_data_scaled = scaler.fit_transform(geo_data)

# Apply K-Means with the chosen number of clusters
optimal_k = 15
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(geo_data_scaled)

if has_coords.all():
    df_cleaned['cluster'] = cluster_labels
else:
    # Labels exist only for the rows that were clustered. Assigning them to
    # the whole frame would fail on the length mismatch, so rows without
    # coordinates get NaN instead.
    df_cleaned['cluster'] = np.nan
    df_cleaned.loc[has_coords, 'cluster'] = cluster_labels

print("Clustering applied. Now df_cleaned has a 'cluster' column.")

"""## 4.6 Run Optimization"""

# Step 1: run the ILP over the existing station locations
df_optimized = optimize_station_placement(df_cleaned)

# Step 2: generate three additional candidate locations for every cluster
new_station_locations = []
for cluster_id in df_cleaned["cluster"].unique():
    new_station_locations += generate_new_locations(
        df_cleaned, cluster_id, num_new_stations=3
    )

# Step 3: plot the existing, optimized and newly generated stations together
final_map = plot_optimized_stations(df_optimized, new_station_locations)

"""1. Existing Stations (🔵 Blue Dots)
  - Current charging infrastructure distribution across the world.
  - Some areas may have high density, while others may be underserved.


2. Optimized New Stations (🔴 Red Markers)
  - Placed using ILP optimization in high-demand but low-coverage areas.
  - Prioritizes user accessibility and demand hotspots.


3. Generated New Stations (🟢 Green Markers)
  - Placed using geospatial randomization near high-demand zones.
  - Ensures balanced expansion without redundancy.