## Data Collection

1. Pune-Ward Boundary Data 
Collected from: https://github.com/datameet/Municipal_Spatial_Data/blob/master/Pune/pune-admin-wards_2017.geojson
2. Road Network of Pune extracted using osmnx library
3. Point of Interest Data (POI) specifically fuel and parkings are collected again calling the osmnx library

In [1]:
import geopandas as gpd

# Load Pune boundary
pune_boundary = gpd.read_file(r"C:\Users\admin\Downloads\pune-admin-wards_2017.geojson")

In [2]:
import numpy as np
from shapely.geometry import Point

# Define grid spacing (in the same units as the CRS of pune_boundary)
grid_spacing = 0.01  # Adjust this value to control the grid density

# Get the bounds of the Pune boundary
minx, miny, maxx, maxy = pune_boundary.total_bounds

# Generate a list of points in a grid pattern
x_coords = np.arange(minx, maxx, grid_spacing)
y_coords = np.arange(miny, maxy, grid_spacing)
grid_points = [Point(x, y) for x in x_coords for y in y_coords]

# Create a GeoDataFrame with the grid points
grid_gdf = gpd.GeoDataFrame(geometry=grid_points, crs=pune_boundary.crs)

# Clip the grid to the Pune boundary
grid_gdf = gpd.clip(grid_gdf, pune_boundary)

In [3]:
import osmnx as ox

# Download road network within the Pune boundary
road_network = ox.graph_from_polygon(pune_boundary.geometry[0], network_type='drive')
road_nodes, road_edges = ox.graph_to_gdfs(road_network, nodes=True, edges=True)
road_nodes = road_nodes.to_crs(pune_boundary.crs)

# Download Points of Interest (POIs) such as fuel stations and parking (proxy for commercial areas)
pois = ox.geometries_from_polygon(pune_boundary.geometry[0], tags={"amenity": ["fuel", "parking"]})
fuel_stations = pois[pois["amenity"] == "fuel"]
commercial_areas = pois[pois["amenity"] == "parking"]

  pois = ox.geometries_from_polygon(pune_boundary.geometry[0], tags={"amenity": ["fuel", "parking"]})


In [4]:
# Calculate distances from each grid point to the nearest road, fuel station, and commercial area
grid_gdf["distance_to_road"] = grid_gdf.geometry.apply(lambda point: road_nodes.distance(point).min())
grid_gdf["distance_to_fuel_station"] = grid_gdf.geometry.apply(lambda point: fuel_stations.distance(point).min())
grid_gdf["distance_to_commercial_area"] = grid_gdf.geometry.apply(lambda point: commercial_areas.distance(point).min())


  grid_gdf["distance_to_road"] = grid_gdf.geometry.apply(lambda point: road_nodes.distance(point).min())

  grid_gdf["distance_to_fuel_station"] = grid_gdf.geometry.apply(lambda point: fuel_stations.distance(point).min())

  grid_gdf["distance_to_commercial_area"] = grid_gdf.geometry.apply(lambda point: commercial_areas.distance(point).min())


In [5]:
import rasterio
import numpy as np
import geopandas as gpd

# Load Pune boundary from GeoJSON file
pune_boundary = gpd.read_file(r"C:\Users\admin\Downloads\pune-admin-wards_2017.geojson")

# Reproject the Pune boundary to the CRS of the population density raster (e.g., UTM Zone 43N)
pune_boundary = pune_boundary.to_crs(epsg=32643)  # UTM Zone 43N for Pune

# Ensure the grid and population density raster use the same CRS
grid_crs = grid_gdf.crs
population_density_raster = rasterio.open(r"C:\Users\admin\Downloads\ind_pd_2017_1km.tif")

# Check if CRS of grid and raster match; if not, reproject the grid
if grid_crs != population_density_raster.crs:
    grid_gdf = grid_gdf.to_crs(population_density_raster.crs)

# Extract population density values for each grid point
pop_density_values = []

# Ensure the grid points are within the bounds of the raster
xmin, ymin, xmax, ymax = population_density_raster.bounds
for point in grid_gdf.geometry:
    if xmin <= point.x <= xmax and ymin <= point.y <= ymax:
        # Convert the coordinates to row, col indices for the raster
        row, col = population_density_raster.index(point.x, point.y)
        
        # Safely append population density value, checking for out-of-bounds errors
        if 0 <= row < population_density_raster.height and 0 <= col < population_density_raster.width:
            # Read the population density value at the row, col
            pop_density_value = population_density_raster.read(1)[row, col]
            pop_density_values.append(pop_density_value)
        else:
            pop_density_values.append(np.nan)  # Handle out-of-bounds points
    else:
        pop_density_values.append(np.nan)  # Handle points outside the raster bounds

# Add population density to grid GeoDataFrame
grid_gdf["population_density"] = pop_density_values

# Inspect the updated grid
grid_gdf

Unnamed: 0,geometry,distance_to_road,distance_to_fuel_station,distance_to_commercial_area,population_density
182,POINT (73.84156 18.44883),0.085708,0.094687,0.091997,6103.210449
221,POINT (73.86156 18.43883),0.102654,0.109599,0.107620,3732.688721
222,POINT (73.86156 18.44883),0.093847,0.100247,0.098433,6846.781738
242,POINT (73.87156 18.44883),0.099181,0.104361,0.102957,7118.951172
202,POINT (73.85156 18.44883),0.089317,0.096992,0.094743,7504.191406
...,...,...,...,...,...
356,POINT (73.92156 18.58883),0.081174,0.088168,0.102823,5904.384766
316,POINT (73.90156 18.58883),0.061950,0.068589,0.084788,5915.092285
376,POINT (73.93156 18.58883),0.090912,0.098021,0.112077,2972.632324
317,POINT (73.90156 18.59883),0.065793,0.071565,0.089249,7583.150391


In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import folium
import matplotlib.pyplot as plt
from geopy.distance import geodesic

In [7]:
# Extract coordinates directly from the 'geometry' column (assuming it's a GeoPandas GeoDataFrame)
grid_gdf['Longitude'] = grid_gdf.geometry.x
grid_gdf['Latitude'] = grid_gdf.geometry.y

# Now you have the Longitude and Latitude columns in the grid_gdf
grid_gdf[['Longitude', 'Latitude']]

Unnamed: 0,Longitude,Latitude
182,73.841561,18.448829
221,73.861561,18.438829
222,73.861561,18.448829
242,73.871561,18.448829
202,73.851561,18.448829
...,...,...
356,73.921561,18.588829
316,73.901561,18.588829
376,73.931561,18.588829
317,73.901561,18.598829


In [8]:
# Normalize the data (excluding the 'geometry' column)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(grid_gdf[['distance_to_road', 'distance_to_fuel_station', 
                                           'distance_to_commercial_area', 'population_density']])



In [9]:
# Apply DBSCAN with an epsilon (eps) of 0.3 km and min_samples = 2 for minimal density
db = DBSCAN(eps=0.35, min_samples = 2, metric = 'euclidean')
grid_gdf['Cluster'] = db.fit_predict(scaled_features)

In [10]:
# Check the clusters
grid_gdf[['Latitude', 'Longitude', 'Cluster']]

Unnamed: 0,Latitude,Longitude,Cluster
182,18.448829,73.841561,0
221,18.438829,73.861561,0
222,18.448829,73.861561,0
242,18.448829,73.871561,0
202,18.448829,73.851561,0
...,...,...,...
356,18.588829,73.921561,17
316,18.588829,73.901561,18
376,18.588829,73.931561,0
317,18.598829,73.901561,18


In [17]:
from sklearn.metrics import davies_bouldin_score

# Compute Davies-Bouldin Index
db_index = davies_bouldin_score(scaled_features, grid_gdf['Cluster'])
print("Davies-Bouldin Index:", db_index)

Davies-Bouldin Index: 1.6881389822097852


In [11]:
import folium
from folium.plugins import MarkerCluster

# Create a base map centered around Pune (adjust if needed)
m = folium.Map(location=[18.5204, 73.8567], zoom_start=12)

# Create a MarkerCluster for clusters
marker_cluster = MarkerCluster().add_to(m)

# Plot the points with cluster information
for index, row in grid_gdf.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"Cluster: {row['Cluster']}",
        icon=folium.Icon(color='blue' if row['Cluster'] >= 0 else 'red')  # Customize color based on cluster
    ).add_to(marker_cluster)

m

In [12]:
# Group the stations by cluster and calculate the centroid for each cluster
centroids = grid_gdf.groupby('Cluster').agg({
    'Longitude': 'mean',
    'Latitude': 'mean'
}).reset_index()

# Visualize the centroids as EV station placements
for index, row in centroids.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"EV Station Cluster {row['Cluster']}",
        icon=folium.Icon(color='green')  # Green for EV stations
    ).add_to(m)

m

In [13]:
import folium
from folium.plugins import MarkerCluster

# Create a base map centered around Pune (adjust if needed)
m = folium.Map(location=[18.5204, 73.8567], zoom_start=12)

# Create a MarkerCluster for clusters
marker_cluster = MarkerCluster().add_to(m)

# Group the stations by cluster and calculate the centroid for each cluster
centroids = grid_gdf.groupby('Cluster').agg({
    'Longitude': 'mean',
    'Latitude': 'mean'
}).reset_index()

# Visualize the centroids as EV station placements
for index, row in centroids.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"EV Station Cluster {row['Cluster']}",
        icon=folium.Icon(color='green')  # Green for EV stations
    ).add_to(m)

m

In [14]:
# Save centroids to a CSV file
centroids.to_csv('ev_station_centroids.csv', index=False)

print("Centroid locations saved to 'ev_station_centroids.csv'")

Centroid locations saved to 'ev_station_centroids.csv'
