In [None]:
# Step 1: Data Cleaning & Preprocessing

## 1.1 Loading the Dataset & Cleaning it
"""

# Importing general libraries

import pandas as pd
import matplotlib.pyplot as plt

# Loading the data
file_path = "/content/drive/MyDrive/detailed_ev_charging_stations_with_city.csv"
df = pd.read_csv(file_path)

# Standardising the column names: trim whitespace, lower-case, spaces -> underscores
df.columns = [name.strip().lower().replace(" ", "_") for name in df.columns]

# Removing invalid rows, i.e. rows where 'city' == 'Country'
initial_count = len(df)
is_valid_city = df['city'] != 'Country'
df = df[is_valid_city]
removed_cities = initial_count - len(df)
print(f"Removed {removed_cities} rows where city was 'Country'.\n")

# Plotting the raw data (longitude on x, latitude on y) using the Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['longitude'], df['latitude'], c='red', alpha=0.5, label="Raw Data")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("EV Charging Stations (Before Removing Water Points)")
ax.legend()
ax.grid(True)
plt.show()

# Counting the total stations before cleaning
print(f"Total EV Charging Stations BEFORE Cleaning: {len(df)}\n")

# Importing the Folium Library for Mapping

import folium

# Creating a BASE map that will be centered at the mean location of stations
center_lat = df['latitude'].mean()
center_long = df['longitude'].mean()
map_before = folium.Map(location=[center_lat, center_long], zoom_start=6)

# Add charging stations to the map (Before Cleaning).
# Every marker shares the same red styling, so it is defined once.
marker_style = dict(radius=4, color='red', fill=True, fill_color='red', fill_opacity=0.7)
for lat, lon, city in zip(df['latitude'], df['longitude'], df['city']):
    marker = folium.CircleMarker(
        location=[lat, lon],
        popup=f"City: {city}",
        **marker_style,
    )
    marker.add_to(map_before)

# Saving and displaying the map (the bare name renders it in a notebook cell)
map_before.save("EV_Charging_Before_Cleaning.html")
map_before

# Importing libraries for removing the water data points that were shown on the map

import geopandas as gpd
from shapely.geometry import Point

# Loading the land shapefile once at module level; is_on_land() reads this
# `land` layer for every point-in-polygon check.
land = gpd.read_file("/content/drive/MyDrive/10m_physical/ne_10m_land.shp")


# FYI: This shapefile was downloaded from Natural Earth (https://www.naturalearthdata.com/downloads/); the 10m-precision land file was used.

# Defining a function that checks if a coordinate is on land (using a land polygon shapefile).

def is_on_land(lat, lon):
    """Report whether the coordinate (lat, lon) lies inside any land polygon.

    The point is built as ``Point(lon, lat)`` because shapely points take
    (x, y) order. The check runs against the module-level ``land`` layer.

    Returns:
        True if at least one geometry in ``land`` contains the point,
        otherwise False.
    """
    station = Point(lon, lat)
    # One boolean per land geometry; reduce them to a single plain bool.
    containment = land.contains(station)
    return bool(containment.any())

# Defining a function that removes stations that are in water using a land mask.
def filter_stations_by_land(df):
    """Return only the stations whose coordinates fall on land.

    Each row's ``latitude`` / ``longitude`` pair is tested with
    ``is_on_land``; rows for which it returns False are dropped. The number
    of removed rows is printed.

    The input frame is left untouched: the land mask is kept in a separate
    boolean Series instead of being written into ``df`` as a temporary
    ``is_land`` column, so the caller's DataFrame does not gain a stray
    column as a side effect.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns.

    Returns:
        A new DataFrame with the same columns as ``df``, containing only the
        rows located on land. An empty input yields an empty result.
    """
    # Build the mask from the two coordinate columns directly. Unlike
    # df.apply(..., axis=1), this also yields a proper (empty) boolean Series
    # when df has no rows.
    on_land = pd.Series(
        [is_on_land(lat, lon) for lat, lon in zip(df["latitude"], df["longitude"])],
        index=df.index,
        dtype=bool,
    )
    df_filtered = df[on_land]
    print(f"After filtering using ShapeFile (.shp) we have succesfully removed {len(df) - len(df_filtered)} water locations.")
    return df_filtered

# Applying land filtering
df_cleaned = filter_stations_by_land(df)

# Saving cleaned dataset (without the index column)
cleaned_file_path = "/content/drive/MyDrive/cleaned_ev_charging_stations.csv"
df_cleaned.to_csv(cleaned_file_path, index=False)

# Plotting cleaned data (longitude on x, latitude on y) using the Axes API
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_cleaned['longitude'], df_cleaned['latitude'], c='blue', alpha=0.5, label="Cleaned Data")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("EV Charging Stations (After Removing Water Points)")
ax.legend()
ax.grid(True)
plt.show()

# Counting the total stations after cleaning
print(f"Therefore, total EV Charging Stations AFTER Cleaning: {len(df_cleaned)}")

# Creating a BASE map centered at the mean location of the CLEANED stations
center_lat = df_cleaned['latitude'].mean()
center_long = df_cleaned['longitude'].mean()
map_after = folium.Map(location=[center_lat, center_long], zoom_start=6)

# Adding charging stations to the map (After Cleaning).
# Every marker shares the same blue styling, so it is defined once.
marker_style = dict(radius=4, color='blue', fill=True, fill_color='blue', fill_opacity=0.7)
for lat, lon, city in zip(df_cleaned['latitude'], df_cleaned['longitude'], df_cleaned['city']):
    marker = folium.CircleMarker(
        location=[lat, lon],
        popup=f"City: {city}",
        **marker_style,
    )
    marker.add_to(map_after)

# Saving and displaying the cleaned map (the bare name renders it in a notebook cell)
map_after.save("EV_Charging_After_Cleaning.html")
map_after

"""