# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Load the dataset
# The path is relative to the current working directory, so the CSV has to be
# present in whatever directory the notebook/script is launched from.
csv_path = "Santu--ev-charging-stations-india.csv"
if not os.path.isfile(csv_path):
    # Fail early and report the resolved location: a bare relative file name
    # in the error gives no hint about which directory was searched.
    raise FileNotFoundError(
        f"Dataset not found: {os.path.abspath(csv_path)} "
        f"(current working directory: {os.getcwd()})"
    )
df = pd.read_csv(csv_path)

# Create a folder to store images
output_dir = "ev_charts"
os.makedirs(output_dir, exist_ok=True)

# --- Clean basic fields ---
# Strip surrounding whitespace before title-casing so that variants such as
# "delhi " and "Delhi" collapse into one label instead of being counted as
# separate categories by the value_counts()/groupby() calls further down.
df['state'] = df['state'].str.strip().str.title()
df['city'] = df['city'].str.strip().str.title()

# ------------------------------
# 1. Top 10 States by Stations
# ------------------------------
# value_counts() sorts by frequency (descending), so head(10) keeps the ten
# states with the most rows.
top_states = df['state'].value_counts().head(10)
plt.figure(figsize=(10, 6))
# Recent seaborn releases deprecate passing `palette` without `hue`.
# Assigning hue to the same categories as y keeps one colour per bar;
# dodge=False keeps the bars full-width on versions that would otherwise
# offset them per hue level.
state_ax = sns.barplot(
    x=top_states.values,
    y=top_states.index,
    hue=top_states.index,
    palette="viridis",
    dodge=False,
)
# The hue legend would only repeat the y-axis labels; drop it if one was drawn.
state_legend = state_ax.get_legend()
if state_legend is not None:
    state_legend.remove()
plt.title("Top 10 States by Number of EV Charging Stations")
plt.xlabel("Number of Stations")
plt.ylabel("State")
plt.tight_layout()
plt.savefig(f"{output_dir}/ev_stations_by_state.png")
plt.close()

# ------------------------------
# 2. Charger Type Distribution (AC/DC)
# ------------------------------
# Assuming type 11.0 = AC, 12.0 = DC (adjust if needed)
# Coerce to numeric first so the numeric keys below still match when the
# column was read as text; unparseable values become NaN and are labelled
# 'Unknown' together with every other code.
type_codes = pd.to_numeric(df['type'], errors='coerce')
df['Charger Type'] = type_codes.map({11.0: 'AC', 12.0: 'DC'}).fillna('Unknown')
charger_counts = df['Charger Type'].value_counts()
# Key colours by label rather than by position: value_counts() orders slices
# by frequency, so a positional colour list would swap colours between
# categories depending on the data.
slice_colors = {'AC': '#66c2a5', 'DC': '#fc8d62', 'Unknown': '#8da0cb'}
plt.figure(figsize=(6, 6))
charger_counts.plot(
    kind='pie',
    autopct='%1.1f%%',
    colors=[slice_colors[label] for label in charger_counts.index],
)
plt.title("Distribution of Charger Types (AC vs DC)")
# Clear the y-label that the pie plot would otherwise show.
plt.ylabel('')
plt.tight_layout()
plt.savefig(f"{output_dir}/charger_type_distribution.png")
plt.close()

# ------------------------------
# 3. Histogram of Charger Capacities
# ------------------------------
# Keep only the entries of 'type' that parse as numbers.
valid_type = pd.to_numeric(df['type'], errors='coerce')
valid_type = valid_type.dropna()

fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(valid_type, bins=10, kde=False, color='steelblue', ax=ax)
ax.set_title("Histogram of Charger Capacities (Based on Type)")
ax.set_xlabel("Charger Type / Capacity Code")
ax.set_ylabel("Number of Stations")
fig.tight_layout()
fig.savefig(f"{output_dir}/charger_capacity_histogram.png")
plt.close(fig)

# ------------------------------
# 4. Pie Chart: Top 5 Cities
# ------------------------------
# The five most frequent values of the 'city' column.
top_cities = df['city'].value_counts().head(5)

fig, ax = plt.subplots(figsize=(6, 6))
top_cities.plot(
    kind='pie',
    autopct='%1.1f%%',
    colors=sns.color_palette("Set2"),
    ax=ax,
)
ax.set_title("Top 5 Cities by Number of Charging Stations")
# Blank out the y-label so no axis label is drawn next to the pie.
ax.set_ylabel('')
fig.tight_layout()
fig.savefig(f"{output_dir}/top_cities_piechart.png")
plt.close(fig)

# ------------------------------
# 5. Heatmap: Station Density by City (Top States)
# ------------------------------
heatmap_df = df.copy()
# size() counts rows per (state, city) pair; unstack() turns cities into
# columns and fills combinations that never occur with 0.
heatmap_pivot = heatmap_df.groupby(['state', 'city']).size().unstack(fill_value=0)
top_state_names = top_states.index
# reindex() instead of .loc[]: groupby drops rows whose key is NaN, so a top
# state whose rows all lack a city is absent from the pivot and .loc[] would
# raise KeyError. reindex() yields an all-zero row for it instead.
heatmap_data = heatmap_pivot.reindex(top_state_names, fill_value=0)
# The pivot has a column for every city in the dataset. Keep only cities with
# at least one station in the selected states so the heatmap is not padded
# with all-zero columns.
heatmap_data = heatmap_data.loc[:, (heatmap_data > 0).any(axis=0)]

plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, cmap="YlGnBu", linewidths=0.5, linecolor='gray')
plt.title("Heatmap of Station Density by City within Top States")
plt.xlabel("City")
plt.ylabel("State")
plt.tight_layout()
plt.savefig(f"{output_dir}/station_density_heatmap.png")
plt.close()

# ------------------------------
# 6. KMeans Clustering Map
# ------------------------------
# The source column is spelled 'lattitude'; rename it by name (not by
# position) so the mapping cannot silently shift if the selection changes.
# .copy() makes geo_df an independent frame so the column assignments below
# do not trigger SettingWithCopyWarning.
geo_df = (
    df[['lattitude', 'longitude', 'type']]
    .rename(columns={'lattitude': 'latitude'})
    .dropna()
    .copy()
)
# Coerce coordinates to numbers before the range filter: if either column was
# read as text, .between() would compare strings with ints. Values that do not
# parse become NaN and are rejected by the bounds check below.
for coord in ('latitude', 'longitude'):
    geo_df[coord] = pd.to_numeric(geo_df[coord], errors='coerce')
# Bounding box for plausible coordinates (presumably the extent of India,
# going by the chart title — adjust if needed).
in_bounds = geo_df['latitude'].between(6, 38) & geo_df['longitude'].between(68, 98)
geo_df = geo_df[in_bounds].copy()
# Non-numeric type codes are treated as 0 so the row is still clustered.
geo_df['type'] = pd.to_numeric(geo_df['type'], errors='coerce').fillna(0)

# Normalize and Cluster
# Standardise so latitude, longitude and type contribute on a comparable scale.
scaler = StandardScaler()
X = scaler.fit_transform(geo_df[['latitude', 'longitude', 'type']])
# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps the clustering consistent across versions.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
geo_df['cluster'] = kmeans.fit_predict(X)

# Scatter plot of clusters
plt.figure(figsize=(10, 8))
sns.scatterplot(data=geo_df, x='longitude', y='latitude', hue='cluster', palette='Set1', s=30)
plt.title('KMeans Clustering of EV Charging Stations in India')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)
plt.legend(title='Cluster')
plt.tight_layout()
plt.savefig(f"{output_dir}/ev_cluster_map.png")
plt.close()

# ------------------------------
# 7. Charger Capacity vs Cluster
# ------------------------------
# One jittered point per station: type code on x, assigned cluster on y.
fig, ax = plt.subplots(figsize=(8, 6))
sns.stripplot(
    data=geo_df,
    x='type',
    y='cluster',
    palette='Set2',
    jitter=0.25,
    size=4,
    ax=ax,
)
ax.set_title("Charger Capacity Distribution across Clusters")
ax.set_xlabel("Charger Type / Capacity Code")
ax.set_ylabel("Cluster")
fig.tight_layout()
fig.savefig(f"{output_dir}/charger_capacity_by_cluster.png")
plt.close(fig)

# Final status line once every chart has been written.
print(f"✅ All charts saved to folder: {output_dir}")


# Captured notebook output from running the cell above:
# FileNotFoundError: [Errno 2] No such file or directory: 'Santu--ev-charging-stations-india.csv'