<a href="https://colab.research.google.com/github/niranjan-vini/ev_market_segment_anaylsis/blob/main/ev_market_segment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

## Load data

In [None]:
# Load the EV dataset; the relative path is resolved against the current
# working directory, so the CSV must sit next to the notebook (or be uploaded
# to the Colab session) before this cell runs.
raw_csv_path = "indian-ev-data.csv"
df = pd.read_csv(raw_csv_path)

## Clean the data

In [None]:
# Keep only fully populated rows: a row is discarded as soon as any one of its
# columns is missing. The raw frame `df` is left untouched.
df_cleaned = df.dropna(axis=0, how="any")

## Summary by Vehicle Type

In [None]:
# Columns averaged within each vehicle type.
type_summary_columns = [
    "Price",
    "Battery Capacity (kWh)",
    "Range per Charge (km)",
    "Charging Time",
    "Power (HP or kW)",
    "Top Speed (km/h)",
]

# One row per "Vehicle Type", holding the mean of every column listed above,
# rounded to two decimals for display.
summary_by_type = (
    df_cleaned
    .groupby("Vehicle Type")
    .agg(dict.fromkeys(type_summary_columns, "mean"))
    .round(2)
)
print(summary_by_type)


## Feature Selection & Standardization

In [None]:
# Columns fed to the clustering step.
clustering_columns = [
    "Price",
    "Battery Capacity (kWh)",
    "Range per Charge (km)",
    "Charging Time",
    "Power (HP or kW)",
    "Top Speed (km/h)",
]
features = df_cleaned.loc[:, clustering_columns]

# Standardize each column to zero mean and unit variance so that no single
# feature dominates the distance computation purely because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)


## Apply KMeans Clustering

In [None]:
# Partition the standardized feature matrix into three clusters.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto" in version 1.4, so leaving it unset yields different segments (and a
# FutureWarning on 1.2/1.3) depending on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
segment_labels = kmeans.fit_predict(features_scaled)

# Attach the labels with assign() instead of writing a column in place:
# it returns a new frame, so it cannot trigger SettingWithCopyWarning on the
# dropna() result, and re-running the cell simply replaces "Segment".
df_cleaned = df_cleaned.assign(Segment=segment_labels)

## Segment Summary

In [None]:
# Per-segment profile: mean price, range and power, plus the most frequent
# vehicle type in the segment (the first value returned by mode() when several
# types tie).
segment_aggregations = {
    "Price": "mean",
    "Range per Charge (km)": "mean",
    "Power (HP or kW)": "mean",
    "Vehicle Type": lambda types: types.mode().iloc[0],
}
segment_summary = (
    df_cleaned
    .groupby("Segment")
    .agg(segment_aggregations)
    .round(2)
)
print(segment_summary)


## Visualize Segments



In [None]:
# Scatter of price against range, one point per vehicle, coloured by the
# k-means segment. Drawn on an explicit Axes so every call targets a known
# figure rather than pyplot's implicit "current" one.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df_cleaned,
    x="Range per Charge (km)",
    y="Price",
    hue="Segment",
    palette="Set2",
    s=100,
    ax=ax,
)
ax.set_title("EV Segmentation: Price vs Range")
ax.set_xlabel("Range per Charge (km)")
ax.set_ylabel("Price (INR)")
ax.grid(True)
fig.tight_layout()
plt.show()
