## K-Means clustering basic model

In [None]:
import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

# Aim: cluster synthetic station records by location and fuel price, then
# report which cluster has the lowest mean price.
# NOTE(review): the frame below is random placeholder data and only carries a
# fuel price; no EV charging price is present, so no fuel/EV ratio is computed.
SEED = 42
N_POINTS = 100

# Seeded generator so that Restart & Run All reproduces the same points,
# clusters and plots (the global np.random state was previously unseeded).
rng = np.random.default_rng(SEED)
data = pd.DataFrame({
    'latitude': rng.uniform(45, 55, N_POINTS),    # approximate latitudes in Europe
    'longitude': rng.uniform(5, 15, N_POINTS),    # approximate longitudes in Europe
    'price': rng.uniform(1.0, 2.0, N_POINTS)      # e.g., fuel price in euros per liter
})

# Feature matrix: use location and price.
# NOTE(review): features are not scaled; latitude/longitude span ~10 units while
# price spans ~1, so location will dominate the Euclidean distance. Consider
# standardizing if price is meant to drive the grouping.
X = data[['latitude', 'longitude', 'price']]

# Use KMeans to find, for example, 5 clusters (adjust number as needed).
# n_init is pinned explicitly: its default differs between scikit-learn
# versions (and emits a FutureWarning in 1.2/1.3), which would change results.
n_clusters = 5
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=SEED)
data['cluster'] = kmeans.fit_predict(X)

# Print out cluster centers
print("Cluster centers (latitude, longitude, price):")
print(kmeans.cluster_centers_)

# Identify the cluster with the lowest mean price (the stated aim of this cell).
mean_price_by_cluster = data.groupby('cluster')['price'].mean()
cheapest_cluster = int(mean_price_by_cluster.idxmin())
print(f"Lowest mean price: cluster {cheapest_cluster} "
      f"({mean_price_by_cluster.min():.3f} EUR per liter)")

# Plot clusters using matplotlib (for a quick look)
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(data['longitude'], data['latitude'],
                     c=data['cluster'], cmap='viridis', alpha=0.6)
fig.colorbar(scatter, ax=ax, label='Cluster')
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("K-Means Clusters of European Regions by Price")
plt.show()

# Optionally, create an interactive map with Folium
map_center = [data['latitude'].mean(), data['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=6)
colors = ['red', 'blue', 'green', 'purple', 'orange']

# Add each point to the map. itertuples keeps the integer cluster label
# (iterrows would upcast the whole row to float); the modulo keeps the colour
# lookup valid if n_clusters is raised above len(colors).
for point in data.itertuples(index=False):
    folium.CircleMarker(
        location=[point.latitude, point.longitude],
        radius=3,
        color=colors[int(point.cluster) % len(colors)],
        fill=True
    ).add_to(m)

# Display the map in a Jupyter Notebook (use m.save('map.html') to save)
m

## K-means clustering prediction

In [1]:
# Aim here is to predict the differential fuel to EV charging by geographical cluster.
# This cell only builds a small hand-written dataset and the train/test split;
# no model is fitted here.
# Requires `pd` and `train_test_split` from the imports cell at the top of the
# notebook -- run that cell first (running this one on a fresh kernel raises
# NameError).

data = pd.DataFrame({
    'latitude': [48.8566, 50.1109, 52.5200, 41.9028, 45.7640],
    'longitude': [2.3522, 8.6821, 13.4050, 12.4964, 4.8357],
    'price': [1.40, 1.35, 1.50, 1.20, 1.45],  # example gas price or charging price
    'trip_cost': [20, 25, 18, 22, 21]         # target variable for prediction
})

# Split the data randomly (e.g., 80% training, 20% testing).
# With only 5 rows, test_size=0.2 leaves a single row in the test set.
X = data.drop('trip_cost', axis=1)
y = data['trip_cost']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity check: the split must partition the rows without loss or duplication.
assert len(X_train) + len(X_test) == len(data)

print("Training set:")
print(X_train)
print("\nTest set:")
print(X_test)

NameError: name 'pd' is not defined