In [1]:
import pandas as pd
from sklearn.cluster import MiniBatchKMeans

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): with no category/module arguments this filter is global, so
# warnings emitted by the libraries used in later cells are hidden as well —
# consider narrowing it to the specific warning that prompted it.
warnings.filterwarnings("ignore")

In [2]:
# Read the training set into a DataFrame.
# `path` is resolved relative to the notebook's working directory.
path = "train.csv"
data = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Drop rows with missing or invalid coordinates.
#  * missing  -> NaN in either coordinate column
#  * invalid  -> outside the geographic range (latitude in [-90, 90],
#                longitude in [-180, 180]; between() is inclusive on both ends)
# The result is assigned back instead of using dropna(inplace=True), so the
# cell is a plain "input -> output" expression with no in-place mutation.
data = (
    data
    .dropna(subset=['Latitude', 'Longitude'])
    .loc[lambda df: df['Latitude'].between(-90, 90)
                    & df['Longitude'].between(-180, 180)]
)

In [4]:
# Report the (rows, columns) shape of `data`
print(f"{data.shape}")

(878049, 9)


In [5]:
# Keep every row but only the two coordinate columns
geo_data = data.loc[:, ['Latitude', 'Longitude']]

In [6]:
# Report the (rows, columns) shape of the coordinate-only frame
print(f"{geo_data.shape}")

(878049, 2)


In [7]:
# Configure (but do not yet fit) the MiniBatch K-Means estimator.
# n_clusters is the number of cluster centres the model will produce;
# random_state is pinned so repeated runs use the same seed.
n_clusters = 200
kmeans = MiniBatchKMeans(
    n_clusters=n_clusters,
    batch_size=10_000,
    random_state=0,
)


In [8]:
# Train the clustering model on the latitude/longitude frame
kmeans.fit(X=geo_data)

In [9]:
# Extract cluster centers (hotspots).
# Each fitted cluster centre is treated as one "hotspot"; the next cell labels
# the two columns of this array as Latitude and Longitude.
hotspots = kmeans.cluster_centers_



In [10]:
# Wrap the array of cluster centres in a labelled DataFrame.
# Column order mirrors the ['Latitude', 'Longitude'] order of the frame the
# model was fitted on.
hotspots_df = pd.DataFrame(data=hotspots, columns=['Latitude', 'Longitude'])

# Preview the first five hotspots
hotspots_df.head(5)

Unnamed: 0,Latitude,Longitude
0,37.732583,-122.447601
1,37.710541,-122.42904
2,37.783191,-122.410084
3,37.733936,-122.390471
4,37.767931,-122.457551


In [11]:
# Persist the hotspots as CSV; index=False keeps the row index out of the file
hotspots_df.to_csv(path_or_buf='hotspots.csv', index=False)

In [12]:
import folium
from folium.plugins import HeatMap
import pandas as pd

In [13]:
# Read the saved hotspots back from disk.
# From here on `hotspots` is a DataFrame with Latitude/Longitude columns.
hotspots = pd.read_csv(filepath_or_buffer='hotspots.csv')

# Preview the first five rows
hotspots.head(5)

Unnamed: 0,Latitude,Longitude
0,37.732583,-122.447601
1,37.710541,-122.42904
2,37.783191,-122.410084
3,37.733936,-122.390471
4,37.767931,-122.457551


In [14]:
# Centre the map on the mean latitude / mean longitude of the hotspots.
map_center = [hotspots[axis].mean() for axis in ('Latitude', 'Longitude')]

# NOTE(review): the name `map` shadows Python's built-in map(); it is kept
# here because the following cells refer to the map object by this name.
map = folium.Map(
    location=map_center,
    zoom_start=12,
)

In [15]:
# Add one red marker per hotspot to the map.
# Iterating the two coordinate columns in lockstep avoids iterrows(), which
# builds a full Series for every row and yields an index that was never used.
for lat, lon in zip(hotspots['Latitude'], hotspots['Longitude']):
    folium.Marker(
        location=[lat, lon],
        popup='Hotspot',
        icon=folium.Icon(color='red')
    ).add_to(map)

In [16]:
# Display the map in Jupyter Notebook.
# A bare expression on the last line of a cell is rendered as the cell output.
map  

In [17]:
# Optional: uncomment the line below to save the map as a standalone HTML file
# map.save('hotspots_map.html')