In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap
from sklearn.ensemble import RandomForestRegressor
from datetime import datetime
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Function to convert location string to decimal degrees
def dms_to_dd(degrees, minutes, seconds, direction):
    """Convert a degrees/minutes/seconds coordinate to signed decimal degrees.

    Args:
        degrees, minutes, seconds: Coordinate parts; each is passed through
            ``float()``, so numeric strings are accepted.
        direction: Hemisphere letter. ``'W'`` and ``'S'`` produce a negative
            result; any other value leaves the result positive.

    Returns:
        The coordinate as a float in decimal degrees.
    """
    whole_degrees = float(degrees)
    minute_part = float(minutes) / 60
    second_part = float(seconds) / 3600
    magnitude = whole_degrees + minute_part + second_part
    # West longitudes and south latitudes are negative in decimal notation.
    if direction in ('W', 'S'):
        return -magnitude
    return magnitude

# Function to parse location string
def parse_location(location):
    """Extract (latitude, longitude) in decimal degrees from a location string.

    The expected layout is ``Long : D° M' S E|W Lat : D° M' S N|S``, for
    example ``"Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N"``.

    Args:
        location: Raw value of the 'Location' column.

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or either coordinate cannot be found.
    """
    # A missing cell is not a string (pandas represents it as NaN), and
    # re.search would raise TypeError on it, so treat it as "no location".
    if not isinstance(location, str):
        return None, None

    # The fractional part of the seconds is optional so that whole-number
    # seconds ("33") parse as well as decimals ("33.9").
    long_match = re.search(r"Long : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([EW])", location)
    lat_match = re.search(r"Lat : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([NS])", location)

    if long_match is None or lat_match is None:
        return None, None

    longitude = dms_to_dd(*long_match.groups())
    latitude = dms_to_dd(*lat_match.groups())
    return latitude, longitude

# Load the CSV file
# NOTE: the file lives under the Google Drive mount point, so the drive has to
# be mounted before this line runs.
file_path = '/content/drive/MyDrive/gpsMovementReport_11June2024_041002_60b0db2a005655_523.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Preprocess the data
# Extract latitude and longitude from the 'Location' column. The coordinates
# are collected into one frame instead of building a pandas Series per row,
# and non-string cells are mapped to (None, None) so they are dropped below
# rather than crashing the parser.
coordinates = pd.DataFrame(
    [
        parse_location(location) if isinstance(location, str) else (None, None)
        for location in data['Location']
    ],
    columns=['latitude', 'longitude'],
    index=data.index,
)
data[['latitude', 'longitude']] = coordinates

# Drop rows with missing latitude or longitude. The explicit copy makes `data`
# an independent frame, so the column assignments below do not raise
# SettingWithCopyWarning.
data = data.dropna(subset=['latitude', 'longitude']).copy()

# Parsing the 'Report Group Date' column to datetime. The timestamps are
# day-first (e.g. "10/06/2024 03:00:23"); the explicit format prevents pandas
# from reading them month-first.
data['Report Group Date'] = pd.to_datetime(data['Report Group Date'], format='%d/%m/%Y %H:%M:%S')

# Extracting features for the model
data['hour'] = data['Report Group Date'].dt.hour
data['day_of_week'] = data['Report Group Date'].dt.dayofweek

# Aggregate data to count rows per location per time unit.
# NOTE(review): every CSV row is counted, whatever its VehicleStatus; confirm
# that a row count is the intended proxy for "rides".
ride_counts = data.groupby(['latitude', 'longitude', 'hour', 'day_of_week']).size().reset_index(name='ride_count')

# Training a model to predict demand
features = ride_counts[['latitude', 'longitude', 'hour', 'day_of_week']]
target = ride_counts['ride_count']

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(features, target)

# Predicting demand for the current date and time
# NOTE(review): datetime.now() uses the kernel's clock; confirm it is in the
# same timezone as 'Report Group Date'.
current_datetime = datetime.now()
current_hour = current_datetime.hour
current_day_of_week = current_datetime.weekday()

# Create a DataFrame for the current time prediction
predict_data = ride_counts[['latitude', 'longitude']].drop_duplicates().copy()
predict_data['hour'] = current_hour
predict_data['day_of_week'] = current_day_of_week

# Predicting ride counts; the columns are selected explicitly so they always
# match the training feature order.
predict_data['predicted_ride_count'] = model.predict(predict_data[features.columns])

# Filter out locations with less than 1.5 predicted ride counts (copy, because
# a new column is added to the filtered frame right after)
predict_data = predict_data[predict_data['predicted_ride_count'] >= 1.5].copy()

# Normalize predicted ride counts for visualization
max_ride_count = predict_data['predicted_ride_count'].max()
min_ride_count = predict_data['predicted_ride_count'].min()
ride_count_range = max_ride_count - min_ride_count
if ride_count_range > 0:
    predict_data['normalized_ride_count'] = (predict_data['predicted_ride_count'] - min_ride_count) / ride_count_range
else:
    # All remaining predictions are equal (or none remain); dividing by the
    # zero range would fill the column with NaN.
    predict_data['normalized_ride_count'] = 0.0

# Function to generate color based on the normalized ride count
def get_color(normalized_value):
    """Map a normalized value to a hex colour on the reversed 'RdYlGn' scale.

    Args:
        normalized_value: Position on the colour scale. The notebook passes
            min-max normalized predictions, i.e. values between 0 and 1,
            running from green (low) to red (high).

    Returns:
        The colour as a hex string, as produced by ``mcolors.to_hex``.
    """
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap performs the same lookup and remains supported.
    colormap = plt.get_cmap('RdYlGn_r')
    return mcolors.to_hex(colormap(normalized_value))

# Base map, centred on the average coordinate of the locations being plotted
center_latitude = predict_data['latitude'].mean()
center_longitude = predict_data['longitude'].mean()
map_center = [center_latitude, center_longitude]
m = folium.Map(location=map_center, zoom_start=12)

# One circle per location: both the radius and the colour follow the
# normalized predicted ride count
for point in predict_data.itertuples(index=False):
    color = get_color(point.normalized_ride_count)
    radius = 5 + (point.normalized_ride_count * 20)  # 5 px minimum, up to 25 px
    marker = folium.CircleMarker(
        location=[point.latitude, point.longitude],
        radius=radius,
        color=color,
        fill=True,
        fill_opacity=0.6,
        fill_color=color,
        tooltip=f'Predicted ride count: {point.predicted_ride_count:.2f}',
    )
    marker.add_to(m)

# Heatmap layer on the same map, weighted by the predicted ride count
heat_data = predict_data[['latitude', 'longitude', 'predicted_ride_count']].values.tolist()
heat_layer = HeatMap(heat_data)
heat_layer.add_to(m)

# Write the combined map (markers + heatmap) to disk
m.save('heatmap.html')

     Report Group Date VehicleReg  DriverID  \
0  10/06/2024 03:00:23   LC18 YXR       NaN   
1  10/06/2024 09:12:26   LC18 YXR       NaN   
2  10/06/2024 09:13:00   LC18 YXR       NaN   
3  10/06/2024 09:14:00   LC18 YXR       NaN   
4  10/06/2024 09:15:00   LC18 YXR       NaN   

                                VehicleStatus  MOBILESPEED  MOBILEODO  \
0  Health Check; (Ignition off); (GPS Unlock)          0.0        0.0   
1                                    Start up          0.0        0.0   
2                       Driving; (GPS Unlock)          0.0        0.0   
3                       Driving; (GPS Unlock)          0.0        0.0   
4                       Driving; (GPS Unlock)          0.0        0.0   

                                    Location  SkillSet  MsgTypeId  \
0  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
1  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
2  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
3 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Report Group Date'] = pd.to_datetime(data['Report Group Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['hour'] = data['Report Group Date'].dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['day_of_week'] = data['Report Group Date'].dt.dayofweek
  colormap = plt.cm.get_c

In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from datetime import datetime
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Function to convert location string to decimal degrees
def dms_to_dd(degrees, minutes, seconds, direction):
    """Convert a degrees/minutes/seconds coordinate to signed decimal degrees.

    Args:
        degrees, minutes, seconds: Coordinate parts; each is passed through
            ``float()``, so numeric strings are accepted.
        direction: Hemisphere letter. ``'W'`` and ``'S'`` produce a negative
            result; any other value leaves the result positive.

    Returns:
        The coordinate as a float in decimal degrees.
    """
    whole_degrees = float(degrees)
    minute_part = float(minutes) / 60
    second_part = float(seconds) / 3600
    magnitude = whole_degrees + minute_part + second_part
    # West longitudes and south latitudes are negative in decimal notation.
    if direction in ('W', 'S'):
        return -magnitude
    return magnitude

# Function to parse location string
def parse_location(location):
    """Extract (latitude, longitude) in decimal degrees from a location string.

    The expected layout is ``Long : D° M' S E|W Lat : D° M' S N|S``, for
    example ``"Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N"``.

    Args:
        location: Raw value of the 'Location' column.

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or either coordinate cannot be found.
    """
    # A missing cell is not a string (pandas represents it as NaN), and
    # re.search would raise TypeError on it, so treat it as "no location".
    if not isinstance(location, str):
        return None, None

    # The fractional part of the seconds is optional so that whole-number
    # seconds ("33") parse as well as decimals ("33.9").
    long_match = re.search(r"Long : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([EW])", location)
    lat_match = re.search(r"Lat : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([NS])", location)

    if long_match is None or lat_match is None:
        return None, None

    longitude = dms_to_dd(*long_match.groups())
    latitude = dms_to_dd(*lat_match.groups())
    return latitude, longitude

# Load the CSV file
# NOTE: the file lives under the Google Drive mount point, so the drive has to
# be mounted before this line runs.
file_path = '/content/drive/MyDrive/gpsMovementReport_11June2024_041002_60b0db2a005655_523.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Preprocess the data
# Extract latitude and longitude from the 'Location' column. The coordinates
# are collected into one frame instead of building a pandas Series per row,
# and non-string cells are mapped to (None, None) so they are dropped below
# rather than crashing the parser.
coordinates = pd.DataFrame(
    [
        parse_location(location) if isinstance(location, str) else (None, None)
        for location in data['Location']
    ],
    columns=['latitude', 'longitude'],
    index=data.index,
)
data[['latitude', 'longitude']] = coordinates

# Drop rows with missing latitude or longitude. The explicit copy makes `data`
# an independent frame, so the column assignments below do not raise
# SettingWithCopyWarning.
data = data.dropna(subset=['latitude', 'longitude']).copy()

# Parsing the 'Report Group Date' column to datetime (day-first timestamps)
data['Report Group Date'] = pd.to_datetime(data['Report Group Date'], format='%d/%m/%Y %H:%M:%S')

# Extracting features for the model
data['hour'] = data['Report Group Date'].dt.hour
data['day_of_week'] = data['Report Group Date'].dt.dayofweek

# Aggregate data to count rows per location per time unit.
# NOTE(review): every CSV row is counted, whatever its VehicleStatus; confirm
# that a row count is the intended proxy for "rides".
ride_counts = data.groupby(['latitude', 'longitude', 'hour', 'day_of_week']).size().reset_index(name='ride_count')

# Training a model to predict demand
features = ride_counts[['latitude', 'longitude', 'hour', 'day_of_week']]
target = ride_counts['ride_count']

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(features, target)

# Predicting demand for the current date and time
# NOTE(review): datetime.now() uses the kernel's clock; confirm it is in the
# same timezone as 'Report Group Date'.
current_datetime = datetime.now()
current_hour = current_datetime.hour
current_day_of_week = current_datetime.weekday()

# Create a DataFrame for the current time prediction
predict_data = ride_counts[['latitude', 'longitude']].drop_duplicates().copy()
predict_data['hour'] = current_hour
predict_data['day_of_week'] = current_day_of_week

# Predicting ride counts; the columns are selected explicitly so they always
# match the training feature order.
predict_data['predicted_ride_count'] = model.predict(predict_data[features.columns])

# Filter out locations with less than 1.5 predicted ride counts (copy, because
# new columns are added to the filtered frame right after)
predict_data = predict_data[predict_data['predicted_ride_count'] >= 1.5].copy()

# Normalize predicted ride counts for visualization
max_ride_count = predict_data['predicted_ride_count'].max()
min_ride_count = predict_data['predicted_ride_count'].min()
ride_count_range = max_ride_count - min_ride_count
if ride_count_range > 0:
    predict_data['normalized_ride_count'] = (predict_data['predicted_ride_count'] - min_ride_count) / ride_count_range
else:
    # All remaining predictions are equal (or none remain); dividing by the
    # zero range would fill the column with NaN.
    predict_data['normalized_ride_count'] = 0.0

# Applying KMeans clustering on the coordinates. KMeans rejects more clusters
# than samples, so the count is capped by the number of surviving locations.
kmeans = KMeans(n_clusters=min(3, len(predict_data)), random_state=42)
predict_data['cluster'] = kmeans.fit_predict(predict_data[['latitude', 'longitude']]).astype(int)

# Function to generate color based on cluster
def get_cluster_color(cluster):
    """Return the marker colour name for a cluster label.

    Args:
        cluster: Cluster label; converted with ``int()`` before the lookup,
            so float labels such as ``2.0`` are accepted.

    Returns:
        One of ``'red'``, ``'green'`` or ``'blue'``.

    Raises:
        IndexError: If the label falls outside the three-colour palette.
    """
    palette = ('red', 'green', 'blue')
    position = int(cluster)
    return palette[position]

# Creating a map centered around the mean latitude and longitude
map_center = [predict_data['latitude'].mean(), predict_data['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=12)

# Add one circle per location: radius follows the normalized predicted ride
# count, colour follows the KMeans cluster. itertuples keeps each column's
# dtype, so the cluster label stays an int (iterrows would upcast it to float
# and the tooltip would read "Cluster: 8.0").
for point in predict_data.itertuples(index=False):
    radius = 5 + (point.normalized_ride_count * 20)  # 5 px minimum, up to 25 px
    color = get_cluster_color(point.cluster)
    folium.CircleMarker(
        location=[point.latitude, point.longitude],
        radius=radius,
        color=color,
        fill=True,
        fill_opacity=0.6,
        fill_color=color,
        tooltip=f'Predicted ride count: {point.predicted_ride_count:.2f}, Cluster: {point.cluster}'
    ).add_to(m)

# Create a heatmap for visualizing demand, weighted by the predicted ride count
heat_data = predict_data[['latitude', 'longitude', 'predicted_ride_count']].values.tolist()
HeatMap(heat_data).add_to(m)

# Save the map (markers + heatmap) next to the notebook, as the other cells
# do; '/mnt/data' is not a directory this notebook creates.
m.save('heatmap.html')
m


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/gpsMovementReport_11June2024_041002_60b0db2a005655_523.csv'

In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from datetime import datetime
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Function to convert location string to decimal degrees
def dms_to_dd(degrees, minutes, seconds, direction):
    """Convert a degrees/minutes/seconds coordinate to signed decimal degrees.

    Args:
        degrees, minutes, seconds: Coordinate parts; each is passed through
            ``float()``, so numeric strings are accepted.
        direction: Hemisphere letter. ``'W'`` and ``'S'`` produce a negative
            result; any other value leaves the result positive.

    Returns:
        The coordinate as a float in decimal degrees.
    """
    whole_degrees = float(degrees)
    minute_part = float(minutes) / 60
    second_part = float(seconds) / 3600
    magnitude = whole_degrees + minute_part + second_part
    # West longitudes and south latitudes are negative in decimal notation.
    if direction in ('W', 'S'):
        return -magnitude
    return magnitude

# Function to parse location string
def parse_location(location):
    """Extract (latitude, longitude) in decimal degrees from a location string.

    The expected layout is ``Long : D° M' S E|W Lat : D° M' S N|S``, for
    example ``"Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N"``.

    Args:
        location: Raw value of the 'Location' column.

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when the
        value is not a string or either coordinate cannot be found.
    """
    # A missing cell is not a string (pandas represents it as NaN), and
    # re.search would raise TypeError on it, so treat it as "no location".
    if not isinstance(location, str):
        return None, None

    # The fractional part of the seconds is optional so that whole-number
    # seconds ("33") parse as well as decimals ("33.9").
    long_match = re.search(r"Long : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([EW])", location)
    lat_match = re.search(r"Lat : (\d+)° (\d+)' (\d+(?:\.\d+)?) ([NS])", location)

    if long_match is None or lat_match is None:
        return None, None

    longitude = dms_to_dd(*long_match.groups())
    latitude = dms_to_dd(*lat_match.groups())
    return latitude, longitude

# Load the CSV file
# NOTE: the file lives under the Google Drive mount point, so the drive has to
# be mounted before this line runs.
file_path = '/content/drive/MyDrive/gpsMovementReport_11June2024_041002_60b0db2a005655_523.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Preprocess the data
# Extract latitude and longitude from the 'Location' column. The coordinates
# are collected into one frame instead of building a pandas Series per row,
# and non-string cells are mapped to (None, None) so they are dropped below
# rather than crashing the parser.
coordinates = pd.DataFrame(
    [
        parse_location(location) if isinstance(location, str) else (None, None)
        for location in data['Location']
    ],
    columns=['latitude', 'longitude'],
    index=data.index,
)
data[['latitude', 'longitude']] = coordinates

# Drop rows with missing latitude or longitude. The explicit copy makes `data`
# an independent frame, so the column assignments below do not raise
# SettingWithCopyWarning.
data = data.dropna(subset=['latitude', 'longitude']).copy()

# Parsing the 'Report Group Date' column to datetime (day-first timestamps)
data['Report Group Date'] = pd.to_datetime(data['Report Group Date'], format='%d/%m/%Y %H:%M:%S')

# Extracting features for the model
data['hour'] = data['Report Group Date'].dt.hour
data['day_of_week'] = data['Report Group Date'].dt.dayofweek

# Aggregate data to count rows per location per time unit.
# NOTE(review): every CSV row is counted, whatever its VehicleStatus; confirm
# that a row count is the intended proxy for "rides".
ride_counts = data.groupby(['latitude', 'longitude', 'hour', 'day_of_week']).size().reset_index(name='ride_count')

# Training a model to predict demand
features = ride_counts[['latitude', 'longitude', 'hour', 'day_of_week']]
target = ride_counts['ride_count']

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(features, target)

# Predicting demand for the current date and time
# NOTE(review): datetime.now() uses the kernel's clock; confirm it is in the
# same timezone as 'Report Group Date'.
current_datetime = datetime.now()
current_hour = current_datetime.hour
current_day_of_week = current_datetime.weekday()

# Create a DataFrame for the current time prediction
predict_data = ride_counts[['latitude', 'longitude']].drop_duplicates().copy()
predict_data['hour'] = current_hour
predict_data['day_of_week'] = current_day_of_week

# Predicting ride counts; the columns are selected explicitly so they always
# match the training feature order.
predict_data['predicted_ride_count'] = model.predict(predict_data[features.columns])

# Filter out locations with less than 1.5 predicted ride counts (copy, because
# new columns are added to the filtered frame right after)
predict_data = predict_data[predict_data['predicted_ride_count'] >= 1.5].copy()

# Normalize predicted ride counts for visualization
max_ride_count = predict_data['predicted_ride_count'].max()
min_ride_count = predict_data['predicted_ride_count'].min()
ride_count_range = max_ride_count - min_ride_count
if ride_count_range > 0:
    predict_data['normalized_ride_count'] = (predict_data['predicted_ride_count'] - min_ride_count) / ride_count_range
else:
    # All remaining predictions are equal (or none remain); dividing by the
    # zero range would fill the column with NaN.
    predict_data['normalized_ride_count'] = 0.0

# Applying KMeans clustering on the coordinates. KMeans rejects more clusters
# than samples, so the count is capped by the number of surviving locations.
kmeans = KMeans(n_clusters=min(10, len(predict_data)), random_state=42)
predict_data['cluster'] = kmeans.fit_predict(predict_data[['latitude', 'longitude']]).astype(int)

# Function to generate color based on cluster
def get_cluster_color(cluster):
    """Return the marker colour name for a cluster label.

    Args:
        cluster: Cluster label; converted with ``int()`` before the lookup,
            so float labels such as ``2.0`` are accepted.

    Returns:
        The colour name at that position in the ten-entry palette.

    Raises:
        IndexError: If the label falls outside the palette.
    """
    palette = (
        'red', 'green', 'blue', 'orange', 'yellow',
        'black', 'brown', 'purple', 'pink', 'gray',
    )
    position = int(cluster)
    return palette[position]

# Creating a map centered around the mean latitude and longitude
map_center = [predict_data['latitude'].mean(), predict_data['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=12)

# Add one circle per location: radius follows the normalized predicted ride
# count, colour follows the KMeans cluster. itertuples keeps each column's
# dtype, so the cluster label stays an int (iterrows would upcast it to float
# and the tooltip would read "Cluster: 8.0").
for point in predict_data.itertuples(index=False):
    radius = 5 + (point.normalized_ride_count * 20)  # 5 px minimum, up to 25 px
    color = get_cluster_color(point.cluster)
    folium.CircleMarker(
        location=[point.latitude, point.longitude],
        radius=radius,
        color=color,
        fill=True,
        fill_opacity=0.6,
        fill_color=color,
        tooltip=f'Predicted ride count: {point.predicted_ride_count:.2f}, Cluster: {point.cluster}'
    ).add_to(m)

# Create a heatmap for visualizing demand, weighted by the predicted ride count
heat_data = predict_data[['latitude', 'longitude', 'predicted_ride_count']].values.tolist()
HeatMap(heat_data).add_to(m)

# Save the map (markers + heatmap) and display it as the cell output
m.save('heatmap.html')
m


     Report Group Date VehicleReg  DriverID  \
0  10/06/2024 03:00:23   LC18 YXR       NaN   
1  10/06/2024 09:12:26   LC18 YXR       NaN   
2  10/06/2024 09:13:00   LC18 YXR       NaN   
3  10/06/2024 09:14:00   LC18 YXR       NaN   
4  10/06/2024 09:15:00   LC18 YXR       NaN   

                                VehicleStatus  MOBILESPEED  MOBILEODO  \
0  Health Check; (Ignition off); (GPS Unlock)          0.0        0.0   
1                                    Start up          0.0        0.0   
2                       Driving; (GPS Unlock)          0.0        0.0   
3                       Driving; (GPS Unlock)          0.0        0.0   
4                       Driving; (GPS Unlock)          0.0        0.0   

                                    Location  SkillSet  MsgTypeId  \
0  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
1  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
2  Long : 0° 13' 33.9 W Lat : 51° 30' 21.3 N       NaN          1   
3 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Report Group Date'] = pd.to_datetime(data['Report Group Date'], format='%d/%m/%Y %H:%M:%S')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['hour'] = data['Report Group Date'].dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['day_of_week'] = data['Report Group Date'].dt.dayofwe

In [None]:
# Display the filtered per-location predictions. This relies on `predict_data`
# having been left in the kernel by one of the earlier analysis cells.
predict_data

Unnamed: 0,latitude,longitude,hour,day_of_week,predicted_ride_count,normalized_ride_count,cluster
14,51.356639,-0.117000,5,4,1.81,0.119231,8
15,51.356639,-0.116778,5,4,2.42,0.353846,8
16,51.356778,-0.160889,5,4,2.80,0.500000,8
18,51.357000,-0.155333,5,4,1.56,0.023077,8
51,51.366500,-0.098694,5,4,3.02,0.584615,8
...,...,...,...,...,...,...,...
1362,51.585889,-0.276306,5,4,2.18,0.261538,9
1364,51.586139,-0.308000,5,4,2.49,0.380769,5
1365,51.586472,-0.293083,5,4,1.62,0.046154,5
1373,51.587722,-0.250722,5,4,2.24,0.284615,9


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): the analysis cells above read their CSV from
# /content/drive/MyDrive, so on a fresh kernel this cell has to run before
# them; otherwise the load fails with FileNotFoundError.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
