In [1]:
import pandas as pd
import datashader as ds
import datashader.transfer_functions as tf
from datashader.utils import export_image
import colorcet as cc

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
# Load the merged dataset from its CSV file (path is relative to the notebook).
merged_csv_path = '../data/processeddata/mergeddata.csv'
df = pd.read_csv(merged_csv_path)

In [3]:
# Summarize the loaded frame: column names, non-null counts and dtypes.
# Useful here to spot columns with missing values before plotting.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1557740 entries, 0 to 1557739
Data columns (total 16 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   Trip ID                1557740 non-null  int64  
 1   Trip Origin            1557740 non-null  object 
 2   Trip Destination       1557740 non-null  object 
 3   Trip Start Time        1555718 non-null  object 
 4   Trip End Time          1557740 non-null  object 
 5   datetime               1557740 non-null  object 
 6   hour                   1557740 non-null  int64  
 7   is_weekend             1557740 non-null  bool   
 8   driver_id              1557740 non-null  int64  
 9   driver_action          1557740 non-null  object 
 10  Driver Location        1557740 non-null  object 
 11  driver_clientdistance  1557740 non-null  float64
 12  day_of_week            1557740 non-null  object 
 13  lat                    1557740 non-null  float64
 14  lon               

In [4]:
# Helper that builds a Datashader image from two coordinate columns
def create_datashader_plot(df, x_col, y_col, agg_col, plot_width=800, plot_height=600, cmap=cc.fire):
    """Rasterize the points of ``df`` and shade them by the mean of ``agg_col``.

    Parameters
    ----------
    df : data frame passed to ``Canvas.points``.
    x_col, y_col : names of the columns used as x and y coordinates.
    agg_col : name of the column reduced with ``ds.mean`` for each pixel.
    plot_width, plot_height : canvas size in pixels.
    cmap : colormap handed to ``tf.shade``.

    Returns
    -------
    The shaded image produced by ``tf.shade``.
    """
    canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height)
    mean_reduction = ds.mean(agg_col)
    aggregated = canvas.points(df, x_col, y_col, mean_reduction)
    return tf.shade(aggregated, cmap=cmap)

In [5]:
# Plot driver locations with driver-client distance
img = create_datashader_plot(df, 'lon', 'lat', 'driver_clientdistance')
# End the cell with the image so the notebook renders it; without this the
# plot is computed but never shown or exported.
img

In [6]:
# Build one plot per driver action ('accepted' vs 'rejected')
accepted_mask = df['driver_action'] == 'accepted'
rejected_mask = df['driver_action'] == 'rejected'
df_accepted = df[accepted_mask]
df_rejected = df[rejected_mask]

# Both subsets use the same coordinate/aggregation columns; only the colormap differs.
shared_columns = ('lon', 'lat', 'driver_clientdistance')
img_accepted = create_datashader_plot(df_accepted, *shared_columns, cmap=cc.kbc)
img_rejected = create_datashader_plot(df_rejected, *shared_columns, cmap=cc.fire)

# Export the two images, accepted first, then rejected.
for image, filename in (
    (img_accepted, 'driver_locations_accepted'),
    (img_rejected, 'driver_locations_rejected'),
):
    export_image(image, filename)

print("Plots have been generated and saved as images.")

Plots have been generated and saved as images.


In [7]:
# Extract latitude and longitude from 'Trip Origin' (a "lat,lon" string).
# Split once and convert both parts to float in a single step so the frame
# never holds intermediate string-typed start_lat/start_lon columns.
df[['start_lat', 'start_lon']] = (
    df['Trip Origin'].str.split(',', expand=True).astype(float)
)

In [8]:
from sklearn.cluster import KMeans
import folium
from datashader.colors import colormap_select, Greys9
import matplotlib.pyplot as plt

# Perform KMeans clustering on the trip-origin coordinates.
# random_state is fixed so cluster labels are reproducible across runs.
coords = df[['start_lat', 'start_lon']]
kmeans = KMeans(n_clusters=10, random_state=42)
df['start_cluster'] = kmeans.fit_predict(coords)

# Plot clusters on a map using Folium.
# Adding one marker per row (the frame has ~1.5M rows) is extremely slow and
# produces an unusably large HTML file, so draw a reproducible random sample.
MAX_MAP_POINTS = 5000
# One distinct colour per cluster (10 clusters -> 10 colours).
CLUSTER_COLORS = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]
map_points = df.sample(n=min(MAX_MAP_POINTS, len(df)), random_state=42)

# Named cluster_map (not `map`) so the builtin map() is not shadowed.
cluster_map = folium.Map(
    location=[df['start_lat'].mean(), df['start_lon'].mean()],
    zoom_start=12,
)
for lat, lon, cluster in zip(
    map_points['start_lat'], map_points['start_lon'], map_points['start_cluster']
):
    folium.CircleMarker(
        location=[float(lat), float(lon)],
        radius=5,
        color=CLUSTER_COLORS[int(cluster) % len(CLUSTER_COLORS)],
        fill=True,
    ).add_to(cluster_map)

cluster_map.save('clusters_map.html')

# Advanced visualization with Datashader (all rows, log-scaled point density)
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(df, 'start_lon', 'start_lat')
# set_background returns a new image; keep it so the white background is
# actually what gets displayed.
img = tf.set_background(tf.shade(agg, cmap=Greys9, how='log'), "white")
plt.imshow(img.to_pil())
plt.show()