# Task 2.5 Geospatial Plotting

In [None]:
# import libraries
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# load only the station-name and coordinate columns from the csv file
station_columns = [
    'start_station_name', 'end_station_name',
    'start_lat', 'start_lng',
    'end_lat', 'end_lng',
]
# force a column to the string data type
# NOTE(review): the key 6 looks like a positional column index in the csv -- confirm it targets the intended column
dtype_mapping = {6: str}
df = pd.read_csv(
    'NY_Citi_Bike_Weather_Data.csv',
    usecols=station_columns,
    dtype=dtype_mapping,
)

In [None]:
# check the dtype of each loaded column
df.dtypes

In [None]:
# preview the first rows of the loaded data
df.head()

In [None]:
# tag every trip with a counter, then collapse the frame to one row per
# (start station, end station) pair: count the trips and keep the first
# coordinates seen for each pair
df['value'] = 1
df = df.groupby(['start_station_name', 'end_station_name']).agg(
    value=('value', 'count'),
    start_lat=('start_lat', 'first'),
    start_lng=('start_lng', 'first'),
    end_lat=('end_lat', 'first'),
    end_lng=('end_lng', 'first'),
).reset_index()

In [None]:
# preview the aggregated station-pair rows
df.head()

In [None]:
# sum of the per-pair trip counts
print(df['value'].sum())
# (rows, columns) of the aggregated frame
print(df.shape)

#### The sum of `value` (total trips) no longer matches the number of rows, because all trips sharing the same start and end station have been condensed into a single row.

In [None]:
# summary statistics of the per-pair trip counts, rounded to 2 decimals
df['value'].describe().round(2)

In [None]:
# shorten the column names; the _x / _y suffixes mark the start / end
# coordinates respectively
column_renames = {
    'start_station_name': 'start_station',
    'end_station_name': 'end_station',
    'value': 'trip_counts',
    'start_lat': 'lat_x',
    'start_lng': 'lng_x',
    'end_lat': 'lat_y',
    'end_lng': 'lng_y',
}
df.rename(columns=column_renames, inplace=True)

# Creating a Map

In [None]:
# preview the frame with the renamed columns
df.head()

In [None]:
# check the column dtypes of the renamed frame
df.dtypes

In [None]:
# cast the four coordinate columns to float
coordinate_columns = ['lat_x', 'lng_x', 'lat_y', 'lng_y']
df[coordinate_columns] = df[coordinate_columns].astype(float)

In [None]:
# Decode any bytes cells to str.
# DataFrame.apply passes whole columns (Series) to the function, so an
# isinstance(..., bytes) check never matches there; DataFrame.map runs the
# function on every cell instead. The result is assigned back so the
# decoding actually takes effect on df.
df = df.map(lambda cell: cell.decode('utf-8') if isinstance(cell, bytes) else cell)

In [None]:
# create another smaller df for processing reasons: a reproducible 2% sample
# of the rows of every start station.
# The columns are selected explicitly after the groupby so 'start_station'
# stays part of each group handed to the lambda. Relying on groupby.apply to
# include the grouping column implicitly is deprecated in pandas 2.2 and the
# column is excluded in pandas 3.0, which would drop it from df_sample.
df_sample = df.groupby('start_station', group_keys=False)[list(df.columns)].apply(
    lambda group: group.sample(frac=0.02, random_state=42)
).reset_index(drop=True)

In [None]:
# (rows, columns) of the sampled frame
df_sample.shape

In [None]:
def _decode_if_bytes(cell):
    """Return *cell* decoded to str when it is bytes, otherwise unchanged."""
    if isinstance(cell, bytes):
        return cell.decode()
    return cell


# apply the decoder to every cell of the sampled frame
df_sample = df_sample.map(_decode_if_bytes)

In [None]:
# cast every object-dtype column of the sample to str
object_columns = df_sample.select_dtypes(include=['object']).columns
for column_name in object_columns:
    df_sample[column_name] = df_sample[column_name].astype(str)

In [None]:
# check the column dtypes of the sampled frame
df_sample.dtypes

In [None]:
from IPython.display import display

# create the KeplerGl instance first and only then render it: calling
# display(m) before m is assigned raises NameError when the notebook is run
# from top to bottom
m = KeplerGl(height=700, data={"data_1": df_sample})
display(m)

In [None]:
# keep the map's current configuration in a variable
config = m.config

In [None]:
# show the captured map configuration
config