# 2.5 Advanced Geospatial Plotting

## 2. Import and Install Libraries

In [1]:
#import libraries
import pandas as pd
import os
from keplergl import KeplerGl
from pyproj import CRS
import numpy as npa
from matplotlib import pyplot as plt

In [2]:
!python --version

Python 3.12.4


In [3]:
#read csv
df = pd.read_csv('NY_CitiBike_2022_sampled.csv', index_col = 0)

## 3. Data Processing

In [4]:
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,start_time,date,avgTemp,_merge
29120623,3C8AACB2D6B4349A,classic_bike,2022-12-03 00:19:08,2022-12-03 00:40:57,Irving Ave & Jefferson St,5051.02,Market St & Henry St,5270.09,40.70541,-73.92545,40.713108,-73.99446,member,2022-12-03 00:19:08,2022-12-03,10.6,both
21568440,8891065C742A0959,classic_bike,2022-09-23 17:28:00,2022-09-23 17:42:12,W 36 St & 9 Ave,6569.07,W 12 St & Hudson St,5997.1,40.754623,-73.99517,40.73753,-74.00559,member,2022-09-23 17:28:00,2022-09-23,15.1,both
23553887,44C9E5C0CE745B83,classic_bike,2022-09-02 11:48:04,2022-09-02 11:55:42,Allen St & Rivington St,5414.06,Lafayette St & E 8 St,5788.13,40.720196,-73.989975,40.730206,-73.99103,member,2022-09-02 11:48:04,2022-09-02,21.7,both
21160236,621A040D1B1CD808,electric_bike,2022-09-28 16:41:53,2022-09-28 16:41:55,West St & Liberty St,5184.08,West St & Liberty St,5184.08,40.711445,-74.01485,40.711445,-74.01485,member,2022-09-28 16:41:53,2022-09-28,17.2,both
15027579,C06B5DF339C420E1,classic_bike,2022-07-28 06:08:22,2022-07-28 06:18:50,Lewis Ave & Kosciuszko St,4617.01,Hanson Pl & Ashland Pl,4395.07,40.692345,-73.9371,40.685066,-73.977905,casual,2022-07-28 06:08:22,2022-07-28,26.7,both


In [5]:
# Remove blank rows
df_cleaned = df.dropna()

In [6]:
# Create a value column and group by start and end station 

df_cleaned['value'] = 1
df_group = df_cleaned.groupby(['start_station_name', 'end_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng'], as_index=False).agg({'value' : 'sum'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['value'] = 1


In [7]:
df_group.head()

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,value
0,1 Ave & E 110 St,Adam Clayton Powell Blvd & W 132 St,40.792328,-73.9383,40.813538,-73.94521,1
1,1 Ave & E 110 St,E 147 St & Bergen Ave,40.792328,-73.9383,40.814674,-73.91839,1
2,1 Ave & E 110 St,E 85 St & 3 Ave,40.792385,-73.93809,40.77801,-73.95407,1
3,1 Ave & E 110 St,Frederick Douglass Blvd & W 145 St,40.792328,-73.9383,40.823063,-73.941925,1
4,1 Ave & E 16 St,2 Ave & E 29 St,40.73222,-73.98166,40.741722,-73.978096,1


In [8]:
df_group.dtypes

start_station_name     object
end_station_name       object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
value                   int64
dtype: object

In [9]:
#change the data type of 'value'
df_group['value'] = df_group['value'].astype('float')

In [10]:
df_group.dtypes

start_station_name     object
end_station_name       object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
value                 float64
dtype: object

In [14]:
df_group.to_csv('NY_Locations_For_Map.csv')

## 4. Initialize an instance of a Kepler.gl map

In [11]:
import gc # this is garbage collector - speeds up performance
gc.collect()

20

In [12]:
# Create KeplerGl instance

m = KeplerGl(height=400, data={"data_1": df_group})
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'data_1':                 start_station_name                     end_station_name  \
0         …

## 5. Customize output of map

### I changed the color for the start and end stations to make it more visible and differentiate the two. Then I made the arc visible for both the start and end stations and changed the color of the arcs.  

## 6. Add filter to map and write observations

### I added a filter to see the most popular stations, the stations were 12 Ave & 40 St and Vesey St & Church St. After a quick search on google these popular destinations were near the Hudson River Park and near The World Trade Center. These results make sense because they are popular destinations in New York. 

## 7. Create a config object and save your map with it

In [13]:
config = m.config

In [14]:
config

{'version': 'v1',
 'config': {'visState': {'filters': [{'dataId': ['data_1'],
     'id': '47qvtfe39',
     'name': ['value'],
     'type': 'range',
     'value': [1, 5],
     'enlarged': False,
     'plotType': 'histogram',
     'animationWindow': 'free',
     'yAxis': None,
     'speed': 1}],
   'layers': [{'id': '01hrt0v',
     'type': 'point',
     'config': {'dataId': 'data_1',
      'label': 'start',
      'color': [223, 73, 22],
      'highlightColor': [252, 242, 26, 255],
      'columns': {'lat': 'start_lat', 'lng': 'start_lng', 'altitude': None},
      'isVisible': True,
      'visConfig': {'radius': 10,
       'fixedRadius': False,
       'opacity': 0.8,
       'outline': False,
       'thickness': 2,
       'strokeColor': None,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'stroke

In [15]:
import json
with open("config.json", "w") as outfile:
    json.dump(config, outfile)

In [16]:
m.save_to_html(file_name = 'NY_Bike_Trips_Aggregated.html', read_only = False, config = config)

Map saved to NY_Bike_Trips_Aggregated.html!
