In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins

import warnings

# Apply the style sheet FIRST: plt.style.use() overwrites every rcParam the
# sheet defines, so manual overrides set before it may be silently discarded.
plt.style.use('fivethirtyeight')

# Notebook-wide overrides on top of the style sheet.
plt.rcParams['figure.figsize'] = (15, 6)
plt.rcParams['font.size'] = 12

# Silence library warnings so they do not clutter the rendered notebook.
warnings.filterwarnings('ignore')

>Citi Bike is a privately owned public bicycle sharing system serving the New York City boroughs of the Bronx, Brooklyn, Manhattan, and Queens, as well as Jersey City, New Jersey. Named after lead sponsor Citigroup, it is operated by Motivate (formerly Alta Bicycle Share), with former Metropolitan Transportation Authority CEO Jay Walder as chief executive until September 30, 2018, when the company was acquired by Lyft. The system's bikes and stations use BIXI-branded technology from PBSC Urban Solutions.

The goal of this notebook is to visualize Citi Bike trip data for January–April 2021.

### Table of Contents
* [Data Preparation](#chap1)
* [Visual Data Analysis](#chap2)
    * [Duration of Trips](#chap21)
    * [Weekly Usage](#chap22)
    * [Popular Stations](#chap23)
    * [Popular Stations on Maps](#chap24)
    * [Most Popular Routes](#chap25)
    * [Customer Visualization](#chap26)


# Data Preparation <a class="anchor" id="chap1"></a>

In [None]:
# All four monthly exports sit in the same Kaggle input folder.
DATA_DIR = '/kaggle/input/citibike-bike-sharingnewyork-cityjan-to-apr-2021/CitiBike_Trip_Data/2021'

january = pd.read_csv(f'{DATA_DIR}/January2021.csv')
february = pd.read_csv(f'{DATA_DIR}/February2021.csv')
march = pd.read_csv(f'{DATA_DIR}/March2021.csv')
april = pd.read_csv(f'{DATA_DIR}/April2021.csv')

In [None]:
# Convert the start/stop timestamp columns of every monthly frame to datetime,
# using one explicit format string for all of them.
TIMESTAMP_FORMAT = "%Y/%m/%d %H:%M:%S"

for frame in (january, february, march, april):
    for column in ('starttime', 'stoptime'):
        frame[column] = pd.to_datetime(frame[column], format=TIMESTAMP_FORMAT)

In [None]:
# Rescale 'tripduration' by 1/60 (the later plots label it as minutes).
# The untouched values are preserved in 'tripduration_sec' the first time this
# cell runs, and 'tripduration' is always derived from that copy, so running
# the cell again does not divide the column by 60 a second time.
for frame in (january, february, march, april):
    if 'tripduration_sec' not in frame.columns:
        frame['tripduration_sec'] = frame['tripduration']
    frame['tripduration'] = frame['tripduration_sec'] / 60

In [None]:
# Preview the first five rows of the January frame.
january.head(n=5)

# Visual Data Analysis <a class="anchor" id="chap2"></a>

## Duration of Trips <a class="anchor" id="chap21"></a>

In [None]:
# Report the longest trip of each month, one line per month.
max_lines = [
    f"{label} max: {frame['tripduration'].max()}"
    for label, frame in (
        ("January", january),
        ("February", february),
        ("March", march),
        ("April", april),
    )
]
print("\n".join(max_lines))

In [None]:
# One histogram of trip durations per month, stacked vertically.
fig, ax = plt.subplots(4, figsize=(12, 15))

# 100 logarithmically spaced bin edges from 10**0 to 10**6, shared by all panels.
duration_bins = np.logspace(0, 6, 100)

monthly_frames = [
    ("January", january),
    ("February", february),
    ("March", march),
    ("April", april),
]
for axis, (label, frame) in zip(ax, monthly_frames):
    sns.histplot(x=frame['tripduration'], ax=axis, bins=duration_bins)
    axis.set_xscale("log")
    axis.set_title(label, fontsize=14)

fig.suptitle("Histograms of Trip Durations in Minutes for Each Month", fontsize=16)
plt.tight_layout()
plt.show()

## Weekly Usage <a class="anchor" id="chap22"></a>

In [None]:
# Derive the weekday name of each trip's start and stop timestamp
# ('starttime' -> 'startweekday', 'stoptime' -> 'stopweekday').
for frame in (january, february, march, april):
    for prefix in ('start', 'stop'):
        frame[f'{prefix}weekday'] = frame[f'{prefix}time'].dt.day_name()

In [None]:
# Trips per start weekday, one panel per month; bars are ordered from the
# busiest to the quietest weekday of that month.
fig, ax = plt.subplots(4, figsize=(12, 15))

monthly_frames = [
    ("January", january),
    ("February", february),
    ("March", march),
    ("April", april),
]
for axis, (label, frame) in zip(ax, monthly_frames):
    # The column must be passed as x=...: recent seaborn releases accept only
    # `data` positionally, so a positional column name raises a TypeError.
    sns.countplot(
        x='startweekday',
        data=frame,
        order=frame['startweekday'].value_counts().index,
        palette='flare',
        ax=axis,
    )
    axis.set_title(label, fontsize=14)

fig.suptitle("Usages Each Day of Week", fontsize=16)
plt.tight_layout()
plt.show()

## Popular Stations <a class="anchor" id="chap23"></a>

In [None]:
# Number of trips started at each of the 15 busiest start stations,
# one panel per month.
fig, ax = plt.subplots(4, figsize=(12, 15))

monthly_frames = [
    ("January", january),
    ("February", february),
    ("March", march),
    ("April", april),
]
for axis, (label, frame) in zip(ax, monthly_frames):
    # The column must be passed as x=...: recent seaborn releases accept only
    # `data` positionally, so a positional column name raises a TypeError.
    sns.countplot(
        x='start station name',
        data=frame,
        # value_counts() sorts descending, so the first 15 are the busiest.
        order=frame['start station name'].value_counts().index[:15],
        palette='flare',
        ax=axis,
    )
    axis.set_title(label, fontsize=14)
    axis.tick_params(labelsize=10, labelrotation=45)

fig.suptitle("Starts of Trips for Top 15 Stations", fontsize=16)
plt.tight_layout()
plt.show()

## Popular Stations on Maps <a class="anchor" id="chap24"></a>

In [None]:
# Count January trips per start station (one row per latitude/longitude/name).
tmp = january.groupby(['start station latitude', 'start station longitude', 'start station name']).size().to_frame('size').reset_index()
lats = tmp['start station latitude']
longs = tmp['start station longitude']
names = tmp['start station name']
size = tmp['size']
# Split the usage counts into four equal-width ranges, coloured light to dark.
colors = pd.cut(tmp['size'], bins=4, labels=['orange', 'lightred', 'red', 'darkred'])
places = [[lat, lon] for lat, lon in zip(lats, longs)]

# Centre the map on the first station of the grouped table.
m = folium.Map(places[0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>January</b></h3>'))
# Walk the four sequences in lock-step so each marker carries the name, count
# and colour of the station at ITS OWN location; iterating places[1:] with a
# zero-based index paired every marker with the previous station's data and
# left the first station without a marker.
for loc, color, name, count in zip(places, colors, names, size):
    folium.Marker(loc, icon=folium.Icon(color=color), popup=f"Station {name}\nUsage in a month:\n{count}").add_to(m)
m

In [None]:
# Count February trips per start station (one row per latitude/longitude/name).
tmp = february.groupby(['start station latitude', 'start station longitude', 'start station name']).size().to_frame('size').reset_index()
lats = tmp['start station latitude']
longs = tmp['start station longitude']
names = tmp['start station name']
size = tmp['size']
# Split the usage counts into four equal-width ranges, coloured light to dark.
colors = pd.cut(tmp['size'], bins=4, labels=['orange', 'lightred', 'red', 'darkred'])
places = [[lat, lon] for lat, lon in zip(lats, longs)]

# Centre the map on the first station of the grouped table.
m = folium.Map(places[0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>February</b></h3>'))
# Walk the four sequences in lock-step so each marker carries the name, count
# and colour of the station at ITS OWN location; iterating places[1:] with a
# zero-based index paired every marker with the previous station's data and
# left the first station without a marker.
for loc, color, name, count in zip(places, colors, names, size):
    folium.Marker(loc, icon=folium.Icon(color=color), popup=f"Station {name}\nUsage in a month:\n{count}").add_to(m)
m

In [None]:
# Count March trips per start station (one row per latitude/longitude/name).
tmp = march.groupby(['start station latitude', 'start station longitude', 'start station name']).size().to_frame('size').reset_index()
lats = tmp['start station latitude']
longs = tmp['start station longitude']
names = tmp['start station name']
size = tmp['size']
# Split the usage counts into four equal-width ranges, coloured light to dark.
colors = pd.cut(tmp['size'], bins=4, labels=['orange', 'lightred', 'red', 'darkred'])
places = [[lat, lon] for lat, lon in zip(lats, longs)]

# Centre the map on the first station of the grouped table.
m = folium.Map(places[0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>March</b></h3>'))
# Walk the four sequences in lock-step so each marker carries the name, count
# and colour of the station at ITS OWN location; iterating places[1:] with a
# zero-based index paired every marker with the previous station's data and
# left the first station without a marker.
for loc, color, name, count in zip(places, colors, names, size):
    folium.Marker(loc, icon=folium.Icon(color=color), popup=f"Station {name}\nUsage in a month:\n{count}").add_to(m)
m

In [None]:
# Count April trips per start station (one row per latitude/longitude/name).
tmp = april.groupby(['start station latitude', 'start station longitude', 'start station name']).size().to_frame('size').reset_index()
lats = tmp['start station latitude']
longs = tmp['start station longitude']
names = tmp['start station name']
size = tmp['size']
# Split the usage counts into four equal-width ranges, coloured light to dark.
colors = pd.cut(tmp['size'], bins=4, labels=['orange', 'lightred', 'red', 'darkred'])
places = [[lat, lon] for lat, lon in zip(lats, longs)]

# Centre the map on the first station of the grouped table.
m = folium.Map(places[0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>April</b></h3>'))
# Walk the four sequences in lock-step so each marker carries the name, count
# and colour of the station at ITS OWN location; iterating places[1:] with a
# zero-based index paired every marker with the previous station's data and
# left the first station without a marker.
for loc, color, name, count in zip(places, colors, names, size):
    folium.Marker(loc, icon=folium.Icon(color=color), popup=f"Station {name}\nUsage in a month:\n{count}").add_to(m)
m

## Most Popular Routes <a class="anchor" id="chap25"></a>

In [None]:
# The 18 most frequent (start station, end station) pairs in January.
tmp = january.groupby(['start station latitude', 'end station latitude', 'start station name', 'start station longitude', 'end station longitude', 'end station name']).size().nlargest(18).to_frame('size').reset_index()
latstart = tmp['start station latitude']
longstart = tmp['start station longitude']
namestart = tmp['start station name']
latsend = tmp['end station latitude']
longsend = tmp['end station longitude']
nameend = tmp['end station name']
size = tmp['size']
# One marker colour per route; the list length (18) matches nlargest(18) above.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',\
          'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\
          'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']
# 'lightred' and 'darkpurple' are used as marker-icon colours but are not CSS
# colour names, so a PolyLine given those strings cannot be drawn in the
# intended colour. Hex stand-ins (approximate matches) are used for lines only.
line_colors = {'lightred': '#ff8e7f', 'darkpurple': '#5b396b'}
placestart = [[x[0],x[1]] for x in zip(latstart, longstart)]
placesend = [[x[0],x[1]] for x in zip(latsend, longsend)]
places = list(zip(placestart, placesend))

# Centre the map on the start station of the most popular route.
m = folium.Map(places[0][0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>January</b></h3>'))
for i, pair in enumerate(places):
    # Each route gets its own cluster holding its start and end marker.
    marker_cluster = plugins.MarkerCluster().add_to(m)
    folium.Marker(pair[0], icon=folium.Icon(color=colors[i]), popup=f"Start {namestart[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.Marker(pair[1], icon=folium.Icon(color=colors[i]), popup=f"End {nameend[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.PolyLine(pair, color=line_colors.get(colors[i], colors[i])).add_to(m)
m

In [None]:
# The 18 most frequent (start station, end station) pairs in February.
tmp = february.groupby(['start station latitude', 'end station latitude', 'start station name', 'start station longitude', 'end station longitude', 'end station name']).size().nlargest(18).to_frame('size').reset_index()
latstart = tmp['start station latitude']
longstart = tmp['start station longitude']
namestart = tmp['start station name']
latsend = tmp['end station latitude']
longsend = tmp['end station longitude']
nameend = tmp['end station name']
size = tmp['size']
# One marker colour per route; the list length (18) matches nlargest(18) above.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',\
          'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\
          'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']
# 'lightred' and 'darkpurple' are used as marker-icon colours but are not CSS
# colour names, so a PolyLine given those strings cannot be drawn in the
# intended colour. Hex stand-ins (approximate matches) are used for lines only.
line_colors = {'lightred': '#ff8e7f', 'darkpurple': '#5b396b'}
placestart = [[x[0],x[1]] for x in zip(latstart, longstart)]
placesend = [[x[0],x[1]] for x in zip(latsend, longsend)]
places = list(zip(placestart, placesend))

# Centre the map on the start station of the most popular route.
m = folium.Map(places[0][0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>February</b></h3>'))
for i, pair in enumerate(places):
    # Each route gets its own cluster holding its start and end marker.
    marker_cluster = plugins.MarkerCluster().add_to(m)
    folium.Marker(pair[0], icon=folium.Icon(color=colors[i]), popup=f"Start {namestart[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.Marker(pair[1], icon=folium.Icon(color=colors[i]), popup=f"End {nameend[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.PolyLine(pair, color=line_colors.get(colors[i], colors[i])).add_to(m)
m

In [None]:
# The 18 most frequent (start station, end station) pairs in March.
tmp = march.groupby(['start station latitude', 'end station latitude', 'start station name', 'start station longitude', 'end station longitude', 'end station name']).size().nlargest(18).to_frame('size').reset_index()
latstart = tmp['start station latitude']
longstart = tmp['start station longitude']
namestart = tmp['start station name']
latsend = tmp['end station latitude']
longsend = tmp['end station longitude']
nameend = tmp['end station name']
size = tmp['size']
# One marker colour per route; the list length (18) matches nlargest(18) above.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',\
          'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\
          'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']
# 'lightred' and 'darkpurple' are used as marker-icon colours but are not CSS
# colour names, so a PolyLine given those strings cannot be drawn in the
# intended colour. Hex stand-ins (approximate matches) are used for lines only.
line_colors = {'lightred': '#ff8e7f', 'darkpurple': '#5b396b'}
placestart = [[x[0],x[1]] for x in zip(latstart, longstart)]
placesend = [[x[0],x[1]] for x in zip(latsend, longsend)]
places = list(zip(placestart, placesend))

# Centre the map on the start station of the most popular route.
m = folium.Map(places[0][0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>March</b></h3>'))
for i, pair in enumerate(places):
    # Each route gets its own cluster holding its start and end marker.
    marker_cluster = plugins.MarkerCluster().add_to(m)
    folium.Marker(pair[0], icon=folium.Icon(color=colors[i]), popup=f"Start {namestart[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.Marker(pair[1], icon=folium.Icon(color=colors[i]), popup=f"End {nameend[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.PolyLine(pair, color=line_colors.get(colors[i], colors[i])).add_to(m)
m

In [None]:
# The 18 most frequent (start station, end station) pairs in April.
tmp = april.groupby(['start station latitude', 'end station latitude', 'start station name', 'start station longitude', 'end station longitude', 'end station name']).size().nlargest(18).to_frame('size').reset_index()
latstart = tmp['start station latitude']
longstart = tmp['start station longitude']
namestart = tmp['start station name']
latsend = tmp['end station latitude']
longsend = tmp['end station longitude']
nameend = tmp['end station name']
size = tmp['size']
# One marker colour per route; the list length (18) matches nlargest(18) above.
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',\
          'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',\
          'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']
# 'lightred' and 'darkpurple' are used as marker-icon colours but are not CSS
# colour names, so a PolyLine given those strings cannot be drawn in the
# intended colour. Hex stand-ins (approximate matches) are used for lines only.
line_colors = {'lightred': '#ff8e7f', 'darkpurple': '#5b396b'}
placestart = [[x[0],x[1]] for x in zip(latstart, longstart)]
placesend = [[x[0],x[1]] for x in zip(latsend, longsend)]
places = list(zip(placestart, placesend))

# Centre the map on the start station of the most popular route.
m = folium.Map(places[0][0], tiles='OpenStreetMap', zoom_start=12)
m.get_root().html.add_child(folium.Element('<h3 align="center" style="font-size:16px"><b>April</b></h3>'))
for i, pair in enumerate(places):
    # Each route gets its own cluster holding its start and end marker.
    marker_cluster = plugins.MarkerCluster().add_to(m)
    folium.Marker(pair[0], icon=folium.Icon(color=colors[i]), popup=f"Start {namestart[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.Marker(pair[1], icon=folium.Icon(color=colors[i]), popup=f"End {nameend[i]}\nUsage in a month:\n{size[i]}").add_to(marker_cluster)
    folium.PolyLine(pair, color=line_colors.get(colors[i], colors[i])).add_to(m)
m

## Customer Visualization <a class="anchor" id="chap26"></a>

In [None]:
# Trip counts per user type, one labelled section per month.
usertype_sections = [
    f"{label} 2021:\n{frame.groupby('usertype').size()}"
    for label, frame in (
        ("January", january),
        ("February", february),
        ("March", march),
        ("April", april),
    )
]
print("Customer&Subscriber trips in\n" + "\n".join(usertype_sections))

In [None]:
# Trip-duration histograms split by user type: one row per month,
# customers in the left column and subscribers in the right column.
fig, ax = plt.subplots(4, 2, figsize=(12, 15))

# 100 logarithmically spaced bin edges from 10**0 to 10**6, shared by all panels.
duration_bins = np.logspace(0, 6, 100)

monthly_frames = [
    ("January", january),
    ("February", february),
    ("March", march),
    ("April", april),
]
# (value in the 'usertype' column, plural label used in the panel title)
user_groups = [("Customer", "Customers"), ("Subscriber", "Subscribers")]

for row, (month_label, frame) in enumerate(monthly_frames):
    for col, (usertype, plural_label) in enumerate(user_groups):
        axis = ax[row][col]
        sns.histplot(x=frame[frame.usertype == usertype]['tripduration'], ax=axis, bins=duration_bins)
        axis.set_xscale("log")
        # Titles are generated so every panel is spelled consistently
        # (the hand-written April title read "Subcribers").
        axis.set_title(f"{plural_label} in {month_label}", fontsize=14)

fig.suptitle("Histograms of Trip Durations in Minutes for Each Month", fontsize=16)
plt.tight_layout()
plt.show()