In [1]:
%matplotlib notebook

# Dependencies
import gmaps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

# Path to the source CSV, given relative to the notebook's working directory
metrotransit2020_csv = "MetroTransit_2020.csv"

# Load the CSV into a DataFrame and preview its first five rows
metrotransit2020_df = pd.read_csv(filepath_or_buffer=metrotransit2020_csv)
metrotransit2020_df.head(5)

Unnamed: 0,week,week_of,srv,route_class,line_id,dir,trip_cnt,obs_trips,seq,site_id,location,ons,offs
0,5,1/27/2020,WK,CoreLoc,10,North,96,96.0,1.0,19337,Leamington Ramp & Lower - Gate #7,114.65,24.766667
1,5,1/27/2020,WK,CoreLoc,10,North,96,96.0,2.0,19277,3rd Ave S & 11th St S,10.366667,2.733333
2,5,1/27/2020,WK,CoreLoc,10,North,96,96.0,3.0,41911,2nd Ave S & Convention Center,35.0,1.166667
3,5,1/27/2020,WK,CoreLoc,10,North,96,96.0,4.0,17988,Nicollet Mall & Alice Rainville,119.566667,3.533333
4,5,1/27/2020,WK,CoreLoc,10,North,96,96.0,5.0,17990,Nicollet Mall & 11th St S,140.816667,4.05


In [2]:
# Convert the "week_of" strings to pandas timestamps so later cells can
# group on real dates, then print the column dtypes / non-null counts.
parsed_week_of = pd.to_datetime(metrotransit2020_df["week_of"])
metrotransit2020_df["week_of"] = parsed_week_of
metrotransit2020_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176177 entries, 0 to 176176
Data columns (total 13 columns):
week           176177 non-null int64
week_of        176177 non-null datetime64[ns]
srv            176177 non-null object
route_class    176177 non-null object
line_id        176177 non-null int64
dir            176177 non-null object
trip_cnt       176177 non-null int64
obs_trips      175855 non-null float64
seq            175855 non-null float64
site_id        176177 non-null int64
location       176177 non-null object
ons            175855 non-null float64
offs           175855 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(4), object(4)
memory usage: 17.5+ MB


In [3]:
# Scatter plot of "ons" against "offs" for a random sample of 500 rows.
# random_state pins the sample so a Restart & Run All redraws the same figure.
sample_df = metrotransit2020_df.sample(n=500, random_state=42)

ons = sample_df["ons"]
offs = sample_df["offs"]

plt.scatter(ons, offs, marker="o", facecolors="red", edgecolors="black")
plt.xlabel("Ons")
plt.ylabel("Offs")
# show must be *called*; the bare attribute `plt.show` only echoes the
# function object as the cell's output.
plt.show()

<IPython.core.display.Javascript object>

<function matplotlib.pyplot.show(*args, **kw)>

In [4]:
# Isolate the records for the highest-ON location, "7th & Nicollet Station"

at_top_on_stop = metrotransit2020_df["location"].eq("7th & Nicollet Station")
highest_on_df = metrotransit2020_df.loc[at_top_on_stop]

# Keep only the columns used by the week-to-week summary
clean_highest_on_df = highest_on_df.reindex(columns=["week_of", "ons", "offs", "trip_cnt"])
clean_highest_on_df

Unnamed: 0,week_of,ons,offs,trip_cnt
2496,2020-01-27,161.566667,26.300000,35
2497,2020-01-27,161.566667,26.300000,35
5648,2020-01-27,0.000000,9.550000,5
6193,2020-01-27,0.200000,16.850000,4
7080,2020-01-27,469.916667,508.533333,124
...,...,...,...,...
168800,2020-04-13,171.083333,196.000000,82
168801,2020-04-13,85.750000,100.666667,82
173152,2020-04-13,5.166667,2.000000,3
176058,2020-04-13,473.583333,61.500000,87


In [5]:
# Bucket the filtered rows by their week-start date
summary_ons = clean_highest_on_df.groupby("week_of")

# Sum both measures in a single pass over the groups
weekly_totals = summary_ons[["ons", "trip_cnt"]].sum()
ons_sum = weekly_totals["ons"]
trips_sum = weekly_totals["trip_cnt"]

# One row per week: total ons and total trips, rounded to two decimals
summary_table = weekly_totals.rename(columns={"ons": "Ons", "trip_cnt": "Trips"}).round(2)

summary_table

Unnamed: 0_level_0,Ons,Trips
week_of,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-27,3406.72,549
2020-02-03,3469.03,549
2020-02-10,3333.47,549
2020-02-17,3154.35,549
2020-02-24,3497.45,549
2020-03-02,3517.12,549
2020-03-09,3342.85,549
2020-03-16,2233.5,549
2020-03-30,1513.33,406
2020-04-06,1364.63,406


In [6]:
# Bar chart of the weekly totals held in summary_table.
# Each column gets its own colour so the "Ons" and "Trips" bars can be told
# apart in the legend; a single facecolor would paint both series the same.
summary_table.plot(kind="bar", color=["red", "steelblue"])
plt.title("Metro Transit Traffic from January through April")
plt.xlabel("Weeks")
plt.ylabel("Number of Passenger Ons")
# Adjust the layout *before* rendering so the tick labels fit inside the figure.
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [7]:
# Display the unaggregated records for the highest-ON location.
# A bare `groupby("week_of")` call was removed here: its result was never
# assigned or used, so it had no effect on the frame shown below.
highest_on_df

Unnamed: 0,week,week_of,srv,route_class,line_id,dir,trip_cnt,obs_trips,seq,site_id,location,ons,offs
2496,5,2020-01-27,WK,CoreLoc,19,North,35,34.0,7.0,17902,7th & Nicollet Station,161.566667,26.300000
2497,5,2020-01-27,WK,SupportLoc,19,North,35,34.0,7.0,17902,7th & Nicollet Station,161.566667,26.300000
5648,5,2020-01-27,WK,CoreLoc,3,West,5,5.0,126.0,17902,7th & Nicollet Station,0.000000,9.550000
6193,5,2020-01-27,WK,SupportLoc,39,North,4,4.0,13.0,17902,7th & Nicollet Station,0.200000,16.850000
7080,5,2020-01-27,WK,CoreLoc,5,North,124,122.0,81.0,17902,7th & Nicollet Station,469.916667,508.533333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
168800,16,2020-04-13,WK,CoreLoc,5,North,82,70.0,81.0,17902,7th & Nicollet Station,171.083333,196.000000
168801,16,2020-04-13,WK,CoreLoc,5,North,82,70.0,82.0,17902,7th & Nicollet Station,85.750000,100.666667
173152,16,2020-04-13,WK,SuburbLoc,721,North,3,3.0,7.0,17902,7th & Nicollet Station,5.166667,2.000000
176058,16,2020-04-13,WK,CommExpress,923,North,87,84.0,4.0,17902,7th & Nicollet Station,473.583333,61.500000


In [8]:
# Isolate the records for the highest-OFF location, "Nicollet Mall & 7th St S"

at_top_off_stop = metrotransit2020_df["location"].eq("Nicollet Mall & 7th St S")
highest_off_df = metrotransit2020_df.loc[at_top_off_stop]
highest_off_df.head(5)

Unnamed: 0,week,week_of,srv,route_class,line_id,dir,trip_cnt,obs_trips,seq,site_id,location,ons,offs
6,5,2020-01-27,WK,CoreLoc,10,North,99,99.0,7.0,17994,Nicollet Mall & 7th St S,575.116667,67.683333
205,5,2020-01-27,WK,CoreLoc,10,South,96,94.0,99.0,17980,Nicollet Mall & 7th St S,111.6,536.316667
255,5,2020-01-27,WK,CoreLoc,11,North,66,65.0,44.0,17994,Nicollet Mall & 7th St S,267.4,255.9
347,5,2020-01-27,WK,CoreLoc,11,South,66,66.0,48.0,17980,Nicollet Mall & 7th St S,202.983333,255.716667
794,5,2020-01-27,WK,CoreLoc,12,East,19,19.0,91.0,17994,Nicollet Mall & 7th St S,9.483333,153.016667
