In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no cell in the visible part of this notebook reads from the
# mount point (the dataset is fetched over HTTP) - confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import json
import requests
import pickle

In [None]:
# Plotting configuration: route pandas' .plot() accessor through plotly and
# make "plotly_white" the default template for figures created afterwards.
pd.set_option("plotting.backend", "plotly")
pio.templates.default = "plotly_white"

In [None]:
# Load the NYC taxi trips and derive the columns used by the figures below.
#
# First .assign():
#   log_trip_duration  natural log of trip_duration
#   pickup_datetime /  parsed to datetime64 with one vectorised pd.to_datetime
#   dropoff_datetime   call per column (instead of a per-row .apply)
#   dist               straight-line distance between pickup and dropoff,
#                      computed directly on the latitude/longitude values
#   vendor_id          recoded 1 -> 'A', 2 -> 'B'
# Second .assign() (needs the columns created above):
#   speed              dist / trip_duration
#   pickup_hour, dropoff_hour, dayofweek   taken from the parsed datetimes
#
# The lambda parameter is named `trips` so it does not shadow the global `df`.
df = pd.read_csv("https://raw.githubusercontent.com/guebin/DV2023/main/posts/NYCTaxi.csv").assign(
    log_trip_duration = lambda trips: np.log(trips.trip_duration),
    pickup_datetime = lambda trips: pd.to_datetime(trips.pickup_datetime),
    dropoff_datetime = lambda trips: pd.to_datetime(trips.dropoff_datetime),
    dist = lambda trips: np.sqrt(
        (trips.pickup_latitude - trips.dropoff_latitude)**2
        + (trips.pickup_longitude - trips.dropoff_longitude)**2
    ),
    #---#
    vendor_id = lambda trips: trips.vendor_id.map({1:'A',2:'B'})
).assign(
    speed = lambda trips: trips.dist / trips.trip_duration,
    pickup_hour = lambda trips: trips.pickup_datetime.dt.hour,
    dropoff_hour = lambda trips: trips.dropoff_datetime.dt.hour,
    dayofweek = lambda trips: trips.pickup_datetime.dt.dayofweek
)
# Every 100th trip, re-indexed from 0; small enough to draw trip by trip on a map.
df_small = df[::100].reset_index(drop=True)

df_small

Unnamed: 0,id,vendor_id,pickup_datetime,dropoff_datetime,passenger_count,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,store_and_fwd_flag,trip_duration,log_trip_duration,dist,speed,pickup_hour,dropoff_hour,dayofweek
0,id2875421,B,2016-03-14 17:24:55,2016-03-14 17:32:30,1,-73.982155,40.767937,-73.964630,40.765602,N,455,6.120297,0.017680,0.000039,17,17,0
1,id3667993,B,2016-01-03 04:18:57,2016-01-03 04:27:03,1,-73.980522,40.730530,-73.997993,40.746220,N,486,6.186209,0.023482,0.000048,4,4,6
2,id2002463,B,2016-01-14 12:28:56,2016-01-14 12:37:17,1,-73.965652,40.768398,-73.960068,40.779308,N,501,6.216606,0.012256,0.000024,12,12,3
3,id1635353,B,2016-03-04 23:20:58,2016-03-04 23:49:29,5,-73.985092,40.759190,-73.962151,40.709850,N,1711,7.444833,0.054412,0.000032,23,23,4
4,id1850636,A,2016-02-05 00:21:28,2016-02-05 00:52:24,1,-73.994537,40.750439,-74.025719,40.631100,N,1856,7.526179,0.123345,0.000066,0,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,id0621879,A,2016-04-23 09:31:33,2016-04-23 09:51:33,1,-73.950783,40.743614,-74.006218,40.722729,N,1200,7.090077,0.059239,0.000049,9,9,5
142,id2587483,B,2016-03-28 12:59:58,2016-03-28 13:08:11,2,-73.953903,40.787079,-73.940842,40.792461,N,493,6.200509,0.014127,0.000029,12,13,0
143,id1030598,B,2016-03-03 11:44:24,2016-03-03 11:49:59,1,-74.005066,40.719143,-74.006065,40.735134,N,335,5.814131,0.016022,0.000048,11,11,3
144,id3094934,A,2016-03-21 09:53:40,2016-03-21 10:22:20,1,-73.986153,40.722431,-73.985977,40.762669,N,1720,7.450080,0.040238,0.000023,9,10,0


In [None]:
# fig 1 / fig 2: weekday x pickup-hour heatmaps (mean speed, mean distance).
# Both figures share one pipeline, so it lives in two small helpers.

# pandas dayofweek codes (0 = Monday ... 6 = Sunday) -> Korean weekday labels.
DAY_LABELS = {0: '월', 1: '화', 2: '수', 3: '목', 4: '금', 5: '토', 6: '일'}
# Category order of the weekday axis; reorder this list to change the Y-axis order.
DAY_ORDER = ['일', '토', '금', '목', '수', '화', '월']


def mean_by_day_and_hour(trips, value_col):
    """Average `value_col` for every (dayofweek, pickup_hour) pair.

    Parameters
    ----------
    trips : pd.DataFrame
        Needs the columns `dayofweek` (codes 0-6), `pickup_hour` and `value_col`.
    value_col : str
        Name of the numeric column to average.

    Returns
    -------
    pd.DataFrame
        Long-form table with columns `dayofweek`, `pickup_hour`, `value_col`.
        `dayofweek` is an ordered categorical of the labels in DAY_LABELS,
        ordered as in DAY_ORDER.
    """
    avg = trips.groupby(['dayofweek', 'pickup_hour'])[value_col].mean().reset_index()
    # .map (not .replace) to recode, consistent with the vendor_id recoding.
    avg['dayofweek'] = pd.Categorical(
        avg['dayofweek'].map(DAY_LABELS), categories=DAY_ORDER, ordered=True
    )
    return avg


def day_hour_heatmap(pivot, title, colorbar_title):
    """Draw `pivot` (weekday rows x pickup-hour columns) with px.imshow.

    Sets the figure title and the colorbar title from the arguments and
    labels the X / Y axes as pickup hour / weekday. Returns the figure.
    """
    fig = px.imshow(pivot)
    fig.update_layout(
        title=dict(text=title),
        xaxis=dict(title=dict(text='pickup 시간')),
        yaxis=dict(title=dict(text='요일')),
        coloraxis=dict(colorbar=dict(title=dict(text=colorbar_title))),
    )
    return fig


# fig 1

df_avg_spd = mean_by_day_and_hour(df, 'speed')
df_pivot_spd = df_avg_spd.pivot(index='dayofweek', columns='pickup_hour', values='speed')
fig1 = day_hour_heatmap(df_pivot_spd, '요일, pick up시간에 따른 평균속력 시각화', '평균속력')

# fig 2

df_avg_dst = mean_by_day_and_hour(df, 'dist')
df_pivot_dst = df_avg_dst.pivot(index='dayofweek', columns='pickup_hour', values='dist')
fig2 = day_hour_heatmap(df_pivot_dst, '요일,pick up시간에 따른 평균이동거리 시각화', '평균이동거리')

# fig 3: one line per sampled trip (pickup -> dropoff), coloured by speed quartile.

# Quartile labels from slowest to fastest; used for binning, legend order and colours.
SPEED_LABELS = ['매우느림', '조금느림', '조금빠름', '매우빠름']

df_small['speed_quartile'] = pd.qcut(df_small['speed'], 4, labels=SPEED_LABELS)


def _trip_endpoints(trips, kind):
    """Return one row per trip holding its `kind` ('pickup' or 'dropoff') point.

    Columns: longitude, latitude, speed_quartile, id (the trip's index label).
    """
    return pd.DataFrame({
        'longitude': trips[f'{kind}_longitude'],
        'latitude': trips[f'{kind}_latitude'],
        # Plain objects rather than a categorical; missing quartiles stay NaN
        # so the dropna() below removes those trips.
        'speed_quartile': trips['speed_quartile'].astype(object),
        'id': trips.index.to_numpy(),
    })


# Two rows per trip, pickup before dropoff. Built with a vectorised concat
# instead of an iterrows loop, and without separator rows: line_group='id'
# already keeps each trip as its own line.
plot_data = pd.concat(
    [_trip_endpoints(df_small, 'pickup'), _trip_endpoints(df_small, 'dropoff')],
    ignore_index=True,
).dropna()

colors = px.colors.qualitative.Plotly
quartile_colors = dict(zip(SPEED_LABELS, colors))

fig3 = px.line_mapbox(
    plot_data,
    lat='latitude',
    lon='longitude',
    color='speed_quartile',
    line_group='id',
    zoom=11,
    category_orders={'speed_quartile': SPEED_LABELS},
    # Passed explicitly so the lines are guaranteed to use the same colours
    # as the endpoint markers added below.
    color_discrete_map=quartile_colors,
)

# Overlay pickup and dropoff markers, one marker trace per quartile.
for quartile in SPEED_LABELS:
    filtered_df = df_small[df_small['speed_quartile'] == quartile]

    fig3.add_scattermapbox(
        lat=pd.concat([filtered_df['pickup_latitude'], filtered_df['dropoff_latitude']]),
        lon=pd.concat([filtered_df['pickup_longitude'], filtered_df['dropoff_longitude']]),
        mode='markers',
        marker=dict(size=5, color=quartile_colors[quartile]),
        name=quartile,
        # Same legend group as the quartile's lines, so toggling the legend
        # entry hides or shows the markers together with the lines.
        legendgroup=quartile,
        showlegend=False,
    )

fig3.update_layout(legend=dict(title=dict(text='평균속력')))



In [None]:
# Render fig1, the heatmap of mean speed by weekday and pickup hour.
fig1.show()

In [None]:
# Render fig2, the heatmap of mean distance by weekday and pickup hour.
fig2.show()