# <span style='color:#A80808'>Motivation</span>

This notebook provides animations for time-space congestion visualizations. The idea is to animate how congestion changes over time for all 12 locations and 65 roadways. For a detailed EDA, please see this [notebook](https://www.kaggle.com/sytuannguyen/tps-mar-2022-eda-model).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['axes.facecolor'] = 'gray'

import seaborn as sns
from matplotlib import animation
from IPython.display import HTML

import warnings
warnings.simplefilter('ignore')

In [None]:
# Load the training set (indexed by row_id), parse timestamps, and derive two
# 20-minute slot counters: one within the day, one counted from day-of-year 91.
train = (
    pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv', index_col='row_id')
    .assign(time=lambda d: pd.to_datetime(d['time']))
    .assign(
        # slot index within the day: minutes since midnight divided by 20
        dailytime_id=lambda d: ((d['time'].dt.hour * 60 + d['time'].dt.minute) / 20).astype(int),
        # slot index over the whole series: minutes since the start of day 91, divided by 20
        time_id=lambda d: (
            ((d['time'].dt.dayofyear - 91) * 24 * 60 + d['time'].dt.hour * 60 + d['time'].dt.minute) / 20
        ).astype(int),
    )
)

In [None]:
# Encode each direction label as a string-formatted [dx, dy] vector.
# The values stay strings because later cells parse them back into vectors.
train['dir_xy'] = train['direction'].map({
    'EB': '[1,0]', 'WB': '[-1,0]',
    'NB': '[0,1]', 'SB': '[0,-1]',
    'NE': '[1,1]', 'SW': '[-1,-1]',
    'NW': '[-1,1]', 'SE': '[1,-1]',
})

# One row per (x, y) location: the distinct direction vectors seen there and their count.
loc_dir = (
    train.groupby(['x', 'y'])['dir_xy']
    .unique()
    .reset_index()
    .assign(num_dir=lambda d: d['dir_xy'].map(len))
)

# <span style='color:#A80808'>Data repartition for each roadway</span>

In [None]:
# Categorical key distinguishing the roadways: "<x>_<y>_<direction>".
train['roadway'] = train.x.astype('str') +'_'+ train.y.astype('str') +'_'+ train.direction.astype('str')

# Colour column for the scatter plots: 'white' when a point lies strictly within one
# standard deviation of the mean of its (roadway, time-of-day slot) group, 'black' otherwise.
# Computed with groupby.transform and assigned in one step instead of chained
# `train.color[mask] = ...` writes, which are a silent no-op under pandas Copy-on-Write.
slot_congestion = train.groupby(['roadway', 'dailytime_id']).congestion
slot_mean = slot_congestion.transform('mean')
slot_std = slot_congestion.transform('std', ddof=0)  # population std, same as np.std
within_one_std = (train.congestion - slot_mean).abs() < slot_std
train['color'] = np.where(within_one_std, 'white', 'black')

# Highlight the last day, 30 Sep 1991 (day 273 of the year).
train.loc[train.time.dt.dayofyear == 273, 'color'] = 'red'

The scatter plot below shows where the last day's morning congestion sits compared to the data of all previous days for the first roadway (0_0_EB). The red points are the last day's morning (30 Sep); the white zone is the range from (mean-std) to (mean+std), where mean and std are computed for each instant.  

In [None]:
# Static scatter of every observation of roadway 0_0_EB against its time-of-day slot,
# coloured with the precomputed `color` column.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 100)
ax.set_xlim(-1, 73)
ax.set_xlabel('Time (x20 minutes)', fontsize=16)
ax.set_ylabel('Congestion', fontsize=16)

df = train.loc[train['roadway'] == '0_0_EB']
sct = ax.scatter(df['dailytime_id'], df['congestion'])
sct.set_color(df['color'])
ax.set_title('Roadway 0_0_EB', fontsize=16)

In [None]:
%%capture
# Figure shared by every frame; the scatter starts as 72 placeholder points.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 100)
ax.set_xlim(-1, 73)
ax.set_xlabel('Time (x20 minutes)', fontsize=16)
ax.set_ylabel('Congestion', fontsize=16)

sct = ax.scatter(range(72), [0]*72, color=['white'])

def update(idx, sct, roadways):
    """Draw frame `idx`: all observations of roadway `roadways[idx]`.

    Replaces the scatter's points and colours with that roadway's
    (dailytime_id, congestion) pairs and `color` values, updates the title,
    and returns the scatter artist.
    """
    roadway = roadways[idx]
    selected = train[train.roadway == roadway]

    sct.set_offsets(selected[['dailytime_id', 'congestion']].to_numpy())
    sct.set_color(selected.color)
    ax.set_title(f'Roadway {roadway}', fontsize=16)

    return sct

# One frame per distinct roadway, in order of first appearance in `train`.
daily_ani = animation.FuncAnimation(
    fig,
    update,
    frames=train.roadway.nunique(),
    fargs=(sct, train.roadway.unique()),
    interval=300,
    blit=False,
)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

The white zone is in the range *(mean-std)* to *(mean+std)* where *mean* and *std* are computed for each roadway at each instant.

# <span style='color:#A80808'>Data flow over time</span>

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(0,100)
plt.xlabel('Time', fontsize=16)
plt.ylabel('Mean congestion', fontsize=16)
plt.xticks([])

dailytime_ids = range(72)
bars = plt.bar(dailytime_ids, [0]*len(dailytime_ids), color='white')

# Per 20-minute step of the whole series: mean congestion over all roadways and
# the first timestamp of that step. Computed once instead of on every frame.
mean_by_time_id = train.groupby('time_id').congestion.mean()
first_time_by_time_id = train.groupby('time_id').time.first()


def update(idx):
    """Draw frame `idx`: a sliding window of `len(bars)` consecutive time steps.

    Bar k shows the mean congestion at time step `idx + k`. Steps missing from
    the data are drawn with height 0 rather than shifting later bars left and
    leaving stale heights from the previous frame. The title shows the date of
    the first step present in the window and is left unchanged if the window
    contains no data at all.
    """
    window = range(idx, idx + len(bars))
    heights = mean_by_time_id.reindex(window).fillna(0).to_numpy()
    for bar, height in zip(bars, heights):
        bar.set_height(height)

    stamps = first_time_by_time_id.reindex(window).dropna()
    if len(stamps):
        start = stamps.iloc[0]
        ax.set_title(f'Congestion flow over time ({start.day} {start.month_name()})', fontsize=16)

daily_ani = animation.FuncAnimation(fig, update,
                               interval=100, frames=1000, blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

Note that you can slow down or speed up the animation by using the buttons - or +

# <span style='color:#A80808'>Daily congestion animation for all the days of year</span>

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(30,60)
plt.xlabel('Day of year', fontsize=16)
plt.ylabel('Daily congestion', fontsize=16)

# One bar per day of the year present in the data, positioned at that day number.
dayofyear = train.time.dt.dayofyear.unique()
bars = plt.bar(dayofyear, [0]*len(dayofyear), color='white')

def update(dailytime_id, bars, dummy):
    """Draw the frame for one time-of-day slot.

    Each bar shows the mean congestion over all roadways on its day at slot
    `dailytime_id`. Means are matched to bars by day number, so a day that has
    no data for this slot gets height 0 instead of shifting the following bars.
    `dummy` is unused and only absorbs the extra animation argument.
    """
    df = train[train.dailytime_id==dailytime_id]
    day_mean = df.groupby(df.time.dt.dayofyear).congestion.mean()
    heights = day_mean.reindex(dayofyear).fillna(0).to_numpy()

    for bar, height in zip(bars, heights):
        bar.set_height(height)

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax.set_title(f'Average daily congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return bars

daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.dailytime_id.unique()),
                               interval=100, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(-10,10)
plt.xlabel('Day of year', fontsize=16)
plt.ylabel('Daily congestion', fontsize=16)

# One bar per day of the year present in the data, positioned at that day number.
dayofyear = train.time.dt.dayofyear.unique()
bars = plt.bar(dayofyear, [0]*len(dayofyear), color='white')

def update(dailytime_id, bars, dummy):
    """Draw the frame for one time-of-day slot.

    Each bar shows (mean congestion of its day at this slot) minus (median
    congestion of this slot over all days). Negative bars are black, the rest
    white. Values are matched to bars by day number; a day with no data for
    this slot is drawn as 0. `dummy` is unused.
    """
    df = train[train.dailytime_id==dailytime_id]
    slot_median = df.congestion.median()  # constant for the frame: computed once, not per bar
    day_mean = df.groupby(df.time.dt.dayofyear).congestion.mean()
    deviations = (day_mean - slot_median).reindex(dayofyear).fillna(0).to_numpy()

    for bar, deviation in zip(bars, deviations):
        bar.set_height(deviation)
        bar.set_color('black' if deviation < 0 else 'white')

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax.set_title(f'Deviation from daily average (median of all days) at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return bars

daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.dailytime_id.unique()),
                               interval=200, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(-10,10)
plt.xlabel('Daily time index', fontsize=16)
plt.ylabel('Congestion deviation', fontsize=16)

# One bar per time-of-day slot, positioned at the slot index.
dailytime_ids = train.dailytime_id.unique()
bars = plt.bar(dailytime_ids, [0]*len(dailytime_ids), color='white')

# Median congestion of each slot over all days, rounded to an integer.
# It does not depend on the frame, so it is computed once here.
slot_median = train.groupby('dailytime_id').congestion.median().round().astype(int)


def update(idx, bars, dayofyears):
    """Draw frame `idx`: the day `dayofyears[idx]`.

    Each bar shows (mean congestion of that day at its slot) minus the slot's
    all-days median. Negative bars are black, the rest white. Values are
    matched to bars by slot index, so a slot missing on that day is drawn as 0
    instead of shifting later bars or keeping the previous frame's height.
    """
    dayofyear = dayofyears[idx]

    df = train[train.time.dt.dayofyear==dayofyear]
    slot_mean = df.groupby('dailytime_id').congestion.mean()
    deviations = (slot_mean - slot_median).reindex(dailytime_ids).fillna(0).to_numpy()

    for bar, deviation in zip(bars, deviations):
        bar.set_height(deviation)
        bar.set_color('black' if deviation < 0 else 'white')

    ax.set_title(f'Deviation from daily average (median of all days) of the day {dayofyear} of year', fontsize=16)

    return bars
daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.time.dt.dayofyear.unique()),
                               interval=500, frames=train.time.dt.dayofyear.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

# <span style='color:#A80808'>Animate histograms of the roadways</span>

In [None]:
# Roadway key "<x>_<y>_<direction>"; this repeats the expression used earlier
# in the notebook to create the same column.
train['roadway'] = train.x.astype('str') +'_'+ train.y.astype('str') +'_'+ train.direction.astype('str')

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(10,7))

plt.xlabel('Congestion', fontsize=16)
plt.ylabel('Count', fontsize=16)
# Keep the bin edges: the rectangles are positioned on them, so every frame
# must count with exactly the same bins.
_, bin_edges, hist = ax.hist(train.congestion, 100, color='white')

def update(idx, hist, roadways):
    """Draw frame `idx`: the congestion histogram of roadway `roadways[idx]`.

    Counts are computed on the bin edges of the initial histogram so that each
    rectangle's height matches the congestion interval it is drawn over.
    (Re-binning over each roadway's own min-max range would give counts for
    different intervals than the rectangles' x positions.)
    """
    values = train.congestion[train.roadway==roadways[idx]].dropna()
    counts, _ = np.histogram(values, bins=bin_edges)
    for count, rect in zip(counts, hist):
        rect.set_height(count)

    # Guard against a zero-height axis when the selection is empty.
    ax.set_ylim(0, max(counts.max(), 1))
    ax.set_title(f'Roadway: {roadways[idx]}', fontsize=16)
    return hist

ani = animation.FuncAnimation(fig, update, fargs=(hist, train.roadway.unique()),
                               interval=500, frames=train.roadway.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(ani.to_jshtml())

# <span style='color:#A80808'>Daily correlation between the 12 locations</span>

In [None]:
%%capture
def animate(hour):
    """Draw the frame for `hour`: correlation between locations at hour:00.

    Builds a table with one row per timestamp at exactly `hour`:00 and one
    column per (x, y) location holding the mean congestion over that
    location's roadways, then draws the correlation matrix of the columns as
    an annotated heatmap on `ax` (colour bar on `cbar_ax`).

    Pivoting on the timestamp keeps the locations aligned even if one of them
    lacks a timestamp; stacking per-location lists would misalign or fail when
    their lengths differ.
    """
    on_the_hour = train[(train.time.dt.hour == hour) & (train.time.dt.minute == 0)]
    location_congestions = on_the_hour.pivot_table(
        index='time', columns=['x', 'y'], values='congestion', aggfunc='mean'
    )
    # Columns come out sorted by (x, y); flatten them to labels such as "x0y3".
    location_congestions.columns = [f'x{x}y{y}' for x, y in location_congestions.columns]

    ax.cla()
    sns.heatmap(ax = ax, data = location_congestions.corr(), annot=True, cbar_ax = cbar_ax)
    ax.set_title(f'Correlation between the locations at {hour}h00', fontsize=16)

# Wide axis for the heatmap, narrow one for its colour bar.
grid_kws = {'width_ratios': (0.9, 0.05), 'wspace': 0.2}
fig, (ax, cbar_ax) = plt.subplots(1, 2, gridspec_kw = grid_kws, figsize = (10, 8))
ani = animation.FuncAnimation(fig = fig, func = animate, frames = train.time.dt.hour.nunique(), interval = 500)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(ani.to_jshtml())

# <span style='color:#A80808'>Daily congestion animation for the Mondays of the year</span>

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(30,60)
plt.xlabel('Day of year', fontsize=16)
plt.ylabel('Daily congestion', fontsize=16)

# Restrict to Mondays once (weekday 0) instead of re-filtering on every frame.
monday_data = train[train.time.dt.weekday==0]
mondayofyear = monday_data.time.dt.dayofyear.unique()
bars = plt.bar(mondayofyear, [0]*len(mondayofyear), color='white')

def update(dailytime_id, bars, dummy):
    """Draw the frame for one time-of-day slot, Mondays only.

    Each bar shows the mean congestion over all roadways on its Monday at slot
    `dailytime_id`. Means are matched to bars by day number, so a Monday with
    no data for this slot gets height 0 instead of shifting the following
    bars. `dummy` is unused.
    """
    df = monday_data[monday_data.dailytime_id==dailytime_id]
    day_mean = df.groupby(df.time.dt.dayofyear).congestion.mean()
    heights = day_mean.reindex(mondayofyear).fillna(0).to_numpy()

    for bar, height in zip(bars, heights):
        bar.set_height(height)

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax.set_title(f'Average Monday congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return bars

daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.dailytime_id.unique()),
                               interval=100, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

# <span style='color:#A80808'>Average daily congestion for the weekdays</span>

In [None]:
%%capture
fig, ax = plt.subplots(figsize=(15, 5))
plt.ylim(30,60)
plt.ylabel('Daily congestion', fontsize=16)

# One bar per weekday number present in the data (0 = Monday ... 6 = Sunday).
weekday = train.time.dt.weekday.unique()
bars = plt.bar(weekday, [0]*len(weekday), color='white')

# Pin the ticks to the weekday numbers before labelling them, rather than
# relying on where the automatic tick locator happens to place its ticks.
ax.set_xticks(range(7))
ax.set_xticklabels(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], fontsize=16)

def update(dailytime_id, bars, dummy):
    """Draw the frame for one time-of-day slot.

    Each bar shows the mean congestion over all roadways and all dates falling
    on its weekday at slot `dailytime_id`. Means are matched to bars by
    weekday number; a weekday without data for this slot gets height 0.
    `dummy` is unused.
    """
    df = train[train.dailytime_id==dailytime_id]
    weekday_mean = df.groupby(df.time.dt.weekday).congestion.mean()
    heights = weekday_mean.reindex(weekday).fillna(0).to_numpy()

    for bar, height in zip(bars, heights):
        bar.set_height(height)

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax.set_title(f'Average week days congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return bars

daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.dailytime_id.unique()),
                               interval=100, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

# <span style='color:#A80808'>Average daily congestion for all the 12 locations and 65 roadways</span>

In [None]:
%%capture
fig = plt.figure(figsize=(16,12))

# Left panel: one circle per (x, y) location on the 3 x 4 grid.
ax1 = plt.subplot(1,2,1)
plt.xlim(-0.5,2.5)
plt.ylim(-0.5,3.5)
plt.xticks([])
plt.yticks([])

theta = np.arange(0,2.01*np.pi,0.01*np.pi)
r=0.1

locations = [(ox, oy) for ox in range(3) for oy in range(4)]
location_index = pd.MultiIndex.from_tuples(locations, names=['x', 'y'])
lines = []
for ox, oy in locations:
    line, = ax1.plot(ox + r*np.sin(theta), oy + r*np.cos(theta), 'white', linewidth=10)
    lines.append(line)

# Right panel: one arrow per roadway, i.e. per (x, y, direction vector) listed in loc_dir.
road_keys = [(row.x, row.y, d) for _, row in loc_dir.iterrows() for d in row.dir_xy]
road_index = pd.MultiIndex.from_tuples(road_keys, names=['x', 'y', 'dir_xy'])
origin = np.array([[key[0] for key in road_keys], [key[1] for key in road_keys]])
# Parse the "[dx,dy]" strings into numbers once, instead of eval() per arrow per frame.
unit_vectors = np.array([[float(v) for v in key[2].strip('[]').split(',')] for key in road_keys])

ax2 = plt.subplot(1,2,2)
plt.xlim(-0.5,2.5)
plt.ylim(-0.5,3.5)
plt.xticks([])
plt.yticks([])
Q = ax2.quiver(*origin, [0]*origin.shape[1], [0]*origin.shape[1], scale=1, color='white')

def update(dailytime_id, lines, Q, dummy):
    """Draw the frame for one time-of-day slot on both panels.

    Left: each location's circle gets radius mean_congestion * 0.5 / 100 and a
    colour from a three-step scale (one step per 33 congestion points).
    Right: each roadway's arrow is its direction vector scaled by
    mean_congestion / 500. `dummy` is unused. Returns `(lines, Q)`.
    """
    frame = train[train.dailytime_id == dailytime_id]
    theta = np.arange(0,2.01*np.pi,0.01*np.pi)
    colors = ['silver', 'gainsboro', 'white']

    # update locations: one groupby per frame instead of one full-frame filter per location
    location_means = frame.groupby(['x', 'y']).congestion.mean().reindex(location_index).to_numpy()
    for line, (ox, oy), congestion in zip(lines, locations, location_means):
        if np.isnan(congestion):
            # no data for this location at this slot: draw nothing rather than fail on int(nan)
            line.set_data([], [])
            continue
        radius = congestion * 0.5/100
        line.set_data(ox + radius*np.sin(theta), oy + radius*np.cos(theta))
        # clamp so a mean of 99 or more still maps to the last colour
        line.set_color(colors[min(int(congestion//33), len(colors) - 1)])

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax1.set_title(f'Location average daily congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    # update roadways
    road_means = frame.groupby(['x', 'y', 'dir_xy']).congestion.mean().reindex(road_index).to_numpy()
    direction = unit_vectors * (road_means / 500)[:, None]

    Q.set_UVC(direction[:,0], direction[:,1])
    ax2.set_title(f'Roadway average daily congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return lines, Q

daily_ani = animation.FuncAnimation(fig, update, fargs=(lines, Q, train.dailytime_id.unique()),
                               interval=300, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

# <span style='color:#A80808'>Average daily congestion for the 8 directions</span>

In [None]:
%%capture
fig = plt.figure(figsize=(10,10))
ax = plt.subplot(projection='polar')
plt.ylim(0,60)

# Direction shown at each of the 8 angular positions, counter-clockwise from 0 rad.
direction_labels = ['EB', 'NE', 'NB', 'NW', 'WB', 'SW', 'SB', 'SE']

angles = np.linspace(0, 2 * np.pi, 9)
# Pin the ticks to the bar angles so every label sits on its own bar.
ax.set_xticks(angles[:8])
ax.set_xticklabels(direction_labels, fontsize=16)
bars = plt.bar(angles[:8], [1]*8, width=np.pi / 8, color='white')

def update(dailytime_id, bars, dummy):
    """Draw the frame for one time-of-day slot.

    Each bar shows the mean congestion of the direction it is labelled with.
    Means are looked up by direction label, not by the order in which the
    directions first appear in the data, so a bar cannot display another
    direction's value. A direction without data for this slot gets height 0.
    `dummy` is unused.
    """
    df = train[train.dailytime_id==dailytime_id]
    direction_mean = df.groupby('direction').congestion.mean()
    heights = direction_mean.reindex(direction_labels).fillna(0).to_numpy()

    for bar, height in zip(bars, heights):
        bar.set_height(height)

    # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
    ax.set_title(f'Average daily congestion at {dailytime_id//3}h{dailytime_id%3*20:02d}', fontsize=16)

    return bars

daily_ani = animation.FuncAnimation(fig, update, fargs=(bars, train.dailytime_id.unique()),
                               interval=100, frames=train.dailytime_id.nunique(), blit=False)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(daily_ani.to_jshtml())

In [None]:
# This function returns an animation for a weekday
def weekday_ani(weekday):
    """Build the location/roadway congestion animation for one day of the week.

    Parameters
    ----------
    weekday : int
        Day of the week as returned by `Series.dt.weekday`: 0 = Monday ... 6 = Sunday.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        One frame per time-of-day slot. The left panel draws a circle per
        (x, y) location whose radius and colour follow the mean congestion;
        the right panel draws an arrow per roadway scaled by its mean
        congestion. Only rows of `train` falling on `weekday` are used.

    Raises
    ------
    ValueError
        If `weekday` is not one of 0..6.
    """
    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    if weekday not in range(len(weekdays)):
        raise ValueError(f'weekday must be an integer in 0..6, got {weekday!r}')

    # Select the requested weekday once instead of re-filtering for every location and frame.
    day_data = train[train.time.dt.weekday == weekday]

    fig = plt.figure(figsize=(16,12))

    # Left panel: one circle per (x, y) location on the 3 x 4 grid.
    ax1 = plt.subplot(1,2,1)
    plt.xlim(-0.5,2.5)
    plt.ylim(-0.5,3.5)
    plt.xticks([])
    plt.yticks([])

    theta = np.arange(0,2.01*np.pi,0.01*np.pi)
    r=0.1

    locations = [(ox, oy) for ox in range(3) for oy in range(4)]
    location_index = pd.MultiIndex.from_tuples(locations, names=['x', 'y'])
    lines = []
    for ox, oy in locations:
        line, = ax1.plot(ox + r*np.sin(theta), oy + r*np.cos(theta), 'white', linewidth=10)
        lines.append(line)

    # Right panel: one arrow per roadway, i.e. per (x, y, direction vector) listed in loc_dir.
    road_keys = [(row.x, row.y, d) for _, row in loc_dir.iterrows() for d in row.dir_xy]
    road_index = pd.MultiIndex.from_tuples(road_keys, names=['x', 'y', 'dir_xy'])
    origin = np.array([[key[0] for key in road_keys], [key[1] for key in road_keys]])
    # Parse the "[dx,dy]" strings into numbers once, instead of eval() per arrow per frame.
    unit_vectors = np.array([[float(v) for v in key[2].strip('[]').split(',')] for key in road_keys])

    ax2 = plt.subplot(1,2,2)
    plt.xlim(-0.5,2.5)
    plt.ylim(-0.5,3.5)
    plt.xticks([])
    plt.yticks([])
    Q = ax2.quiver(*origin, [0]*origin.shape[1], [0]*origin.shape[1], scale=1, color='white')

    def update(dailytime_id, lines, Q, dummy):
        """Draw the frame for one time-of-day slot; `dummy` is unused."""
        frame = day_data[day_data.dailytime_id == dailytime_id]
        colors = ['silver', 'gainsboro', 'white']

        # update locations
        location_means = frame.groupby(['x', 'y']).congestion.mean().reindex(location_index).to_numpy()
        for line, (ox, oy), congestion in zip(lines, locations, location_means):
            if np.isnan(congestion):
                # no data for this location at this slot: draw nothing rather than fail on int(nan)
                line.set_data([], [])
                continue
            radius = congestion * 0.5/100
            line.set_data(ox + radius*np.sin(theta), oy + radius*np.cos(theta))
            # clamp so a mean of 99 or more still maps to the last colour
            line.set_color(colors[min(int(congestion//33), len(colors) - 1)])

        # Zero-pad the minutes so that e.g. 07:00 reads "7h00" rather than "7h0".
        ax1.set_title(f'Average {weekdays[weekday]} congestion at {dailytime_id//3}h{dailytime_id%3*20:02d} for all locations', fontsize=16)

        # update roadways
        road_means = frame.groupby(['x', 'y', 'dir_xy']).congestion.mean().reindex(road_index).to_numpy()
        direction = unit_vectors * (road_means / 500)[:, None]

        Q.set_UVC(direction[:,0], direction[:,1])
        ax2.set_title(f'Average {weekdays[weekday]} congestion at {dailytime_id//3}h{dailytime_id%3*20:02d} for all roadways', fontsize=16)

        return lines, Q

    daily_ani = animation.FuncAnimation(fig, update, fargs=(lines, Q, train.dailytime_id.unique()),
                                   interval=100, frames=train.dailytime_id.nunique(), blit=False)

    return daily_ani

# <span style='color:#A80808'>Monday congestion</span>

In [None]:
%%capture
# Build the animation for weekday index 0, which weekday_ani labels 'Monday'.
Monday_ani = weekday_ani(0)

In [None]:
# Render the animation as an HTML/JavaScript player in the cell output.
HTML(Monday_ani.to_jshtml())

# <span style='color:#A80808'>Daily congestion of each roadway</span>

In [None]:
def roadway_daily_congestion(x, y, direction):
    """Build an animation of one roadway's congestion, one frame per day.

    Parameters
    ----------
    x, y : int
        Location coordinates of the roadway.
    direction : str
        Direction label of the roadway (a value of `train.direction`).

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Each frame is a scatter of congestion against the time-of-day slot for
        one day of the year. Slots 0-35 are black and slots 36-71 white.
    """
    fig, ax = plt.subplots(figsize=(15, 5))
    plt.ylim(0,100)
    plt.xlim(-1,73)
    plt.xlabel('Time (x20 minutes)', fontsize=16)
    plt.ylabel('Congestion', fontsize=16)

    sct = plt.scatter(range(72),[0]*72, color=['black']*36 + ['white']*36)

    # Select this roadway's rows once instead of filtering the whole frame on every frame.
    road = train[(train.x==x) & (train.y==y) & (train.direction==direction)]
    road_dayofyear = road.time.dt.dayofyear

    def update(idx, sct, dayofyears):
        """Draw frame `idx`: the roadway's observations on day `dayofyears[idx]`."""
        dayofyear = dayofyears[idx]
        df = road[road_dayofyear == dayofyear]

        sct.set_offsets(df[['dailytime_id', 'congestion']].to_numpy().reshape(-1, 2))

        if df.empty:
            # No observation for this roadway on this day: show an empty frame
            # instead of failing while reading the date for the title.
            ax.set_title(f'Roadway {x}_{y}_{direction} (no data for day {dayofyear})', fontsize=16)
            return sct

        # Colour by time of day, not by point position, so the black/white split
        # stays on the slot axis even when some slots are missing that day.
        sct.set_color(np.where(df.dailytime_id < 36, 'black', 'white').tolist())

        first = df.time.iloc[0]
        ax.set_title(f'Roadway {x}_{y}_{direction} on {first.day} {first.month_name()}', fontsize=16)

        return sct

    daily_ani = animation.FuncAnimation(fig, update, fargs=(sct, train.time.dt.dayofyear.unique()),
                                   interval=300, frames=train.time.dt.dayofyear.nunique(), blit=False)
    return daily_ani

In [None]:
%%capture
# One animation per roadway that actually exists in the data, visiting the
# 3 x 4 grid in (x, y) order and the directions in their order of first appearance.
anis = [
    roadway_daily_congestion(x, y, direction)
    for x in range(3)
    for y in range(4)
    for direction in train.direction.unique()
    if ((train.x == x) & (train.y == y) & (train.direction == direction)).any()
]

In [None]:
# Render the first roadway's animation as an HTML/JavaScript player in the cell output.
HTML(anis[0].to_jshtml())

# <span style='color:#A80808'>Bonus: simple baseline without ML that outperforms top ML models</span>

It is said that: "Don't jump too soon into the water!!!"

In [None]:
# Reload the training data for the baseline: parsed timestamps, the 20-minute
# slot of the day (`daytime_id`) and the "<x>_<y>_<direction>" roadway key.
train = (
    pd.read_csv('../input/tabular-playground-series-mar-2022/train.csv', index_col='row_id')
    .assign(time=lambda d: pd.to_datetime(d['time']))
    .assign(
        daytime_id=lambda d: ((d['time'].dt.hour * 60 + d['time'].dt.minute) / 20).astype(int),
        roadway=lambda d: d['x'].astype('str').str.cat(
            [d['y'].astype('str'), d['direction'].astype('str')], sep='_'
        ),
    )
)

# Test set with the same timestamp parsing and roadway key.
test = (
    pd.read_csv('../input/tabular-playground-series-mar-2022/test.csv', index_col='row_id')
    .assign(time=lambda d: pd.to_datetime(d['time']))
    .assign(
        roadway=lambda d: d['x'].astype('str').str.cat(
            [d['y'].astype('str'), d['direction'].astype('str')], sep='_'
        ),
    )
)

# Submission template.
submission = pd.read_csv('../input/tabular-playground-series-mar-2022/sample_submission.csv')

In [None]:
# Baseline prediction: for every test row, the median training congestion of the
# same roadway at the same 20-minute slot of the day.
# Medians are looked up by (roadway, slot) rather than pasted in by position, so
# the result does not depend on the test rows being ordered, or on there being
# exactly 36 of them per roadway.
slot_median = train.groupby(['roadway', 'daytime_id']).congestion.median()
test_slot = ((test.time.dt.hour*60 + test.time.dt.minute) / 20).astype(int)
test_keys = pd.MultiIndex.from_arrays([test.roadway, test_slot])

# -1 marks rows whose (roadway, slot) never occurs in train.
test['median_'] = slot_median.reindex(test_keys).fillna(-1).to_numpy()

# Hand-set value for roadway 2_2_SE before 15:00.
# .loc is used because chained `test.median_[mask] = ...` is a no-op under pandas Copy-on-Write.
test.loc[(test.roadway=='2_2_SE') & (test.time.dt.hour<15), 'median_'] = 20
test['median_'] = test['median_'].round().astype(int)

In [None]:
# Put the baseline predictions into the submission template and write it out.
# Item assignment is used because attribute-style assignment would not create
# the column if the template lacked it.
submission['congestion'] = test['median_'].tolist()
submission.to_csv('submission.csv', index=False)
submission