In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

from plotly.offline import init_notebook_mode, iplot, plot
import plotly.express as px
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# Set Matplotlib defaults
# The seaborn style sheets were renamed to "seaborn-v0_8-*" in Matplotlib 3.6 and the
# old names were dropped in later releases, so use whichever name this installation
# provides (newer name first). If neither exists the current style is left untouched.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Figure-level defaults: automatic tight layout, wide canvas, bold 18pt figure titles.
plt.rc(
    "figure",
    autolayout=True,
    figsize=(12, 6),
    titlesize=18,
    titleweight='bold',
)
# Axes-level defaults: bold axis labels and bold, padded axes titles.
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
# Keyword arguments (line colour, ".-" style, marker colours) collected in one dict
# so they can be unpacked into plotting calls.
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
)

from warnings import simplefilter
# Ignore every warning category for the rest of the session. This keeps cell output
# clean, but it also hides deprecation and pandas chained-assignment warnings.
simplefilter("ignore")

from sklearn.preprocessing import LabelEncoder

# Goal
It seems to me that Plotly is used undeservedly rarely, even though interactive charts have a number of advantages:
* charts are zoomable
* traces to view can be selected
* hover data gives additional information
* the animation feature is very useful
* easy to implement
* and finally, it's beautiful!

Here I want to show some examples of using **plotly** in notebooks for this competition.

### Load data and add some features for visualization

In [None]:
# Read the training set: `row_id` becomes the index and `time` is parsed as datetimes.
df_train = pd.read_csv(
    '../input/tabular-playground-series-mar-2022/train.csv',
    index_col="row_id",
    parse_dates=['time'],
)

# String keys used by the charts: `location` is the concatenated x and y values,
# `roadway` is that same key followed by the direction.
xy_key = df_train['x'].astype(str) + df_train['y'].astype(str)
df_train['roadway'] = xy_key + df_train['direction']
df_train['location'] = xy_key

# `road` is an integer code for each distinct roadway string.
le = LabelEncoder()
df_train['road'] = le.fit_transform(df_train['roadway'])

def add_datetime_features(df):
    """Add calendar columns derived from ``df['time']`` to ``df`` in place.

    Creates (or overwrites) the columns ``day``, ``weekday``, ``hour`` and
    ``minute`` from the ``.dt`` accessor of the ``time`` column.
    The frame is modified directly; nothing is returned.
    """
    stamps = df['time'].dt
    for part in ('day', 'weekday', 'hour', 'minute'):
        df[part] = getattr(stamps, part)
    
add_datetime_features(df_train)

# Per-roadway congestion statistics for every (weekday, hour, minute) slot.
# A single groupby/merge pass produces the `min`, `max` and `median` columns
# (in that order) instead of three separate groupby + merge round trips.
# The statistics are cast to int, which truncates fractional medians.
slot_keys = ['road', 'weekday', 'hour', 'minute']
slot_stats = (
    df_train.groupby(slot_keys)
    .congestion.agg(['min', 'max', 'median'])
    .astype(int)
    .reset_index()
)
# Left merge keeps every training row and broadcasts its slot's statistics onto it.
df_train = df_train.merge(slot_stats, on=slot_keys, how='left')

# Lag features: shift the timestamps forward and merge back on (time, road), so each
# row receives the congestion observed on the same road exactly 1 / 7 days earlier.
# Rows with no matching earlier observation get NaN (left merge).
lag_keys = ['time', 'road']

day = (
    df_train[lag_keys + ['congestion']]
    .rename(columns={'congestion': 'lag1'})
    .assign(time=lambda shifted: shifted['time'] + pd.Timedelta(1, unit="d"))
)
df_train = df_train.merge(day, how='left', on=lag_keys)

week = (
    df_train[lag_keys + ['congestion']]
    .rename(columns={'congestion': 'lag7'})
    .assign(time=lambda shifted: shifted['time'] + pd.Timedelta(7, unit="d"))
)
df_train = df_train.merge(week, how='left', on=lag_keys)

# Restrict the charts to observations from 1991-09-15 onward.
sample_start = pd.to_datetime('1991-09-15 00:00')
# .copy() makes `sample` an independent frame, so the column added below is not
# written into a slice of df_train (chained assignment).
sample = df_train[df_train.time >= sample_start].copy()
# `tic`: integer code per unique timestamp, used later as the animation frame.
te = LabelEncoder()
sample['tic'] = te.fit_transform(sample['time'])

# Engineered columns are everything appended after the datetime features; locate
# that point by column name rather than by a hard-coded position.
features = list(df_train.columns[df_train.columns.get_loc('minute') + 1:])

# Long-format frame for plotly: one row per (time, roadway, series), where `y` names
# the series and `congestion` holds its value. Built with a single concat instead of
# growing the frame inside a loop.
plot_data = pd.concat(
    [
        sample[['time', 'roadway', series_name]]
        .rename(columns={series_name: 'congestion'})
        .assign(y=series_name)
        for series_name in ['congestion'] + features
    ],
    axis=0,
)

Let's plot congestion together with the added features.

In [None]:
# One line per series (the `y` column); the animation control steps through roadways.
fig = px.line(
    plot_data,
    x='time',
    y='congestion',
    color='y',
    animation_frame='roadway',
)
fig.show()

* Too much data on the chart, isn't it? Use the legend to show/hide traces, zoom to select an area of interest, and use the animation control to scroll between roadways.

In [None]:
# Compass order of the directions, and each direction's position in that order.
sorter = ['NB', 'NE', 'EB', 'SE', 'SB', 'SW', 'WB', 'NW']
sorterIndex = {direction: position for position, direction in enumerate(sorter)}

# Keep observations from 1991-09-28 onward, then order rows by time step and compass
# position. The filter -> assign -> sort chain returns a new frame, so `rank` is never
# written into a slice of the previous `sample`.
sample = (
    sample[sample.time >= pd.to_datetime('1991-09-28 00:00')]
    .assign(rank=lambda frame: frame['direction'].map(sorterIndex))
    .sort_values(['tic', 'rank'])
)

* Each (x,y) location has up to 8 roadway directions, so let's use a polar plot to represent the location data:

In [None]:
# Animated polar chart: one closed trace per location, directions on the angular axis,
# congestion on the radial axis, one animation frame per `tic`.
location_polar_options = dict(
    r='congestion',
    range_r=[0, 100],
    theta="direction",
    color='location',
    height=700,
    markers=True,
    line_close=True,
    title='12 (x,y) locations with all direction congestion values by "tic" (unique time number)',
    animation_frame="tic",
)
fig = px.line_polar(sample, **location_polar_options)

In [None]:
# Set the font size and show the directions in compass order on the angular axis.
# The order comes from `sorter` (defined with the rank mapping above), so the row
# ordering and the axis ordering share one source of truth.
fig.update_layout(
    font_size=12,
    polar=dict(
        angularaxis=dict(
            categoryorder="array",
            categoryarray=sorter,
        )
    ),
)
fig.show()

In [MultiOutputRegressor](https://www.kaggle.com/code/martynovandrey/tps-mar-22-multioutput-regressor) I use the 65 congestion values of all roadways as one target. Let's plot the whole target on one chart, animated by time.

In [None]:
# Animated polar chart of the whole target: every roadway is one angular position,
# congestion is the radius, and each animation frame is one `tic`.
roadway_polar_options = dict(
    r='congestion',
    range_r=[0, 100],
    theta="roadway",
    height=700,
    markers=True,
    line_close=True,
    title='65 roadway congestions by "tic" (unique time number)',
    animation_frame="tic",
)
fig = px.line_polar(sample, **roadway_polar_options)
# Smaller font so the roadway labels fit around the circle.
fig.update_layout(font_size=8)
fig.show()

# Thanks for your attention! Happy Kaggling!