# This notebook visualizes the WAP and log-return series over the first 10 minutes for the stock-time records in the train data whose volatility in the second 10 minutes is the highest and the lowest.

In [None]:
# import necessary libraries
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import glob

In [None]:
# Load train.csv into a DataFrame. Later cells rank its rows by the `target`
# column (the second-10-minute volatility) and read `stock_id` / `time_id`
# from the selected rows.
train = pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')

In [None]:
# Number of stock-time records to pick from EACH end of the ranking:
# this many of the most volatile and this many of the least volatile
# records are visualized.
num_to_visualize: int = 10

In [None]:
# Keep the `num_to_visualize` rows of train with the largest `target`
# (the most volatile stock-time records) and display them.
most_volatility = train.nlargest(n=num_to_visualize, columns='target')
print(most_volatility)

In [None]:
# Keep the `num_to_visualize` rows of train with the smallest `target`
# (the least volatile stock-time records) and display them.
least_volatility = train.nsmallest(n=num_to_visualize, columns='target')
print(least_volatility)

In [None]:
# List the entries found under the book and trade parquet directories
# (the first-10-minute book & trade data).
# NOTE(review): train_book and train_trade are not referenced by any other
# cell visible in this notebook; the loops below build their parquet paths
# directly from stock_id instead — confirm whether this listing is still needed.
train_book = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/book_train.parquet/*')
train_trade = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/trade_train.parquet/*')

In [None]:
# Two independent, initially empty mappings from a record label
# ('S<stock_id>-T<time_id>') to that record's book DataFrame:
# one for the most volatile records, one for the least volatile ones.
most_v_book_collection = dict()
least_v_book_collection = dict()

In [None]:
# For every record in most_volatility, load that stock's order book, keep the
# rows of the record's time_id, compute the WAP and its log return, and store
# the resulting frame in most_v_book_collection under 'S<stock_id>-T<time_id>'.
for index, row in most_volatility.iterrows():
    # iterrows() yields each row as a single Series, so the id values may come
    # back as floats; cast to int before using them in paths and labels.
    stock_id = str(int(row['stock_id']))
    time_id = int(row['time_id'])

    # Only the order book is needed for WAP / log return. The trade file used
    # to be read and filtered here as well, but its result was never used, so
    # that read has been dropped.
    book_example = pd.read_parquet('../input/optiver-realized-volatility-prediction/book_train.parquet/stock_id='+stock_id)

    # .copy() turns the filtered rows into an independent frame, so the column
    # assignments below act on it directly instead of on a slice of the full
    # book (which raises SettingWithCopyWarning on classic pandas).
    book_example = book_example[book_example['time_id'] == time_id].copy()
    book_example.loc[:, 'stock_id'] = stock_id

    # Weighted average price from the level-1 quotes: each side's price is
    # weighted by the size on the opposite side.
    book_example['wap'] = (
        book_example['bid_price1'] * book_example['ask_size1']
        + book_example['ask_price1'] * book_example['bid_size1']
    ) / (book_example['bid_size1'] + book_example['ask_size1'])

    # Log return between consecutive book rows; the first row has no
    # predecessor, so its NaN entry is removed.
    book_example.loc[:, 'log_return'] = np.log(book_example['wap']).diff()
    book_example = book_example[book_example['log_return'].notnull()]

    # The raw quote columns are no longer needed once WAP is computed.
    book_example = book_example.drop(
        ['bid_price1', 'ask_price1', 'bid_price2', 'ask_price2',
         'bid_size1', 'ask_size1', 'bid_size2', 'ask_size2'],
        axis=1,
    )
    most_v_book_collection[f'S{stock_id}-T{time_id}'] = book_example

In [None]:
# For every record in least_volatility, load that stock's order book, keep the
# rows of the record's time_id, compute the WAP and its log return, and store
# the resulting frame in least_v_book_collection under 'S<stock_id>-T<time_id>'.
for index, row in least_volatility.iterrows():
    # iterrows() yields each row as a single Series, so the id values may come
    # back as floats; cast to int before using them in paths and labels.
    stock_id = str(int(row['stock_id']))
    time_id = int(row['time_id'])

    # Only the order book is needed for WAP / log return. The trade file used
    # to be read and filtered here as well, but its result was never used, so
    # that read has been dropped.
    book_example = pd.read_parquet('../input/optiver-realized-volatility-prediction/book_train.parquet/stock_id='+stock_id)

    # .copy() turns the filtered rows into an independent frame, so the column
    # assignments below act on it directly instead of on a slice of the full
    # book (which raises SettingWithCopyWarning on classic pandas).
    book_example = book_example[book_example['time_id'] == time_id].copy()
    book_example.loc[:, 'stock_id'] = stock_id

    # Weighted average price from the level-1 quotes: each side's price is
    # weighted by the size on the opposite side.
    book_example['wap'] = (
        book_example['bid_price1'] * book_example['ask_size1']
        + book_example['ask_price1'] * book_example['bid_size1']
    ) / (book_example['bid_size1'] + book_example['ask_size1'])

    # Log return between consecutive book rows; the first row has no
    # predecessor, so its NaN entry is removed.
    book_example.loc[:, 'log_return'] = np.log(book_example['wap']).diff()
    book_example = book_example[book_example['log_return'].notnull()]

    # The raw quote columns are no longer needed once WAP is computed.
    book_example = book_example.drop(
        ['bid_price1', 'ask_price1', 'bid_price2', 'ask_price2',
         'bid_size1', 'ask_size1', 'bid_size2', 'ask_size2'],
        axis=1,
    )
    least_v_book_collection[f'S{stock_id}-T{time_id}'] = book_example

In [None]:
# Overlay the WAP series of every collected record on a single figure:
# most volatile records are drawn in red, least volatile ones in royal blue.
fig = go.Figure()
wap_groups = (
    (most_v_book_collection, 'red', 'Most Volatility '),
    (least_v_book_collection, 'royalblue', 'Least Volatility '),
)
# Traces are added group by group, so the most volatile records come first.
for collection, colour, prefix in wap_groups:
    for label, book in collection.items():
        trace = go.Scatter(
            x=book['seconds_in_bucket'],
            y=book['wap'],
            line=dict(color=colour, width=1),
            connectgaps=True,
            name=prefix + label,
        )
        fig.add_trace(trace)
fig.update_layout(
    title='WAP of Most & Least Volatility Stock-Time Records',
    xaxis_title='Time in Bucket',
    yaxis_title='WAP',
)
fig.show()

In [None]:
# Overlay the log-return series of every collected record on a single figure:
# most volatile records are drawn in red, least volatile ones in royal blue.
fig = go.Figure()
log_return_groups = (
    (most_v_book_collection, 'red', 'Most Volatility '),
    (least_v_book_collection, 'royalblue', 'Least Volatility '),
)
# Traces are added group by group, so the most volatile records come first.
for collection, colour, prefix in log_return_groups:
    for label, book in collection.items():
        trace = go.Scatter(
            x=book['seconds_in_bucket'],
            y=book['log_return'],
            line=dict(color=colour, width=1),
            connectgaps=True,
            name=prefix + label,
        )
        fig.add_trace(trace)
fig.update_layout(
    title='Log Return of Most & Least Volatility Stock-Time Records',
    xaxis_title='Time in Bucket',
    yaxis_title='Log Return',
)
fig.show()

# I hope this notebook helps you build a visual intuition for volatility. Best of luck in this competition, Kagglers!