In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import jpx_tokyo_market_prediction
import matplotlib.pyplot as plt
import seaborn as sns

# Root directory of the competition dataset; the spec and training CSVs are read from sub-paths of it.
COMP_DIR = "../input/jpx-tokyo-stock-exchange-prediction"

# Adjustment Factors

Included in the `stock_prices.csv` data is an adjustment factor column carrying information about changes in *theoretical* price/volume due to stock splits and reverse stock splits. From the specs:

In [None]:
# Show the last field of the spec row that describes the AdjustmentFactor column.
price_spec = pd.read_csv(f'{COMP_DIR}/data_specifications/stock_price_spec.csv')
af_spec_rows = price_spec[price_spec['Column'] == 'AdjustmentFactor']
af_spec_rows.iloc[0, -1]

It is this *theoretical* price/volume that we'll want to use for prediction, so let's investigate how to use the adjustment factors. We'll start by loading the data, looking at the distribution, and isolating some individual cases in the EDA section.

## EDA

In [None]:
# Load the daily price table, indexed and ordered by (SecuritiesCode, Date).
indx_cols = ['SecuritiesCode', 'Date']
prices_csv = f'{COMP_DIR}/train_files/stock_prices.csv'
prices = (
    pd.read_csv(prices_csv, parse_dates=['Date'])
    .drop(columns='RowId')
    .set_index(indx_cols)
    .sort_values(indx_cols)
)
prices

First, note that the adjustment factor is almost always 1:

In [None]:
# Fraction of rows whose adjustment factor is exactly 1.
n_unadjusted = len(prices[prices['AdjustmentFactor'] == 1])
n_unadjusted / len(prices)

This is what the distribution of all the other values looks like. You can see most correspond to stock splits (adjustment factor < 1), yet a sizeable minority correspond to reverse stock splits (adjustment factor > 1):

* **stock-splits**: each existing share is converted into multiple shares at a fixed ratio. This diminishes the value of each share and increases the trading volume proportionally. For example, a 2-for-1 split corresponds to an adjustment factor of $1/2=0.5$.

* **reverse stock-splits**: shareholders give up a part of their existing shares in proportion to their ownership. This leaves fewer shares outstanding, increasing the price of each share and decreasing the trading volume. For example, a 1-for-10 reverse split corresponds to an adjustment factor of $10/1=10$.

In [None]:
# Histogram of the adjustment factors that differ from 1, labelled so the
# figure can be read on its own; the trailing `;` hides the Axes repr.
af_hist_ax = prices.query('AdjustmentFactor!=1')['AdjustmentFactor'].hist(bins=50)
af_hist_ax.set_xlabel('AdjustmentFactor')
af_hist_ax.set_ylabel('Number of rows')
af_hist_ax.set_title('Distribution of adjustment factors other than 1');

### Stock Split Example

In [None]:
def show_adjustment(stock=None,af='AdjustmentFactor',df=prices):
    """Plot Close and Volume for one security, each overlaid with an adjustment-factor column.

    Draws a 1x2 figure: the left panel shows `Close`, the right panel shows
    `Volume`. On each panel the column named by `af` is drawn as a dashed red
    line against a secondary y-axis.

    Parameters
    ----------
    stock : key passed to `df.xs` to select the rows of the security to plot.
        Required, despite the `None` default.
    af : name of the column to overlay on the secondary axis.
    df : frame to plot from; it must contain `Close`, `Volume` and the `af`
        column. The default is the `prices` object bound when this function
        was defined.

    Raises
    ------
    ValueError
        If `stock` is not given.
    """
    if stock is None:
        raise ValueError('stock must be a security code present in the index of df')
    # Select the security's rows once (and before any figure is created)
    # instead of repeating the lookup for every plotted line.
    stock_df = df.xs(stock)
    _, axes = plt.subplots(1,2,figsize=(20,5))
    for ax, col in zip(axes,['Close','Volume']):
        stock_df.plot(y=col,ax=ax,legend=False)
        # Secondary y-axis so the factor keeps its own scale.
        ax2 = ax.twinx()
        stock_df.plot(y=af,ax=ax2,linestyle='--',color='red',legend=False)
        ax.set_ylabel(col)
        ax2.set_ylabel(af)
        ax.set_title(f'Security Code {stock}')

In [None]:
# idxmin yields the index label of the smallest factor; its first entry is the security code.
smallest_af_idx = prices['AdjustmentFactor'].idxmin()
smallest_af = smallest_af_idx[0]
show_adjustment(stock=smallest_af)

### Reverse Stock-Split Example

In [None]:
# idxmax yields the index label of the largest factor; its first entry is the security code.
largest_af_idx = prices['AdjustmentFactor'].idxmax()
largest_af = largest_af_idx[0]
show_adjustment(stock=largest_af)

## Converting Historical Price to Theoretical Price

We can get a cumulative adjustment factor to adjust all historical prices by taking, for each security, the cumulative product of the given adjustment factors backwards in time (from the most recent date to the earliest). We then multiply the prices by this cumulative factor and divide the volumes by it. Compare the scaled prices/volumes for the following security with the original ones above.

In [None]:
# Cumulative adjustment factor: for every row, the product of that security's
# adjustment factors from that row's date through its last date. The series is
# reversed so the built-in grouped cumprod runs backwards in time, then
# reversed back so the result carries exactly the index of `prices`. The
# assignment is therefore index-aligned and does not depend on the frame
# happening to be sorted by security.
cum_adjustment = (
    prices['AdjustmentFactor']
    .iloc[::-1]
    .groupby(level='SecuritiesCode')
    .cumprod()
    .iloc[::-1]
)
prices['CumAdjustmentFactor'] = cum_adjustment

# Adjusted copy: prices are multiplied by the cumulative factor, volume is divided by it.
price_cols = ['Open','High','Low','Close']
adjusted_prices = prices.copy()
adjusted_prices[price_cols] = adjusted_prices[price_cols].mul(
    adjusted_prices['CumAdjustmentFactor'], axis=0
)
adjusted_prices['Volume'] = adjusted_prices['Volume'] / adjusted_prices['CumAdjustmentFactor']

In [None]:
# Security with the largest single adjustment factor, drawn from the adjusted frame.
largest_af_idx = prices['AdjustmentFactor'].idxmax()
largest_af = largest_af_idx[0]
show_adjustment(stock=largest_af, af='CumAdjustmentFactor', df=adjusted_prices)

#### Example w/ many stock splits

In [None]:
# Security with the most rows whose adjustment factor differs from 1.
split_rows = prices.loc[prices['AdjustmentFactor'] != 1, 'AdjustmentFactor']
splits_per_security = split_rows.groupby(level='SecuritiesCode').count()
many_splits = splits_per_security.idxmax()
show_adjustment(stock=many_splits, af='AdjustmentFactor', df=prices)

In [None]:
# Same security, now plotted from the adjusted frame with the cumulative factor overlaid.
show_adjustment(stock=many_splits, af='CumAdjustmentFactor', df=adjusted_prices)