This notebook contains an autocorrelation analysis of the target (rate-of-change) for all the securities.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import acf
from tqdm import tqdm

# Never truncate wide DataFrames when they are displayed: show every column.
pd.options.display.max_columns = None

***
## loading data

In [None]:
%%time
# Read the two competition CSV files into DataFrames (paths are relative to the
# notebook's working directory). `stock_prices` is the frame analysed below; the
# cells in this notebook use its SecuritiesCode, Date and Target columns.
stock_prices = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv")
# NOTE(review): stock_list is loaded here but not referenced by any cell visible in this notebook.
stock_list = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/stock_list.csv")

***
## autocorrelation of target


In [None]:
# Take a reproducible sample of just a few securities for visualization.
# A dedicated, seeded generator keeps the selection identical across
# "Restart & Run All" and across re-executions of this cell; an unseeded
# shuffle would pick different securities (and draw different figures) each run.
SAMPLE_SEED = 42
N_SAMPLE_SECURITIES = 30

_all_codes = stock_prices["SecuritiesCode"].unique()
_sample_rng = np.random.default_rng(SAMPLE_SEED)
# Sample without replacement; the size is capped so that having fewer than
# N_SAMPLE_SECURITIES distinct codes simply selects all of them.
arr = _sample_rng.choice(
    _all_codes,
    size=min(N_SAMPLE_SECURITIES, len(_all_codes)),
    replace=False,
)
# Keep only the rows that belong to the sampled securities.
_stock_prices = stock_prices[stock_prices["SecuritiesCode"].isin(arr)].reset_index(drop=True)

In [None]:
# Draw one autocorrelation plot of Target (lags 1 through 28) per sampled security.
lag_range = np.arange(1, 29)

for SecuritiesCode, group in _stock_prices.groupby("SecuritiesCode"):
    # Keep only complete (Date, Target) rows and index the targets by their Date.
    target_by_date = group.loc[:, ["Date", "Target"]].dropna().set_index(["Date"])

    # One figure with a single axes per security; plot_acf draws onto that axes.
    fig, ax = plt.subplots(figsize=(15, 5))
    plot_acf(target_by_date, lags=lag_range, ax=ax, title=f"SecuritiesCode={SecuritiesCode}")
    ax.grid()
    plt.show()


***
## overall autocorrelation

In [None]:
# Autocorrelation of Target at lags 1..nlags for every security.
# `results` collects one vector of length `nlags` per security.
nlags = 28
results = list()

for SecuritiesCode, df in tqdm(stock_prices.groupby("SecuritiesCode")):
    # Targets of this security, in file order, with incomplete rows removed.
    target = df[["Date", "Target"]].dropna()["Target"].to_numpy()

    # acf() cannot return more coefficients than there are observations, so a
    # series with `nlags` or fewer points would yield a shorter vector and make
    # `results` ragged (it could then no longer be averaged lag by lag).
    # Such series are skipped.
    if len(target) <= nlags:
        continue

    out = acf(target, nlags=nlags)
    # Drop the lag-0 coefficient (the series against itself); keep lags 1..nlags.
    results.append(out[1:])

In [None]:
# Average the per-security autocorrelation vectors lag by lag (axis 0 runs over securities).
avg_results = np.mean(results, axis=0)

In [None]:
# Plot the average autocorrelation per lag across all securities.
# The lag axis is derived from avg_results (whose first entry is lag 1), so it
# always matches the number of coefficients computed upstream instead of
# repeating a hard-coded range that could fall out of sync.
lags = np.arange(1, len(avg_results) + 1)

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(lags, avg_results, "o")
ax.axhline(y=0, c="red")  # zero-correlation reference line
ax.set_ylim(-0.1, 0.1)
ax.grid()
ax.set_xlabel("lag")
ax.set_ylabel("autocorrelation")
ax.set_title("average autocorrelation of Target across securities")
plt.show()

***