# Extended dataset

yfinance is a Python library for querying stock information from Yahoo Finance.

In [None]:
pip install yfinance

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Read the main and supplemental JPX price tables, stack them into one frame,
# and collect the distinct security codes that appear in either file.
train_csv = '../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv'
supplemental_csv = '../input/jpx-tokyo-stock-exchange-prediction/supplemental_files/stock_prices.csv'
df1, df2 = (pd.read_csv(csv_path) for csv_path in (train_csv, supplemental_csv))

df = pd.concat([df1, df2])
# Parse Date so the combined frame can later be plotted on a time axis.
df['Date'] = pd.to_datetime(df['Date'])
codes = df['SecuritiesCode'].unique()

In [None]:
import time


def _to_jpx_format(raw_hist, tick, columns):
    """Reshape one ticker's yfinance history into the given column layout.

    Args:
        raw_hist: Frame returned by ``Ticker.history`` (dates in the index).
        tick: Security code the history belongs to.
        columns: Column labels (and order) the returned frame must have.

    Returns:
        A frame restricted to ``columns``, with ``Date`` as ``YYYY-MM-DD``
        strings and ``RowId`` as ``YYYYMMDD_<tick>``.
    """
    hist = raw_hist.reset_index()

    # Format dates explicitly instead of relying on str(): a timestamp that
    # carries a time-of-day or UTC offset would otherwise leak into Date/RowId.
    dates = pd.to_datetime(hist['Date'])
    hist['Date'] = dates.dt.strftime('%Y-%m-%d')
    hist['RowId'] = dates.dt.strftime('%Y%m%d') + '_' + str(tick)
    hist['SecuritiesCode'] = tick

    # Dividends is coerced too, so the zero check below is a numeric comparison.
    for col in ['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends']:
        hist[col] = pd.to_numeric(hist[col], errors='coerce')

    # Relative change in Close from the next row to the row after it.
    next_close = hist['Close'].shift(-1)
    hist['Target'] = (hist['Close'].shift(-2) - next_close) / next_close

    hist = hist.rename(columns={'Dividends': 'ExpectedDividend'})
    # Rows without a dividend become NaN rather than 0.
    hist['ExpectedDividend'] = hist['ExpectedDividend'].where(
        hist['ExpectedDividend'] != 0
    )

    # Not derived from the yfinance data here; left as missing.
    hist['SupervisionFlag'] = np.nan
    hist['AdjustmentFactor'] = np.nan
    return hist[columns]


# Download the price history of every security code and collect one
# reformatted frame per ticker in ``all_hist``.
all_hist = []
for tick in tqdm(codes):
    # '.T' is presumably Yahoo Finance's suffix for Tokyo listings -- confirm.
    ticker = yf.Ticker(f"{tick}.T")
    raw_hist = ticker.history(
        start="2013-01-01",
        end='2022-05-01',
        back_adjust=True,
        auto_adjust=False,
    )
    # Pause between requests, as the original loop did after each ticker.
    time.sleep(2)

    if raw_hist.empty:
        # Nothing to reformat; skip instead of failing on missing columns
        # partway through a long download.
        tqdm.write(f"No yfinance history for {tick}.T; skipping")
        continue

    all_hist.append(_to_jpx_format(raw_hist, tick, df1.columns))

In [None]:
# Stack the per-ticker frames, then draw one figure per security code that
# overlays the yfinance Close series on the JPX Close series.
new_ts = pd.concat(all_hist)
for code in codes:
    yf_prices = new_ts.loc[new_ts['SecuritiesCode'] == code].set_index('Date')
    # Date is stored as text in new_ts; parse it so both series share a time axis.
    yf_prices.index = pd.to_datetime(yf_prices.index)
    jpx_prices = df.loc[df['SecuritiesCode'] == code].set_index('Date')

    plt.figure(figsize=(12, 4))
    plt.title(code)
    for prices, label in ((yf_prices, 'Close yfinance'), (jpx_prices, 'Close JPX')):
        plt.plot(prices['Close'], label=label)
    plt.legend()
    plt.show()

In [None]:
# Save the combined yfinance history as a CSV file, leaving out the row index.
new_ts.to_csv(path_or_buf='jpx_extended_stocks.csv', index=False)