# JPX Tokyo Stock Exchange

## Reading data paths

In [1]:
import numpy as np
import pandas as pd

import os
# List every file available under the Kaggle input directory, one full path per line.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


/kaggle/input/jpx-tokyo-stock-exchange-prediction/stock_list.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/sample_submission.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/options.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/financials.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/secondary_stock_prices.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/trades.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/stock_prices.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/jpx_tokyo_market_prediction/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/jpx-tokyo-stock-exchange-prediction/jpx_tokyo_market_prediction/__init__.py
/kaggle/input/jpx-tokyo-stock-exchange-prediction/data_specifications/stock_fin_spec.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/data_specifications/trades_spec.csv
/kaggle/input/jpx-tokyo-stock-

## Reading the Sample Submission file for clarity

**Observations - Features:**
- Date
- SecuritiesCode
- Rank

In [2]:
# Load the sample submission to inspect the expected output format.
sample_submission_csv = "/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/sample_submission.csv"
df_example = pd.read_csv(sample_submission_csv)
df_example

Unnamed: 0,Date,SecuritiesCode,Rank
0,2021-12-06,1301,0
1,2021-12-06,1332,1
2,2021-12-06,1333,2
3,2021-12-06,1375,3
4,2021-12-06,1376,4
...,...,...,...
111995,2022-02-28,9990,1995
111996,2022-02-28,9991,1996
111997,2022-02-28,9993,1997
111998,2022-02-28,9994,1998


In [3]:
# Summarize the sample submission: column names, non-null counts, dtypes and memory usage.
df_example.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112000 entries, 0 to 111999
Data columns (total 3 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   Date            112000 non-null  object
 1   SecuritiesCode  112000 non-null  int64 
 2   Rank            112000 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 2.6+ MB


## Time-Series API test

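The `iter_test` method creates a generator.

Each tuple it yields holds the data samples for one step of the test set: prices, options, financials, trades, secondary prices, and the sample prediction.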

In [4]:
# Set up the competition's time-series API: `env` hands out the test data and
# receives the predictions, `iter_test` is the generator that yields the test batches.
import jpx_tokyo_market_prediction
env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files


# Template of the full submission loop (left commented out while exploring the API by hand):
# for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
#    sample_prediction['Rank'] = np.arange(len(sample_prediction))  # make your predictions here
#    env.predict(sample_prediction)   # register your predictions

In [5]:
# Pull the first batch from the test generator. Going by the API's loop template, the tuple
# is (prices, options, financials, trades, secondary_prices, sample_prediction) — TODO confirm.
a = next(iter_test)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


In [6]:
# First element of the batch: the stock prices frame (RowId, OHLC, Volume, ... per security).
a[0]

Unnamed: 0,Date,RowId,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag
0,2021-12-06,20211206_1301,1301,2982.0,2982.0,2965.0,2971.0,8900,1.0,,False
1,2021-12-06,20211206_1332,1332,592.0,599.0,588.0,589.0,1360800,1.0,,False
2,2021-12-06,20211206_1333,1333,2368.0,2388.0,2360.0,2377.0,125900,1.0,,False
3,2021-12-06,20211206_1375,1375,1230.0,1239.0,1224.0,1224.0,81100,1.0,,False
4,2021-12-06,20211206_1376,1376,1339.0,1372.0,1339.0,1351.0,6200,1.0,,False
...,...,...,...,...,...,...,...,...,...,...,...
1995,2021-12-06,20211206_9990,9990,528.0,531.0,516.0,517.0,65300,1.0,,False
1996,2021-12-06,20211206_9991,9991,796.0,800.0,785.0,785.0,29100,1.0,,False
1997,2021-12-06,20211206_9993,9993,1645.0,1653.0,1627.0,1627.0,6200,1.0,,False
1998,2021-12-06,20211206_9994,9994,2394.0,2433.0,2393.0,2418.0,7800,1.0,,False


In [7]:
# Register the prediction for this batch. a[5] is the sample prediction frame the API
# hands out as the submission template, so pass the whole frame back: wrapping only
# its "Rank" column in a new DataFrame drops Date and SecuritiesCode, the columns
# that identify which row each rank belongs to.
env.predict(a[5])

## Data observations

### Stock prices (training set)

- Stocks are identified by their SecuritiesCode
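- `Target` is the value to predict for a given date $t$: the rate of change of the adjusted closing price between $t+1$ and $t+2$, i.e. $(\text{Close}_{t+2} - \text{Close}_{t+1}) / \text{Close}_{t+1}$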



In [8]:
# Load the training-set daily stock prices.
train_prices_csv = "/kaggle/input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv"
df_stock_prices = pd.read_csv(train_prices_csv)
# Optional: inspect a single security instead.
# df_stock_prices[df_stock_prices['SecuritiesCode'] == 1301]
# Display everything except the first 20 rows (pandas abbreviates the middle when rendering).
df_stock_prices.iloc[20:]

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
20,20170104_1719,2017-01-04,1719,783.0,797.0,779.0,795.0,2039100,1.0,,False,-0.005051
21,20170104_1720,2017-01-04,1720,937.0,952.0,937.0,950.0,634400,1.0,,False,0.005219
22,20170104_1721,2017-01-04,1721,2154.0,2198.0,2147.0,2187.0,510500,1.0,,False,0.012582
23,20170104_1723,2017-01-04,1723,2033.0,2120.0,2021.0,2088.0,48000,1.0,,False,-0.006226
24,20170104_1726,2017-01-04,1726,254.0,261.0,251.0,261.0,539500,1.0,,False,0.011194
...,...,...,...,...,...,...,...,...,...,...,...,...
2332526,20211203_9990,2021-12-03,9990,514.0,528.0,513.0,528.0,44200,1.0,,False,0.034816
2332527,20211203_9991,2021-12-03,9991,782.0,794.0,782.0,794.0,35900,1.0,,False,0.025478
2332528,20211203_9993,2021-12-03,9993,1690.0,1690.0,1645.0,1645.0,7200,1.0,,False,-0.004302
2332529,20211203_9994,2021-12-03,9994,2388.0,2396.0,2380.0,2389.0,6500,1.0,,False,0.009098
