In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load stock_list.csv from the competition input directory and preview the first rows.
stocks = pd.read_csv('/kaggle/input/jpx-tokyo-stock-exchange-prediction/stock_list.csv')
stocks.head()

In [None]:
# (rows, columns) of the stock list.
print(stocks.shape)

In [None]:
# Column names, dtypes and non-null counts.
stocks.info()

In [None]:
# Number of missing values in each column.
stocks.isna().sum()

In [None]:
stock_prices_sup = pd.read_csv('../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv')


In [None]:
stock_prices_sup.head()

In [None]:
stock_prices_train = pd.read_csv('../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv')
stock_prices_train.head()

In [None]:
print(f"shape of supplement stock price data is: {stock_prices_sup.shape}")
print(f"shape of train stock price data is: {stock_prices_train.shape}")

In [None]:

#Align the differences on the columns
stock_prices_sup.compare(stock_prices_train, align_axis = 0)

In [None]:
# Column names, dtypes and non-null counts of the raw supplemental frame.
stock_prices_sup.info()

In [None]:
# Number of missing values in each column before cleaning.
stock_prices_sup.isna().sum()

In [None]:
def cleaning(df):
    """Prepare a raw stock-price frame for modelling without modifying the input.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least a ``Date`` column and a ``Target`` column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The cleaned frame (``Date`` parsed to datetime, missing values
        replaced by 0, ``Target`` removed) and the ``Target`` column, whose
        missing values are also replaced by 0.

    Raises
    ------
    KeyError
        If ``Date`` or ``Target`` is not a column of ``df``.
    """
    # assign() and fillna() both return new frames, so the caller's frame keeps
    # its Target column and the calling cell can safely be run again.
    cleaned = df.assign(Date=pd.to_datetime(df['Date'])).fillna(0)
    # pop() only touches the private copy created above.
    target = cleaned.pop('Target')
    return cleaned, target

In [None]:
# Clean the supplemental prices and split off the Target column.
# Note: `target` is rebound later when the training prices are cleaned.
supplemental_stock_prices, target = cleaning(stock_prices_sup)

In [None]:
# Preview the cleaned frame.
supplemental_stock_prices.head()

In [None]:
# Dtypes and non-null counts after cleaning.
supplemental_stock_prices.info()

In [None]:
# Missing-value counts after cleaning (cleaning() fills missing values with 0).
supplemental_stock_prices.isna().sum()

In [None]:
# The Target series returned by cleaning().
target

In [None]:
# How often each SupervisionFlag value occurs.
supplemental_stock_prices['SupervisionFlag'].value_counts()

In [None]:
# Number of distinct securities codes in the frame.
len(np.unique(supplemental_stock_prices['SecuritiesCode']))

In [None]:
# Three stacked panels sharing the date axis: Open, Close and Volume against Date.
fig, axes = plt.subplots(
    nrows=3,
    figsize=(10, 16),
    sharex=True,
    subplot_kw={'frameon': False},  # draw the axes without their frame
)
ax1, ax2, ax3 = axes

# (axis, column to plot, line colour, legend label) for each panel.
panels = [
    (ax1, 'Open', 'r', 'Open'),
    (ax2, 'Close', 'g', 'close'),
    (ax3, 'Volume', 'b', 'Volume'),
]
for panel_ax, column, line_color, legend_label in panels:
    panel_ax.plot(
        supplemental_stock_prices['Date'],
        supplemental_stock_prices[column],
        color=line_color,
        label=legend_label,
    )
    panel_ax.legend(loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
supplemental_stock_prices[supplemental_stock_prices['Close'] == max(supplemental_stock_prices['Close'])]

In [None]:
supplemental_stock_prices[supplemental_stock_prices['Open'] == max(supplemental_stock_prices['Open'])]

In [None]:
nine = supplemental_stock_prices[supplemental_stock_prices['SecuritiesCode']==9983]

In [None]:
nine[nine['Date']>='2021-02-26']

In [None]:
supplemental_stock_prices[supplemental_stock_prices['Close'] == min(supplemental_stock_prices['Close'])]

In [None]:
# Clean the training prices; this rebinds `target` to the training Target column.
train_stock_price, target = cleaning(stock_prices_train)

In [None]:
def feature_engineering(df):
    """Replace the ``Date`` column with numeric calendar features.

    Adds ``year``, ``month``, ``day``, ``dayofweek`` and ``hour`` columns
    derived from ``Date`` and removes ``Date`` and ``RowId``. The input frame
    is not modified; a new frame is returned, so the same input can be passed
    in again.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least ``Date`` (anything ``pd.to_datetime``
        accepts) and ``RowId`` columns.

    Returns
    -------
    pandas.DataFrame
        New frame with the remaining original columns followed by the five
        calendar columns.

    Raises
    ------
    KeyError
        If ``Date`` or ``RowId`` is not a column of ``df``.
    """
    dates = pd.to_datetime(df['Date'])
    calendar_features = {
        'year': dates.dt.year,
        'month': dates.dt.month,
        'day': dates.dt.day,
        'dayofweek': dates.dt.dayofweek,
        'hour': dates.dt.hour,
    }
    # assign() and drop() build new frames instead of editing the caller's one.
    featured = df.assign(**calendar_features)
    return featured.drop(columns=['Date', 'RowId'])

In [None]:
# Swap Date/RowId for calendar features on the training frame.
# NOTE: this cell rebinds its own input, so running it a second time raises
# KeyError because `Date` has already been removed from train_stock_price.
train_stock_price = feature_engineering(train_stock_price)

In [None]:
# Columns that remain after feature engineering.
train_stock_price.columns

In [None]:
import seaborn as sns
# Scatter of Volume against ExpectedDividend. x and y are passed by keyword
# because seaborn 0.12+ accepts only `data` as a positional argument.
sns.scatterplot(x=train_stock_price['ExpectedDividend'], y=train_stock_price['Volume'])

In [None]:
# Hold out the last 10% of rows for validation; rows keep their existing order.
validation_split = 0.1
split_index = int((1 - validation_split) * len(train_stock_price))
X_train, X_val = train_stock_price.iloc[:split_index], train_stock_price.iloc[split_index:]
y_train, y_val = target.iloc[:split_index], target.iloc[split_index:]

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# Predictions for the validation rows.
lgr_pred = model.predict(X_val) 

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
# scikit-learn metrics take (y_true, y_pred). MAPE divides by |y_true|, so
# passing the predictions first would yield a different, wrong value.
# NOTE(review): targets equal to 0 (cleaning() fills missing ones with 0) make
# MAPE extremely large; an absolute-error metric may be more informative.
mape = mean_absolute_percentage_error(y_val, lgr_pred)
mape

In [None]:
def cleaning_df(df):
    """Return a copy of ``df`` in which every missing value is replaced by 0.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns and no missing values.
    """
    return df.fillna(0)

In [None]:
# Load the example test prices and preview the first rows.
test = pd.read_csv('../input/jpx-tokyo-stock-exchange-prediction/example_test_files/stock_prices.csv')
test.head()

In [None]:
# (rows, columns) of the test frame.
test.shape

In [None]:
# Start the submission as an empty frame; its columns are added in the next cells.
submission = pd.DataFrame()
submission.head()

In [None]:
# Copy the identifying columns over from the test frame.
submission['Date'] = test['Date']
submission['SecuritiesCode'] = test['SecuritiesCode']


In [None]:
# Check that the copied columns contain no missing values.
submission.isna().sum()

In [None]:
# Run the test frame through the cleaning and feature-engineering helpers,
# then predict with the fitted model.
test_cleaned = cleaning_df(test)
test_featured = feature_engineering(test_cleaned)
pred = model.predict(test_featured)

In [None]:
# Raw model predictions for the test rows.
pred

In [None]:
prediction = pred.reshape(-1)
# np.argsort returns the positions that would sort the array, not the rank of
# each row, and it ignores that the test file spans several dates. Rank the
# predictions within each Date instead: 0 for the highest predicted target,
# then 1, 2, ... with ties broken by row order so ranks stay unique per date.
ranks = (
    pd.Series(prediction, index=submission.index)
    .groupby(submission['Date'])
    .rank(method='first', ascending=False)
    .sub(1)
    .astype(int)
)
len(ranks)

In [None]:
# Attach the computed ranks to the submission frame.
submission['Rank'] = ranks

In [None]:
# Preview the finished submission.
submission.head()