# Stock Market Analysis

### Problem Link : https://www.kaggle.com/daiearth22/uniqlo-fastretailing-stock-price-prediction/data

#### Necessary Import Statements

In [0]:
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
from pandas import Series
from pandas import DataFrame
from pandas import concat

# Train test split
from sklearn.model_selection import TimeSeriesSplit


# Models
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestRegressor

# Model evaluators
from sklearn.metrics import f1_score,confusion_matrix

### Reading the Train Data

In [0]:
# Shell escape: download Train.csv into the current working directory (requires network access and wget).
!wget https://www.dropbox.com/s/22nfk89c8hshxhs/Train.csv

In [0]:
# Load the downloaded CSV into a DataFrame, then print its columns, dtypes and non-null counts.
train = pd.read_csv("Train.csv")
train.info()

In [0]:
train.head()

In [0]:
# Element-wise ratio of the 'Stock Trading' column to the 'Volume' column.
# NOTE(review): presumably the average traded price per share — confirm against the dataset description.
train['Avg'] = train['Stock Trading'].div(train['Volume'])

In [0]:
train.head()

In [0]:
train.describe()

In [0]:
train.head()

In [0]:
train.tail()

In [0]:
# Line plot of the 'Open' column against the row index, drawn in black ('k').
train['Open'].plot(color='k')

In [0]:
 train.head()

Separating the Date field into year, month and day, each stored in its own column.

In [0]:
# Parse the 'Date' column once, then store its calendar components as separate columns.
date_index = pd.DatetimeIndex(train['Date'])
train['Year'] = date_index.year
train['Month'] = date_index.month
train['Day'] = date_index.day

In [0]:
train.head()

Helper function for dropping unnecessary or redundant features.

In [0]:
def drop_features(features,data):
    """Remove the given column(s) from ``data`` in place.

    Parameters
    ----------
    features : label or list of labels
        Name(s) of the column(s) to remove.
    data : pandas.DataFrame
        Frame to modify; it is mutated directly rather than copied.

    Returns
    -------
    None
        The change is visible through the ``data`` object passed in.

    Raises
    ------
    KeyError
        If any requested label is not a column of ``data``.
    """
    data.drop(columns=features, inplace=True)

In [0]:
# Remove the raw 'Date' column in place; its Year/Month/Day parts were already extracted into their own columns.
drop_features(['Date'],train)

### Applying the TimeSeries Feature Engineering
#### Lag Features

In [0]:
train.info()

In [0]:
# Series view of the 'Open' column, used by the following cells to inspect what shift(-1) does.
open_vals = Series(train['Open'])

In [0]:
open_vals.head()

In [0]:
open_vals.shift(-1).head()

In [0]:
open_vals.shift(-1).tail()

In [0]:
# Shift features for the opening price: column 'O_t-k' holds the 'Open' value found k rows further down.
# NOTE(review): a negative shift pulls values from LATER rows, so these are genuine lags only if the
# rows of Train.csv are ordered newest-first — confirm the row order before trusting the labels.
open1 = train['Open']
open_dataframe = concat(
    [open1.shift(-3), open1.shift(-2), open1.shift(-1)],
    axis=1,
    keys=['O_t-3', 'O_t-2', 'O_t-1'],
)
open_dataframe.shape

In [0]:
open_dataframe.head()

In [0]:
# Append the opening-price shift columns to the training frame (column-wise, aligned on the row index).
train1 = pd.concat([train, open_dataframe], axis=1)

In [0]:
train1.head()

In [0]:
train1.tail()

In [0]:
# Shift features for the closing price: column 'C_t-k' holds the 'Close' value found k rows further down.
# NOTE(review): a negative shift pulls values from LATER rows, so these are genuine lags only if the
# rows of Train.csv are ordered newest-first — confirm the row order before trusting the labels.
close_vals = train['Close']
close_dataframe = concat(
    [close_vals.shift(-3), close_vals.shift(-2), close_vals.shift(-1)],
    axis=1,
    keys=['C_t-3', 'C_t-2', 'C_t-1'],
)
close_dataframe.shape

In [0]:
# Append the closing-price shift columns to train1 (column-wise, aligned on the row index).
train1 = pd.concat([train1, close_dataframe], axis=1)

In [0]:
train1.head()

In [0]:
train1.tail()

In [0]:
# Drop, in place, every row that contains a NaN. The negative shifts above leave NaN in the
# final rows of the shifted columns, so at least those rows are removed here.
train1.dropna(inplace = True)

In [0]:
train1.tail()

In [0]:
train1.shape

In [0]:
#since for combining of data, both should have same number of rows. hence, removing the extra row
#train1.drop(train1.head(1).index, inplace=True)
#train1.shape

#### Rolling Window Statistics

In [0]:
# Set up rolling-window features for the 'High' column.
high = train['High']
# Window width, in rows, used by the shift and rolling cells that follow.
w = 5

In [0]:
# Move 'High' up by w-1 rows: row i of `shift` holds high[i + w - 1], and the last w-1 rows become NaN.
# A width-w rolling window ending at row i of `shift` therefore spans rows i..i+w-1 of the original column.
shift = high.shift(-w+1)

In [0]:
train['High'].head(8)

In [0]:
shift.head(8)

In [0]:
shift.head()

In [0]:
# Rolling window of w rows over the shifted highs; with the default min_periods, its aggregations
# are NaN until a full window of w non-NaN values is available.
window = shift.rolling(window=w)

In [0]:
type(window)

In [0]:
window.min().head()

In [0]:
window.min()

In [0]:
window.min().tail(50)

In [0]:
# Minimum, mean and maximum of each rolling window over the shifted 'High' values, one column per statistic.
df = concat(
    [window.min(), window.mean(), window.max()],
    axis=1,
    keys=['min', 'mean', 'max'],
)

In [0]:
df.head(10)

In [0]:
# Join the rolling-window statistics onto train1 column-wise. concat aligns on the row index with an
# outer join, so index labels missing from train1 (rows removed by its dropna) reappear here with NaN
# in the train1 columns.
train2 = pd.concat([train1, df], axis=1)

In [0]:
train2.head()

In [0]:
train2.tail()

#### Expanding window Statistics

In [0]:
# Drop, in place, every row of train2 that contains a NaN (incomplete rolling windows and rows
# reintroduced by the index-aligned concat).
train2.dropna(inplace=True)

In [0]:
train2.head()

In [0]:
# Expanding-window statistics of the 'Low' column: row i aggregates 'Low' over rows 0..i, and the
# first 4 rows are NaN because at least 5 observations are required.
# The columns carry a 'low_exp_' prefix so they do not collide with the rolling-window
# 'min'/'mean'/'max' columns already present in train2. Duplicate labels would make the later
# feature-importance dict silently overwrite three of its entries.
low= train['Low']
window = low.expanding(min_periods=5)
dfc = concat([window.min(), window.mean(), window.max()], axis=1)
dfc.columns = ['low_exp_min', 'low_exp_mean', 'low_exp_max']

In [0]:
#no null values
dfc.shape

In [0]:
# (disabled) would drop the first 4 rows of dfc to match train2's row count
#dfc.drop(dfc.head(4).index, inplace=True)

In [0]:
dfc.shape

In [0]:
# Concatenate the expanding-window features onto train2 column-wise. dfc is indexed like the full
# `train` frame, so the index-aligned outer join brings back rows train2 no longer has, filled with NaN.
train_final = pd.concat([train2, dfc], axis=1)

In [0]:
train_final.tail()

In [0]:
# Drop, in place, every row that still contains a null value (nothing is imputed or replaced).
train_final.dropna(inplace = True)

In [0]:
#Complete final Train Data
train_final.shape

In [0]:
# Remove the raw 'High', 'Low' and 'Open' columns in place; the shift and window columns derived from them stay.
train_final.drop(columns=['High', 'Low', 'Open'], inplace=True)

### Splitting of Data

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
train_final.info()

In [0]:
# Remove the derived 'Avg' column in place before building the feature matrix.
train_final.drop(columns=['Avg'], inplace=True)


In [0]:
train_final.info()

In [0]:

# Separate the target ('Close') from the remaining columns, then hold out a random subset of rows
# for evaluation (train_test_split's default test size; seed fixed at 42).
# NOTE(review): rows are shuffled before splitting, so held-out rows sit between training rows and the
# shift/window features of neighbouring rows may expose their values — consider a chronological split.
features = train_final.drop('Close', axis=1)
target = train_final['Close']
X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=42)

In [0]:
X_train.head()

### Applying Regressor Model

In [0]:
# Random-forest regressor with default hyper-parameters. The seed is fixed (same value as the
# train/test split) so the fitted trees, feature importances and error metrics are reproducible
# across kernel restarts.
model=RandomForestRegressor(random_state=42)


In [0]:
# Fit the regressor on the training features and the 'Close' target.
model.fit(X_train,y_train)

In [0]:
# Predicted 'Close' values for the held-out rows.
predict=model.predict(X_test)

In [0]:
model.feature_importances_

In [0]:
# Importance score of each feature as a plain list, in the column order used for fitting.
imp_list = list(model.feature_importances_)

In [0]:
# Feature names in column order (X_test comes from the same split as X_train, so its columns match).
col_lis = list(X_test.columns)

In [0]:
# Map each feature name to its importance score, formatted as a string with four decimal places.
feature_importances = {
    name: "{0:.4f}".format(score)
    for name, score in zip(col_lis, imp_list)
}

In [0]:
feature_importances

#### Measuring the Score. (Evaluation Metrics)

In [0]:
from sklearn import metrics

# Error of the held-out predictions: mean absolute error, mean squared error and its square root.
mae = metrics.mean_absolute_error(y_test, predict)
mse = metrics.mean_squared_error(y_test, predict)
print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [0]:
# cross_val_score lives in sklearn.model_selection. The old sklearn.cross_validation module has been
# removed from scikit-learn, so importing from it raises ImportError on current versions.
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation: each fold fits a fresh clone of `model`, and one score per fold is printed
# (the estimator's default scorer, which is R^2 for regressors).
# NOTE(review): this cross-validates on the held-out split only (X_test / y_test), not on the
# training data — confirm that is intended.
print(cross_val_score(model, X_test, y_test,cv=5))