# Stock Market Analysis

### Problem Link : https://www.kaggle.com/daiearth22/uniqlo-fastretailing-stock-price-prediction/data

#### Necessary Import Statements

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
from pandas import Series
from pandas import DataFrame
from pandas import concat

# Train test split
from sklearn.model_selection import TimeSeriesSplit


# Models
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Model evaluators
from sklearn.metrics import f1_score,confusion_matrix
from sklearn.metrics import classification_report

### Reading the Train Data

In [2]:
train = pd.read_csv("train.csv", parse_dates=[0], index_col=[0])
train.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1226 entries, 2016-12-30 to 2012-01-04
Data columns (total 6 columns):
Open             1226 non-null int64
High             1226 non-null int64
Low              1226 non-null int64
Close            1226 non-null int64
Volume           1226 non-null int64
Stock Trading    1226 non-null int64
dtypes: int64(6)
memory usage: 67.0 KB


In [None]:
train.head()

In [None]:
def drop_features(features,data):
    data.drop(features,inplace=True,axis=1)

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report

def run_train_test(df, model):
    train_data=df[:1000][:]
    test_data=df[1000:][:]
    
    y_train = train_data['Close']
    drop_features(['Close'],train_data)
    x_train = train_data
    
    #Splitting of test data into x_test and y_test.
    y_test = test_data['Close']
    drop_features(['Close'],test_data)
    x_test = test_data

    model.fit(x_train,y_train)
    predict=model.predict(x_test)
    
    print (model.feature_importances_)
 
    print('MAE:', metrics.mean_absolute_error(y_test, predict))
    print('MSE:', metrics.mean_squared_error(y_test, predict))
    print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predict)))
    
    from sklearn.cross_validation import cross_val_score
    print("Cross Val Score =", cross_val_score(model, x_test, y_test,cv=3))   


In [None]:
train.head()

In [None]:
run_train_test(train, RandomForestRegressor())

In [None]:
train.head()

### Applying the TimeSeries Feature Engineering
#### Lag Features

In [None]:
#Applying Lag Features to Opening Price
train['Open lag 1'] = Series(train['Open']).shift(1)
train['High lag 1'] = Series(train['High']).shift(1)
train['Low lag 1'] = Series(train['Low']).shift(1)
train.head()
drop_features(['Open','High','Low','Volume','Stock Trading'],train)

In [None]:
train.dropna(inplace=True)
train.head()


In [None]:
drop_features(['High lag 1','Low lag 1'],train)

In [None]:
run_train_test(train, RandomForestRegressor())

In [None]:
#combining the train data and the lag features of Temp
train1 = pd.concat([train, dataframe], axis=1)

In [None]:
#dropping temperature feature
drop_features(['Open'],train1)

In [None]:
train1.head()

In [None]:
#dropping null value rows
train1.dropna(inplace = True)

In [None]:
#since for combining of data, both should have same number of rows. hence, removing the extra row
train1.drop(train1.head(1).index, inplace=True)
train1.shape

#### Rolling Window Statistics

In [None]:
# Appling window Features for the Highest Value Feature
high = train['High']
w = 3
shift = high.shift(w-1)
window = shift.rolling(window=w)
df = concat([window.min(), window.mean(), window.max(), high], axis=1)
df.columns = ['min', 'mean', 'max', 'high']

In [None]:
df.head()

In [None]:
#dropping null values
df.dropna(inplace = True)
df.shape

In [None]:
#combining the train1 and the rolling window features
train2 = pd.concat([train1, df], axis=1)

In [None]:
train2.head()

In [None]:
#Dropping the High feature(redundant)
drop_features(['High'],train2)

In [None]:
train2.tail()

#### Expanding window Statistics

In [None]:
#Applying Expanding window for the Lowest Value
low= train['Low']
window = low.expanding()
dfc = concat([window.min(), window.mean(), window.max(), low.shift(-1),low], axis=1)
dfc.columns = ['min', 'mean', 'max', 't+1','low']

In [None]:
#no null values
dfc.shape

In [None]:
#to make it equal to the train2, dropping forst 4 values
dfc.drop(dfc.head(4).index, inplace=True)

In [None]:
dfc.shape

In [None]:
#concating the expanding window features to the previous train2.
train_final = pd.concat([train2, dfc], axis=1)
#Dropping the CO2 feature(redundant)
drop_features(['Low'],train_final)

In [None]:
train_final.tail()

In [None]:
#Replacing the null values with -1 if any.
train_final.fillna(value=-1,inplace = True)

In [None]:
#Complete final Train Data
train_final.shape

### Splitting of Data

In [None]:
test_data=train_final[1000:][:]

In [None]:
#Splitting of train data into x_train and y_train.
y_train = train_data['Close']
drop_features(['Close'],train_data)
x_train = train_data

In [None]:
#Splitting of test data into x_test and y_test.
y_test = test_data['Close']
drop_features(['Close'],test_data)
x_test = test_data

### Applying Regressor Model

In [None]:
model=RandomForestRegressor()

In [None]:
model.fit(x_train,y_train)

In [None]:
predict=model.predict(x_test)

#### Measuring the Score. (Evaluation Metrics)

In [None]:
from sklearn import metrics
print('MAE:', metrics.mean_absolute_error(y_test, predict))
print('MSE:', metrics.mean_squared_error(y_test, predict))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predict)))

In [None]:
from sklearn.cross_validation import cross_val_score

print(cross_val_score(model, x_test, y_test,cv=3))