# Data Exploration

In [None]:
import pandas_datareader.data as web
import pandas as pd 
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
from pandas.plotting import scatter_matrix
import seaborn as sns


In [None]:
# Analysis window: 1 Jan 2016 up to 1 Jan 2021 (both at midnight).
start, end = (datetime.datetime(year, 1, 1) for year in (2016, 2021))

In [None]:
# Fetch the price history of each ticker from the "yahoo" source for [start, end].
amazon, google, facebook = (
    web.DataReader(ticker, "yahoo", start, end)
    for ticker in ("AMZN", "GOOG", "fb")
)

In [None]:
# Preview the first rows of each downloaded frame.
# NOTE(review): in a notebook cell presumably only the value of the last
# expression is rendered, so the Amazon and Google previews may not show — confirm.
amazon.head()
google.head()
facebook.head()

In [None]:
# Number of missing values in each column of the Amazon frame.
amazon.isna().sum()

In [None]:
# Number of missing values in each column of the Google frame.
google.isna().sum()

In [None]:
# Number of missing values in each column of the Facebook frame.
facebook.isna().sum()

In [None]:
# Print the column/dtype/non-null summary of every frame.
for frame in (facebook, amazon, google):
    frame.info()

In [None]:
# Persist each frame as a CSV file in the working directory.
for frame, csv_name in (
    (facebook, "fb_stocks.csv"),
    (amazon, "amazon_stocks.csv"),
    (google, "google_stocks.csv"),
):
    frame.to_csv(csv_name)

# Lowest close of each stock

In [None]:
# Row(s) on which Facebook's closing price hit its minimum.
facebook.loc[facebook['Close'].eq(facebook['Close'].min())]

In [None]:
# Row(s) on which Amazon's closing price hit its minimum.
amazon.loc[amazon['Close'].eq(amazon['Close'].min())]

In [None]:
# Row(s) on which Google's closing price hit its minimum.
google.loc[google['Close'].eq(google['Close'].min())]


In [None]:
# Turn the index of each frame back into an ordinary column, then preview Facebook.
facebook, amazon, google = (
    frame.reset_index() for frame in (facebook, amazon, google)
)
facebook.head()

In [None]:
# Add a 'month' column (1-12) derived from each frame's 'Date' column.
for frame in (facebook, google, amazon):
    frame['month'] = pd.DatetimeIndex(frame['Date']).month


# Amazon Monthly Closing

In [None]:
# Sum of Amazon's closing prices per calendar month (all years pooled together).
# 'Close' is selected *before* aggregating so only that column is summed: summing
# the whole frame also tries to add up the datetime 'Date' column, which recent
# pandas versions reject, and wastes work on columns that are thrown away.
amazon_monthly_closing = amazon.groupby("month")['Close'].sum().reset_index()
plt.title("Amazon Monthly Closing")
plt.bar(amazon_monthly_closing['month'], amazon_monthly_closing['Close'])
plt.xticks(amazon_monthly_closing['month'])  # one tick per month number
plt.xlabel("Months")
plt.ylabel("Closing $")
plt.show()

# Facebook Monthly Closing

In [None]:
# Sum of Facebook's closing prices per calendar month (all years pooled together).
# 'Close' is selected *before* aggregating so only that column is summed: summing
# the whole frame also tries to add up the datetime 'Date' column, which recent
# pandas versions reject, and wastes work on columns that are thrown away.
facebook_monthly_closing = facebook.groupby("month")['Close'].sum().reset_index()
plt.title("Facebook Monthly Closing")
plt.bar(facebook_monthly_closing['month'], facebook_monthly_closing['Close'])
plt.xticks(facebook_monthly_closing['month'])  # one tick per month number
plt.xlabel("Months")
plt.ylabel("Closing $")
plt.show()

# Google Monthly Closing

In [None]:
# Sum of Google's closing prices per calendar month (all years pooled together).
# 'Close' is selected *before* aggregating so only that column is summed: summing
# the whole frame also tries to add up the datetime 'Date' column, which recent
# pandas versions reject, and wastes work on columns that are thrown away.
google_monthly_closing = google.groupby("month")['Close'].sum().reset_index()
plt.title("Google Monthly Closing")
plt.bar(google_monthly_closing['month'], google_monthly_closing['Close'])
plt.xticks(google_monthly_closing['month'])  # one tick per month number
plt.xlabel("Months")
plt.ylabel("Closing $")
plt.show()

# Google Open and Closing Shares

In [None]:
# Google's Close/Open/High/Low prices over time on one chart.
# 'Date' is an ordinary column here and is passed explicitly as the x values;
# a bare set_index("Date") call (result discarded) had no effect and was dropped.
for column, legend_name in (
    ('Close', "Closing"),
    ('Open', "Open"),
    ('High', "High"),
    ('Low', "Low"),
):
    plt.plot(google['Date'], google[column], label=legend_name)

plt.legend()
plt.show()

In [None]:
# Plot Google's opening price for rows 1000-1399 (by position).
# The call parentheses are required: a bare `.plot` only references the
# plotting accessor and draws nothing.
google['Open'].iloc[1000:1400].plot()

In [None]:
# Google's traded volume over time on a wide figure.
plt.figure(figsize=(17, 5))
plt.plot(google['Date'], google['Volume'])
plt.ylabel("Volume")  # the series plotted is 'Volume', not a price
plt.grid()
plt.show()

# Facebook Opening and Closing Shares

In [None]:
# Facebook's Close/Open/High/Low prices over time on one chart.
for column, legend_name in (
    ('Close', "Closing"),
    ('Open', "Open"),
    ('High', "High"),
    ('Low', "Low"),
):
    plt.plot(facebook['Date'], facebook[column], label=legend_name)

plt.legend()
plt.show()

In [None]:
# Facebook's traded volume over time on a wide figure.
plt.figure(figsize=(17, 5))
plt.plot(facebook['Date'], facebook['Volume'])
plt.ylabel("Volume")  # the series plotted is 'Volume', not a price
plt.grid()
plt.show()

# Amazon Opening and Closing Shares

In [None]:
# Amazon's Close/Open/High/Low prices over time on one chart.
for column, legend_name in (
    ('Close', "Closing"),
    ('Open', "Open"),
    ('High', "High"),
    ('Low', "Low"),
):
    plt.plot(amazon['Date'], amazon[column], label=legend_name)

plt.legend()
plt.show()

In [None]:
# Amazon's traded volume over time on a wide figure.
plt.figure(figsize=(17, 5))
plt.plot(amazon['Date'], amazon['Volume'])
plt.ylabel("Volume")  # the series plotted is 'Volume', not a price
plt.grid()
plt.show()

In [None]:
# 'Total Traded' = opening price multiplied by volume, row by row.
for frame in (google, facebook, amazon):
    frame['Total Traded'] = frame['Open'] * frame['Volume']

In [None]:
# Make 'Date' the index of each frame, modifying the frames in place.
for frame in (google, facebook, amazon):
    frame.set_index("Date", inplace=True)

# Total Traded

In [None]:
# Overlay the 'Total Traded' series of the three companies on one chart.
for legend_name, frame in (
    ('google', google),
    ('amazon', amazon),
    ('facebook', facebook),
):
    frame['Total Traded'].plot(label=legend_name, figsize=(15, 7))
plt.legend()
plt.grid()

In [None]:
# Zoom in on rows 1000-1299 (by position) of 'Total Traded' for the three stocks,
# all drawn on the same axes; only the first call sets the figure size.
amazon['Total Traded'].iloc[1000:1300].plot(label="amazon",figsize=(15,7))
facebook['Total Traded'].iloc[1000:1300].plot(label="facebook")
google['Total Traded'].iloc[1000:1300].plot(label="google")
plt.legend()
plt.grid()

### Amazon was the most traded share during 2020, likely because during lockdown many people started their own business using Amazon FBA and Amazon PL

## Now check the relationship between the stocks using correlation and a scatter matrix

In [None]:
# Side-by-side table of the three opening-price series, one named column per company.
open_share = pd.concat(
    [google['Open'], amazon['Open'], facebook['Open']],
    axis=1,
    keys=['Google Open', 'Amazon Open', 'Facebook Open'],
)

In [None]:
# Pairwise scatter plots of the opening prices; histograms use 50 bins.
scatter_matrix(open_share,figsize=(8,8),hist_kwds={'bins':50})

## As you can see, Facebook and Google show a strong positive relationship with each other

# Correlation of Close stocks

In [None]:
# Side-by-side table of the three closing-price series, one named column per company.
close_share = pd.concat(
    [google['Close'], amazon['Close'], facebook['Close']],
    axis=1,
    keys=['Google Close', 'Amazon Close', 'Facebook Close'],
)

In [None]:
# Pairwise plots of the closing prices with a regression fit drawn on each pair (kind='reg').
sns.pairplot(close_share, kind='reg')

# Daily Percentage Change

In [None]:
## Daily return = close / previous row's close - 1 (also a view of each stock's volatility).
## The first row has no previous close, so its return is NaN.
for frame in (facebook, amazon, google):
    frame['return'] = frame['Close'] / frame['Close'].shift(1) - 1

In [None]:
# Line chart of Facebook's daily return over time.
plt.title("DAily Return")
facebook['return'].plot(label='Facebook Return',figsize=(15,8))
plt.legend()
plt.grid()
plt.show()

In [None]:
# Line chart of Google's daily return over time (drawn in purple).
plt.title("DAily Return")
google['return'].plot(label='Google Return',figsize=(15,8),color='purple')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Line chart of Amazon's daily return over time (drawn in green).
plt.title("DAily Return")
amazon['return'].plot(label='Amazon Return',figsize=(15,8),color="green")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Overlaid, semi-transparent (alpha=0.5) 100-bin histograms of the daily returns;
# only the first call sets the figure size.
facebook['return'].hist(bins=100, label="facebook",alpha=0.5,figsize=(15,8))
amazon['return'].hist(bins=100, label="amazon",alpha=0.5)
google['return'].hist(bins=100, label="google",alpha=0.5)
plt.legend()

In [None]:
## A kernel density estimate (KDE) draws each return distribution as a smooth
## density curve, which puts the three stocks on a comparable scale.
for legend_name, frame in (
    ('Facebook', facebook),
    ('Amazon', amazon),
    ('google', google),
):
    frame['return'].plot(kind='kde', label=legend_name, figsize=(12, 8))
plt.legend()
plt.grid()

## Don't be surprised that Facebook's spike is high in the histogram but lower here. A KDE plot shows density, which is normalized, so the area under each of the three curves is the same.

In [None]:
# Side-by-side table of the three daily-return series, then a preview of its first rows.
box = pd.concat(
    [facebook['return'], google['return'], amazon['return']],
    axis=1,
    keys=['Facebook Return', "Google Return", "Amazon Return"],
)
box.head()

In [None]:
# Pairwise scatter plots of the daily returns; histograms use 50 bins.
scatter_matrix(box,figsize=(8,8),hist_kwds={'bins':50})

# Cumulative Return 

### We use the cumulative return to capture long-term investment performance. If the cumulative return is greater than 1 you have made a profit; if it is below 1 you have lost money.

In [None]:
# Cumulative return = running product of (1 + daily return).
for frame in (facebook, google, amazon):
    frame['Cumulative Return'] = (1 + frame['return']).cumprod()

In [None]:
# Overlay the cumulative-return curves of the three companies on one chart;
# only the first call sets the figure size.
plt.title("Cumulative Return Vs Time")
facebook['Cumulative Return'].plot(label="Facebook",figsize=(15,7))
google['Cumulative Return'].plot(label="Google")
amazon['Cumulative Return'].plot(label="Amazon")
plt.legend()
plt.grid()

# Linear Regression

In [None]:
# Print the column/dtype/non-null summary of every frame, now including the derived columns.
for frame in (google, facebook, amazon):
    frame.info()

In [None]:
# Descriptive statistics of the Google frame, preceded by a text label.
print('Google')
google.describe()

In [None]:
# Descriptive statistics of the Facebook frame, preceded by a text label.
print('Facebook')
facebook.describe()

In [None]:
# Descriptive statistics of the Amazon frame, preceded by a text label.
print('Amazon')
amazon.describe()

In [None]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

print(__version__)

In [None]:
#libraries for ploting Graph
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot
import numpy as np

#Enable plotly's offline notebook mode so iplot() can render figures inline
init_notebook_mode(connected=True)

In [None]:
#Resetting the index of every dataset from the date back to a numeric index
amazon = amazon.reset_index()
google = google.reset_index()
facebook = facebook.reset_index()


def _stock_layout(company):
    """Return the shared Date-vs-Price plotly layout titled for *company*.

    The axis title font is given through ``title.font``; the older
    ``titlefont`` shorthand is deprecated in plotly. This also matches the
    ``xaxis.title.text`` attribute the later cells assign to.
    """
    axis_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')
    return go.Layout(
        title=f'Stock Price of {company}',
        xaxis=dict(title=dict(text='Date', font=axis_font)),
        yaxis=dict(title=dict(text='Price', font=axis_font)),
    )


#Setting the layout of each company for plotting (identical apart from the title)
amazon_layout = _stock_layout('Amazon')
google_layout = _stock_layout('Google')
facebook_layout = _stock_layout('Facebook')


In [None]:
#Interactive chart of Amazon's closing price against date, using the Amazon layout
amazon_data = [dict(x=amazon['Date'], y=amazon['Close'])]
amazon_plot = go.Figure(data=amazon_data, layout=amazon_layout)
iplot(amazon_plot)

In [None]:
#Interactive chart of Google's closing price against date, using the Google layout
google_data = [dict(x=google['Date'], y=google['Close'])]
google_plot = go.Figure(data=google_data, layout=google_layout)
iplot(google_plot)

In [None]:
#Interactive chart of Facebook's closing price against date, using the Facebook layout
facebook_data = [dict(x=facebook['Date'], y=facebook['Close'])]
facebook_plot = go.Figure(data=facebook_data, layout=facebook_layout)
iplot(facebook_plot)

In [None]:
#Building the regression model
from sklearn.model_selection import train_test_split
#for prepocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

#For model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score  

In [None]:
#Splitting the dataset into train and test sets for linear regression
#for amazon
#Feature: the frame's index values as a single-column 2-D array; target: closing price
amazon_X = np.array(amazon.index).reshape(-1, 1)
amazon_Y = amazon['Close']
#80% of the rows are used for training, so the held-out test share is 20%
#(test_size=0.2; it was 0.8, which trained on only 20%). random_state=101 fixes the shuffle.
amazon_X_train, amazon_X_test, amazon_Y_train, amazon_Y_test = train_test_split(
    amazon_X, amazon_Y, test_size=0.2, random_state=101
)

In [None]:
#Splitting the dataset into train and test sets for linear regression
#for google
#Feature: the frame's index values as a single-column 2-D array; target: closing price
google_X = np.array(google.index).reshape(-1, 1)
google_Y = google['Close']
#80% of the rows are used for training, so the held-out test share is 20%
#(test_size=0.2; it was 0.8, which trained on only 20%). random_state=101 fixes the shuffle.
google_X_train, google_X_test, google_Y_train, google_Y_test = train_test_split(
    google_X, google_Y, test_size=0.2, random_state=101
)

In [None]:
#Splitting the dataset into train and test sets for linear regression
#for facebook
#Feature: the frame's index values as a single-column 2-D array; target: closing price
facebook_X = np.array(facebook.index).reshape(-1, 1)
facebook_Y = facebook['Close']
#80% of the rows are used for training, so the held-out test share is 20%
#(test_size=0.2; it was 0.8, which trained on only 20%). random_state=101 fixes the shuffle.
facebook_X_train, facebook_X_test, facebook_Y_train, facebook_Y_test = train_test_split(
    facebook_X, facebook_Y, test_size=0.2, random_state=101
)

In [None]:
#feature scaling: fit one StandardScaler per company on that company's training feature
#NOTE(review): no transform() call on these scalers is visible in this file, and the
#regression models are fitted on the unscaled features - confirm whether scaling was intended
amazon_scaler=StandardScaler().fit(amazon_X_train)
google_scaler=StandardScaler().fit(google_X_train)
facebook_scaler=StandardScaler().fit(facebook_X_train)
#standardising features helps estimators that are sensitive to feature scale;
#it is not a requirement for every machine learning model


In [None]:
#importing linear Regression library to perform modeling
from sklearn.linear_model import LinearRegression

In [None]:
#Fit one ordinary least-squares model per company on its training split
#(fit() returns the estimator itself, so construction and fitting are chained)
amazon_lm = LinearRegression().fit(amazon_X_train, amazon_Y_train)
google_lm = LinearRegression().fit(google_X_train, google_Y_train)
facebook_lm = LinearRegression().fit(facebook_X_train, facebook_Y_train)
#leave the last fitted estimator as the cell's value
facebook_lm

In [None]:
#Actual vs. predicted closing prices on the training split, one figure per company.
#For each company: trace0 = observed points (markers), trace1 = the model's
#predictions (line); both go into a figure whose x-axis title is changed to 'Day'.

#amazon
amazon_trace0 = go.Scatter(
    x=amazon_X_train[:, 0], y=amazon_Y_train, mode='markers', name='Actual'
)
amazon_trace1 = go.Scatter(
    x=amazon_X_train[:, 0],
    y=amazon_lm.predict(amazon_X_train).T,
    mode='lines',
    name='Predicted',
)
amazon_data = [amazon_trace0, amazon_trace1]
amazon_layout.xaxis.title.text = 'Day'
amazon_plot2 = go.Figure(data=amazon_data, layout=amazon_layout)

#google
google_trace0 = go.Scatter(
    x=google_X_train[:, 0], y=google_Y_train, mode='markers', name='Actual'
)
google_trace1 = go.Scatter(
    x=google_X_train[:, 0],
    y=google_lm.predict(google_X_train).T,
    mode='lines',
    name='Predicted',
)
google_data = [google_trace0, google_trace1]
google_layout.xaxis.title.text = 'Day'
google_plot2 = go.Figure(data=google_data, layout=google_layout)

#facebook
facebook_trace0 = go.Scatter(
    x=facebook_X_train[:, 0], y=facebook_Y_train, mode='markers', name='Actual'
)
facebook_trace1 = go.Scatter(
    x=facebook_X_train[:, 0],
    y=facebook_lm.predict(facebook_X_train).T,
    mode='lines',
    name='Predicted',
)
facebook_data = [facebook_trace0, facebook_trace1]
facebook_layout.xaxis.title.text = 'Day'
facebook_plot2 = go.Figure(data=facebook_data, layout=facebook_layout)

In [None]:
#Render the three actual-vs-predicted figures in order
for figure in (amazon_plot2, google_plot2, facebook_plot2):
    iplot(figure)

In [None]:
#Calculate scores for the model evaluation
#For each company, print a small table with the R^2 score and the mean squared
#error of the fitted linear model on both the train and the test split
print(' Amazon Predicted scores'.center(50))
amazon_scores=f'''
{'Metric'.ljust(10)} {'Train'.center(20)} {'Test'.center(20)}
{'r2_score'.ljust(10)} {r2_score(amazon_Y_train,amazon_lm.predict(amazon_X_train))} \t {r2_score(amazon_Y_test,amazon_lm.predict(amazon_X_test))}
{'MSE'.ljust(10)} {mse(amazon_Y_train,amazon_lm.predict(amazon_X_train))} \t {mse(amazon_Y_test,amazon_lm.predict(amazon_X_test))}
'''
print(amazon_scores)

print(' Google Predicted scores'.center(50))
google_scores=f'''
{'Metric'.ljust(10)} {'Train'.center(20)} {'Test'.center(20)}
{'r2_score'.ljust(10)} {r2_score(google_Y_train,google_lm.predict(google_X_train))} \t {r2_score(google_Y_test,google_lm.predict(google_X_test))}
{'MSE'.ljust(10)} {mse(google_Y_train,google_lm.predict(google_X_train))} \t {mse(google_Y_test,google_lm.predict(google_X_test))}
'''
print(google_scores)

print(' Facebook Predicted scores'.center(50))
facebook_scores=f'''
{'Metric'.ljust(10)} {'Train'.center(20)} {'Test'.center(20)}
{'r2_score'.ljust(10)} {r2_score(facebook_Y_train,facebook_lm.predict(facebook_X_train))} \t {r2_score(facebook_Y_test,facebook_lm.predict(facebook_X_test))}
{'MSE'.ljust(10)} {mse(facebook_Y_train,facebook_lm.predict(facebook_X_train))} \t {mse(facebook_Y_test,facebook_lm.predict(facebook_X_test))}
'''
print(facebook_scores)

# Using LSTM For prediction

In [None]:
# Pull out the closing-price column of each company for the LSTM section.
# (The Google variables are spelled `gogle_*` throughout this section.)
gogle_close=google.reset_index()['Close']
amzn_close=amazon.reset_index()['Close']
facebook_close=facebook.reset_index()['Close']

## Google Closing Stocks


In [None]:
# Number of missing values in Google's closing-price series.
gogle_close.isna().sum()

In [None]:
# Rescale Google's closing prices into [0, 1]; the fitted scaler is kept in
# `gogle_closing` and the scaled values come back as a single-column 2-D array.
gogle_closing = MinMaxScaler(feature_range=(0, 1))
gogle_close = gogle_closing.fit_transform(np.asarray(gogle_close).reshape(-1, 1))

In [None]:
gogle_close # compare the values before and after the transformation: they now lie
# between 0 and 1, the range the scaler above was configured to produce.

In [None]:
# Chronological split: the first 65% of rows for training, the remaining 35% for testing.
gogle_training_size = int(0.65 * len(gogle_close))
gogle_test_size = len(gogle_close) - gogle_training_size
gogle_train_data = gogle_close[:gogle_training_size, :]
gogle_test_data = gogle_close[gogle_training_size:, :1]

In [None]:
def create_dataset(dataset,time_step=1):
    """Build sliding-window samples from column 0 of a 2-D array.

    Each sample is ``time_step`` consecutive values and its target is the
    value that immediately follows the window.  ``len(dataset) - time_step - 1``
    samples are produced, i.e. the last possible window/target pair is not
    emitted.

    Returns a tuple ``(X, Y)`` of NumPy arrays: ``X`` holds the windows and
    ``Y`` the matching targets.
    """
    n_windows = len(dataset) - time_step - 1
    windows = [dataset[first:first + time_step, 0] for first in range(n_windows)]
    targets = [dataset[first + time_step, 0] for first in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Build 100-step input windows and next-step targets for the Google train and test parts.
time_step=100
gogle_X_train,gogle_Y_train=create_dataset(gogle_train_data,time_step)
gogle_X_test,gogle_Y_test=create_dataset(gogle_test_data,time_step)

In [None]:
# Show the array shapes of the test and train inputs.
print(gogle_X_test.shape)
print(gogle_X_train.shape)

In [None]:
# Add a trailing axis of length 1 so the inputs are 3-dimensional:
# (first dim, second dim, 1), matching the input_shape=(100,1) given to the first LSTM layer.
gogle_X_train=gogle_X_train.reshape(gogle_X_train.shape[0],gogle_X_train.shape[1],1)
gogle_X_test=gogle_X_test.reshape(gogle_X_test.shape[0],gogle_X_test.shape[1],1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [None]:
# Three stacked 50-unit LSTM layers followed by a single-output dense layer.
# The first two LSTMs return full sequences so the next LSTM receives one
# vector per time step; the network is trained with MSE loss and Adam.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(100, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1),
])
model.compile(loss="mean_squared_error", optimizer='adam')


In [None]:
# Train on the Google windows for 100 epochs in batches of 64, evaluating on the
# test windows after each epoch (validation_data) and printing progress (verbose=1).
model.fit(gogle_X_train,gogle_Y_train,validation_data=(gogle_X_test,gogle_Y_test),epochs=100,batch_size=64,verbose=1)

In [None]:
import tensorflow as tf

In [None]:
# Model outputs for the Google train and test windows (still in the scaled range).
gogle_train_predict=model.predict(gogle_X_train)
gogle_test_predict=model.predict(gogle_X_test)

In [None]:
# Map the predictions back from the [0, 1] scale to price units with the fitted scaler.
gogle_train_predict=gogle_closing.inverse_transform(gogle_train_predict)
gogle_test_predict=gogle_closing.inverse_transform(gogle_test_predict)


# Train Predict

In [None]:
import math

# RMSE on the Google training windows, in price units.
# The predictions have already been inverse-transformed to prices, while the
# targets are still on the scaler's [0, 1] scale, so the targets are mapped
# back to prices first; comparing the two scales directly is meaningless.
math.sqrt(mse(gogle_closing.inverse_transform(gogle_Y_train.reshape(-1, 1)), gogle_train_predict))

# Test Predict

In [None]:
# RMSE on the Google test windows, in price units.
# The predictions have already been inverse-transformed to prices, while the
# targets are still on the scaler's [0, 1] scale, so the targets are mapped
# back to prices first; comparing the two scales directly is meaningless.
math.sqrt(mse(gogle_closing.inverse_transform(gogle_Y_test.reshape(-1, 1)), gogle_test_predict))

In [None]:
# Plot the actual Google closing prices together with the train and test predictions.
# Each prediction array is copied into a NaN-filled array shaped like the full
# series so that it is drawn at the rows it refers to (NaN rows are not drawn).
look_back=100
trainPredictPlot=np.empty_like(gogle_close)
trainPredictPlot[:, :]=np.nan
# Train predictions start after the first look_back rows, which only serve as input.
trainPredictPlot[look_back:len(gogle_train_predict)+look_back,:]=gogle_train_predict
testPredictPlot=np.empty_like(gogle_close)
testPredictPlot[:, :]=np.nan
# Test predictions occupy the slice that starts look_back*2+1 rows after the
# train predictions and stops one row before the end of the series.
testPredictPlot[len(gogle_train_predict)+(look_back*2)+1:len(gogle_close)-1,:]=gogle_test_predict
plt.figure(figsize=(15,8))
plt.title("Google Stocks Prediction")
# Actual prices, converted back from the scaled values.
plt.plot(gogle_closing.inverse_transform(gogle_close))
plt.plot(trainPredictPlot,label="Train Predict", )
plt.plot(testPredictPlot,label="Test Predict")
plt.legend()
plt.grid()
plt.show()




# Amazon Stocks Prediction

In [None]:
# Rescale Amazon's closing prices into [0, 1]; the fitted scaler is kept in
# `amzn_closing` and the scaled values come back as a single-column 2-D array.
amzn_closing = MinMaxScaler(feature_range=(0, 1))
amzn_close = amzn_closing.fit_transform(np.asarray(amzn_close).reshape(-1, 1))

In [None]:
# Chronological split: the first 65% of rows for training, the remaining 35% for testing.
amzn_training_size = int(0.65 * len(amzn_close))
amzn_test_size = len(amzn_close) - amzn_training_size
amzn_train_data = amzn_close[:amzn_training_size, :]
amzn_test_data = amzn_close[amzn_training_size:, :1]

In [None]:
def amzn_create_dataset(dataset,time_step=1):
    """Build sliding-window samples from column 0 of a 2-D array.

    Each sample is ``time_step`` consecutive values and its target is the
    value that immediately follows the window.  ``len(dataset) - time_step - 1``
    samples are produced, i.e. the last possible window/target pair is not
    emitted.

    Returns a tuple ``(X, Y)`` of NumPy arrays: ``X`` holds the windows and
    ``Y`` the matching targets.
    """
    n_windows = len(dataset) - time_step - 1
    windows = [dataset[first:first + time_step, 0] for first in range(n_windows)]
    targets = [dataset[first + time_step, 0] for first in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Build 100-step input windows and next-step targets for the Amazon train and test parts.
time_step=100
amzn_X_train,amzn_Y_train=amzn_create_dataset(amzn_train_data,time_step)
amzn_X_test,amzn_Y_test=amzn_create_dataset(amzn_test_data,time_step)

In [None]:
# Add a trailing axis of length 1 so the inputs are 3-dimensional: (first dim, second dim, 1).
amzn_X_train=amzn_X_train.reshape(amzn_X_train.shape[0],amzn_X_train.shape[1],1)
amzn_X_test=amzn_X_test.reshape(amzn_X_test.shape[0],amzn_X_test.shape[1],1)

In [None]:
# Train a freshly initialised network for Amazon. Calling fit() on the existing
# `model` would continue from the weights already fitted to Google's prices,
# so the Amazon results would not come from a model trained on Amazon alone.
# The architecture and compile settings are the same as for the Google model.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(100, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1),
])
model.compile(loss="mean_squared_error", optimizer='adam')
# 100 epochs in batches of 64, evaluating on the test windows after each epoch.
model.fit(amzn_X_train,amzn_Y_train,validation_data=(amzn_X_test,amzn_Y_test),epochs=100,batch_size=64,verbose=1)

In [None]:
# Model outputs for the Amazon train and test windows (still in the scaled range).
amzn_train_predict=model.predict(amzn_X_train)
amzn_test_predict=model.predict(amzn_X_test)


In [None]:
# Map the predictions back from the [0, 1] scale to price units with the fitted scaler.
amzn_train_predict=amzn_closing.inverse_transform(amzn_train_predict)
amzn_test_predict=amzn_closing.inverse_transform(amzn_test_predict)


In [None]:
import math

# RMSE on the Amazon training windows, in price units.
# The predictions have already been inverse-transformed to prices, while the
# targets are still on the scaler's [0, 1] scale, so the targets are mapped
# back to prices first; comparing the two scales directly is meaningless.
math.sqrt(mse(amzn_closing.inverse_transform(amzn_Y_train.reshape(-1, 1)), amzn_train_predict))

In [None]:
# RMSE on the Amazon test windows, in price units.
# The predictions have already been inverse-transformed to prices, while the
# targets are still on the scaler's [0, 1] scale, so the targets are mapped
# back to prices first; comparing the two scales directly is meaningless.
math.sqrt(mse(amzn_closing.inverse_transform(amzn_Y_test.reshape(-1, 1)), amzn_test_predict))

In [None]:
# Plot the actual Amazon closing prices together with the train and test predictions.
# Each prediction array is copied into a NaN-filled array shaped like the full
# series so that it is drawn at the rows it refers to (NaN rows are not drawn).
look_back=100
trainPredictPlot=np.empty_like(amzn_close)
trainPredictPlot[:, :]=np.nan
# Train predictions start after the first look_back rows, which only serve as input.
trainPredictPlot[look_back:len(amzn_train_predict)+look_back,:]=amzn_train_predict
testPredictPlot=np.empty_like(amzn_close)
testPredictPlot[:, :]=np.nan
# Test predictions occupy the slice that starts look_back*2+1 rows after the
# train predictions and stops one row before the end of the series.
testPredictPlot[len(amzn_train_predict)+(look_back*2)+1:len(amzn_close)-1,:]=amzn_test_predict
plt.figure(figsize=(15,8))
plt.title("Amazon Stocks Prediction")
# Actual prices, converted back from the scaled values.
plt.plot(amzn_closing.inverse_transform(amzn_close))
plt.plot(trainPredictPlot,label="Train Predict", )
plt.plot(testPredictPlot,label="Test Predict")
plt.legend()
plt.grid()
plt.show()


