# In [None]:
#Importing Relevant Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot theme to the matplotlib figures drawn below.
sns.set()

#Loading the Data
# Read the raw CSV export into a DataFrame. The location is a fixed,
# machine-specific Windows path, so it is kept in one named place.
raw_csv_path = 'C:\\Users\\PECU\\Documents\\aapl_raw_data.csv'
apple_data = pd.read_csv(raw_csv_path)
apple_data

#dealing with Missing values

# Remove every row that lacks one of the core fields.
# This has to happen at the DataFrame level: assigning a column's own
# .dropna() result back to that column re-aligns it on the index, which puts
# the NaNs straight back and drops nothing.
# change_percent and avg_vol_20d are deliberately not part of the subset;
# both columns are discarded further down, so gaps in them must not cost rows.
core_columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'adjusted_close']
apple_data = apple_data.dropna(subset=core_columns)


#shape of data
# (rows, columns) of the frame
print(apple_data.shape)

#type of data
type(apple_data)

#description of data
# include='all' summarises non-numeric columns as well as numeric ones
apple_data.describe(include='all')


#Filling Missing Values
# Replace any remaining NaN with 0, one column at a time, for each of the
# listed columns.
columns_to_fill = [
    'date',
    'open',
    'high',
    'low',
    'close',
    'volume',
    'adjusted_close',
    'change_percent',
    'avg_vol_20d',
]
for column_name in columns_to_fill:
    apple_data[column_name] = apple_data[column_name].fillna(0)


#first 60 heads of data
apple_data.head(n=60)

#keys of data
apple_data.columns

#New data after dropping irrelevant columns
# The two derived columns are not used by anything that follows.
irrelevant_columns = ['change_percent', 'avg_vol_20d']
new_apple_data = apple_data.drop(columns=irrelevant_columns)
new_apple_data
new_apple_data.describe()

#new data shape and columns
new_apple_data.shape
new_apple_data.columns


#creating a boxplot to check for outliers
numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'adjusted_close']
new_apple_data.boxplot(column=numeric_columns)


#Checking for Correlation
# Computed once and reused for the heatmap below.
correlations = new_apple_data[numeric_columns].corr()
correlations


#Creating a correlation map
# Start a fresh figure so the heatmap is not drawn on top of the boxplot axes.
plt.figure()
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Correlations live in [-1, 1]; pin the colour scale to that full range.
# A ceiling of 0.3 would paint every coefficient above 0.3 in the same colour.
sns.heatmap(correlations, vmin=-1, vmax=1, center=0, cmap=cmap,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

#Creating Pivot Tables
# No values/aggfunc are given, so pandas applies its default aggregation to
# the remaining columns, grouped by the listed keys.
price_keys = ['open', 'close', 'adjusted_close']
new_apple_data.pivot_table(index=price_keys)

range_keys = ['high', 'low']
new_apple_data.pivot_table(index=range_keys)


#new apple data columns and head
new_apple_data.columns
new_apple_data.head()

#Building Model
# Target (volume) plus the columns used as predictors by the models below.
df = new_apple_data
new_apple_data_model = df[['volume', 'date', 'open', 'high', 'low', 'close', 'adjusted_close']]

#encoding the date, then new shape and columns
# One-hot encoding 'date' creates one indicator column per distinct date and
# nothing a model can generalise from. Slicing those indicators by position
# and renaming them to 'open', 'volume', ... would also discard the real
# numeric columns. Encode the date as a single numeric feature instead:
# the number of days elapsed since the earliest date in the data.
new_apple_datadum = new_apple_data_model.copy()
parsed_dates = pd.to_datetime(new_apple_datadum['date'], errors='coerce')
new_apple_datadum['date'] = (parsed_dates - parsed_dates.min()).dt.days
new_apple_datadum

# shape is an attribute, not a method; calling it raises TypeError
new_apple_datadum.shape
new_apple_datadum.columns

#dropping unusable rows
# Entries that could not be parsed as a date are NaN after the encoding step;
# remove those rows so the modelling frame has no missing dates.
new_apple_datadumm = new_apple_datadum.dropna(subset=['date']).reset_index(drop=True)
new_apple_datadumm

#description and shape
new_apple_datadumm.describe()
new_apple_datadumm.shape

#Train Test Split


from sklearn.model_selection import train_test_split

# Features: every column except the target.
X = new_apple_datadumm.drop(columns=['volume'])

# Target as a 1-D array. With a column vector of shape (n, 1) the estimators
# used below do not agree on the shape of their predictions, so arithmetic
# between two prediction arrays can broadcast to (n, n) instead of combining
# them row by row.
y = new_apple_datadumm['volume'].values

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

#Multiple Linear Regression

import statsmodels.api as sm

# add_constant returns the design matrix with an intercept column added.
# Keep it under its own name so X itself is not silently replaced by the
# augmented matrix.
X_sm = sm.add_constant(X)
model = sm.OLS(y, X_sm)

# Fit once and reuse the results object rather than refitting for the summary.
ols_results = model.fit()

#summary
ols_results.summary()


#LinearRegression

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score


# Ordinary least squares fitted on the training split.
reg = LinearRegression()
reg.fit(X_train, y_train)

# 3-fold cross-validation on the training split. The scorer is the negated
# mean absolute error, so every score is <= 0 and closer to 0 is better.
reg_cv_scores = cross_val_score(
    reg,
    X_train,
    y_train,
    scoring='neg_mean_absolute_error',
    cv=3,
)
reg_cv_scores

#getting mean absolute error
# average of the (negated) per-fold scores
np.mean(reg_cv_scores)

#reg coefficient
reg.coef_

#reg intercept
reg.intercept_

#Lasso Regression
from sklearn import datasets, linear_model

# Lasso with default settings: print its 3-fold cross-validated (negated)
# MAE, then fit it on the whole training split.
lasso = linear_model.Lasso()
lasso_cv_scores = cross_val_score(
    lasso,
    X_train,
    y_train,
    scoring='neg_mean_absolute_error',
    cv=3,
)
print(lasso_cv_scores)

lasso.fit(X_train, y_train)



#RandomForest Regression
from sklearn.ensemble import RandomForestRegressor

# Random forest with default settings, scored the same way as the models above.
rf = RandomForestRegressor()
cross_val_score(
    rf,
    X_train,
    y_train,
    scoring='neg_mean_absolute_error',
    cv=3,
)


#Testing Models
# Predict the held-out rows with the two fitted linear models.
reg_predict = reg.predict(X_test)
lasso_predict = lasso.predict(X_test)


from sklearn.metrics import mean_absolute_error

# Keep both errors in names and print them; as bare expressions the values
# were computed and then thrown away.
reg_mae = mean_absolute_error(y_test, reg_predict)
lasso_mae = mean_absolute_error(y_test, lasso_predict)
print('LinearRegression MAE:', reg_mae)
print('Lasso MAE:', lasso_mae)


# Row-by-row average of the two models' predictions.
# Flatten both arrays first: if one is (n, 1) and the other (n,), adding them
# directly broadcasts to an (n, n) matrix instead of an element-wise sum.
ensemble_predict = (np.ravel(reg_predict) + np.ravel(lasso_predict)) / 2
ensemble_predict


#Predictions
# Lasso: fresh default model, refit on the training split, predict the test rows.
lasso = linear_model.Lasso()
lasso_prediction = lasso.fit(X_train, y_train).predict(X_test)
print(lasso_prediction)

#for linear Reg
# predictions already produced by the fitted LinearRegression model
print(reg_predict)

#for RFR
# Random forest: fresh default model, fit on the training split, predict the test rows.
rf = RandomForestRegressor()
rf_predict = rf.fit(X_train, y_train).predict(X_test)

print(rf_predict)