# Linear Regression

## Prerequisites
Check for the minimum Python version, then install and verify the required packages.

In [1]:
import platform

# Compare numerically: as plain strings "3.10" sorts before "3.8", so a string
# comparison would wrongly reject every Python release from 3.10 onwards.
python_version = tuple(int(part) for part in platform.python_version_tuple()[:2])
assert python_version >= (3, 8), f"Python 3.8+ is required, found {platform.python_version()}"

In [2]:
# Warnings
import warnings
warnings.filterwarnings('ignore')

# BEGIN: fix Python or Notebook SSL CERTIFICATE_VERIFY_FAILED
import os, ssl
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
    ssl._create_default_https_context = ssl._create_unverified_context
# END: fix Python or Notebook SSL CERTIFICATE_VERIFY_FAILED

In [3]:
!pip -q install -U yfinance numpy pandas sklearn tensorflow torch



In [4]:
import re

import numpy as np
import pandas as pd
import sklearn as skl
import yfinance as yf


def _version_tuple(version):
    """Return the leading dotted-numeric part of a version string as a tuple of ints.

    Suffixes such as "rc1", ".dev0" or "+cu118" are ignored; a string with no
    leading digits yields an empty tuple (which compares lower than any minimum).
    """
    numeric_prefix = re.match(r"\d+(?:\.\d+)*", version)
    if numeric_prefix is None:
        return ()
    return tuple(int(part) for part in numeric_prefix.group().split("."))


# Versions are compared as integer tuples: a plain string comparison orders
# "1.9" after "1.19" and "0.9" after "0.20", which gives wrong answers.
assert _version_tuple(yf.__version__) >= (0, 1, 63), f"yfinance >= 0.1.63 required, found {yf.__version__}"
assert _version_tuple(pd.__version__) >= (1, 2), f"pandas >= 1.2 required, found {pd.__version__}"
assert _version_tuple(np.__version__) >= (1, 19), f"numpy >= 1.19 required, found {np.__version__}"
assert _version_tuple(skl.__version__) >= (0, 20), f"scikit-learn >= 0.20 required, found {skl.__version__}"

In [5]:
import datetime
import inspect
import time
import traceback

In [6]:
## Python code

In [7]:
def get_stock_data(ticker, start_date, end_date):
    """Get stock data from Yahoo Finance.

    Args:
        ticker: Symbol handed to ``yf.Ticker`` (for example "AAPL").
        start_date: Passed as ``start`` to ``Ticker.history``.
        end_date: Passed as ``end`` to ``Ticker.history``.

    Returns:
        Whatever ``Ticker.history`` returns, or ``None`` when any exception is
        raised while fetching (the traceback is printed instead of propagated).
    """
    startTime = time.perf_counter()
    try:
        return yf.Ticker(ticker).history(start=start_date, end=end_date)
    except Exception:
        # Best-effort by design: report the failure and let the caller test for None.
        # `traceback` must be imported at notebook level, otherwise this handler
        # itself fails with a NameError that hides the original error.
        print(traceback.format_exc())
        return None
    finally:
        totalTime = time.perf_counter() - startTime
        # currentframe() yields the same function name as inspect.stack()[0][3]
        # without building frame records for the whole call stack.
        method_name = inspect.currentframe().f_code.co_name
        print(f"METHOD: {method_name} completed in {format(totalTime, '6.3f')} seconds or {format(totalTime / 60, '6.3f')} minutes ")

In [8]:
def compare(y_test, y_pred):
    """Compare the y_test and y_pred closing prices side by side.

    Args:
        y_test: DataFrame with a 'Close' column holding the actual values.
        y_pred: DataFrame with a 'Close' column holding the predicted values.

    Returns:
        A new DataFrame with the columns "y_test" and "y_pred". The two 'Close'
        columns are aligned on their index labels, so both inputs should share
        the same index (for example both reset to 0..n-1).

    Neither input is modified. The earlier version required an 'index' column
    (its drop result was discarded, so the call only served to raise KeyError
    when the column was missing) and overwrote that column on the caller's frame.
    """
    startTime = time.perf_counter()
    try:
        return pd.DataFrame({"y_test": y_test['Close'], "y_pred": y_pred['Close']})
    finally:
        totalTime = time.perf_counter() - startTime
        method_name = inspect.currentframe().f_code.co_name
        # Same "METHOD: ..." prefix as the other timed helpers (the stray "[" is removed).
        print(f"METHOD: {method_name} completed in {format(totalTime, '6.3f')} seconds or {format(totalTime / 60, '6.3f')} minutes ")

In [9]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled train/test split is identical on every run.
RANDOM_STATE = 42

stock_data = get_stock_data("AAPL", "2021-03-01", "2021-08-31")
if stock_data is None:
    # get_stock_data reports failures by returning None; stop here with a clear message.
    raise RuntimeError("No stock data was returned for AAPL; see the traceback printed above.")

# Turn the index into a regular column and keep only the columns used below.
stock_data = stock_data.reset_index().drop(columns=["High", "Low", "Volume", "Dividends", "Stock Splits"])

# Feature: 'Open'; target: 'Close'; 20% of the rows are held out as test data.
X_train, X_test, y_train, y_test = train_test_split(
    stock_data[['Open']],
    stock_data[['Close']],
    test_size=.2,
    random_state=RANDOM_STATE,
)

# Test set
print("\nTest set::")
print(f"X_test length:{len(X_test)}  y_test length:{len(y_test)} ")

# Training set
print("\nTraining set::")
print(f"X_train length:{len(X_train)}  y_train length:{len(y_train)} ")

METHOD: get_stock_data completed in  0.264 seconds or  0.004 minutes 

Test set::
X_test length:26  y_test length:26 

Training set::
X_train length:101  y_train length:101 


## scikit-learn model

Information at https://scikit-learn.org/stable/modules/linear_model.html#generalized-linear-regression

In [15]:
from sklearn.linear_model import LinearRegression as skl_LinearRegression

class ScikitLinearRegression:
    """Fit and apply scikit-learn's LinearRegression, printing the time each step takes.

    Both methods are best-effort: on any exception they print the traceback and
    return ``None`` instead of raising. `traceback` must be imported at notebook
    level, otherwise the handlers fail with a NameError that hides the real error.
    """

    @staticmethod
    def _report_elapsed(method_name, startTime):
        """Print the time elapsed since ``startTime`` (a ``time.perf_counter()`` reading)."""
        totalTime = time.perf_counter() - startTime
        print(f"METHOD: {method_name} completed in {format(totalTime, '6.3f')} seconds or {format(totalTime / 60, '6.3f')} minutes ")

    def model(self, X, y):
        """Linear Regression using sklearn.

        Args:
            X: Training features, passed unchanged to ``LinearRegression.fit``.
            y: Training targets, passed unchanged to ``LinearRegression.fit``.

        Returns:
            The fitted estimator, or ``None`` if fitting raised an exception.
        """
        startTime = time.perf_counter()
        try:
            model = skl_LinearRegression()
            model.fit(X, y)
            return model
        except Exception:
            print(f" Exception: {traceback.format_exc()}")
            return None
        finally:
            self._report_elapsed(inspect.currentframe().f_code.co_name, startTime)

    def predict(self, model, X):
        """Predict using the model.

        Args:
            model: Object exposing ``predict(X)``, e.g. the value returned by ``model()``.
            X: Features to predict for, passed unchanged to ``model.predict``.

        Returns:
            The result of ``model.predict(X)``, or ``None`` if it raised an
            exception (which includes ``model`` being ``None``).
        """
        startTime = time.perf_counter()
        try:
            return model.predict(X)
        except Exception:
            print(f" Exception: {traceback.format_exc()}")
            return None
        finally:
            self._report_elapsed(inspect.currentframe().f_code.co_name, startTime)


In [16]:
# Scikit Linear Regression - train model
sklr = ScikitLinearRegression()
reg = sklr.model(X_train, y_train)

# Scikit Linear Regression - predict
y_pred = sklr.predict(reg, X_test)

# Linear Regression - compare
y_test.reset_index(inplace=True)
y_pred = pd.DataFrame(y_pred, columns=['Close']).reset_index()
print(f"\nlength:: y_test: {len(y_test)}  y_pred: {len(y_pred)}")
print(f"\type:: y_test: {type(y_test)}  y_pred: {type(y_pred)}")
# print(f"*** y_pred: \n{y_pred['Close']} ")

compare(y_test, y_pred)

METHOD: model completed in  0.020 seconds or  0.000 minutes 
METHOD: predict completed in  0.002 seconds or  0.000 minutes 

length:: y_test: 26  y_pred: 26
	ype:: y_test: <class 'pandas.core.frame.DataFrame'>  y_pred: <class 'pandas.core.frame.DataFrame'>
[METHOD: compare completed in  0.003 seconds or  0.000 minutes 


Unnamed: 0,y_test,y_pred
0,133.210419,134.382837
1,124.72097,128.206514
2,149.710007,148.281829
3,120.823441,120.264868
4,146.699997,145.039654
5,146.730164,147.036057
6,131.519226,132.768523
7,146.089996,146.19616
8,127.159477,126.56588
9,131.040756,131.527026


In [None]:
# Show the fitted coefficient(s) and intercept of `reg`.
# NOTE(review): the metric lines previously commented out here both called
# reg.score(X_test, y_test); LinearRegression.score returns R^2, not a mean
# squared error - confirm the intended metric before adding one back.
print(
    f"Regression Coefficients: {reg.coef_}",
    f"Regression Intercept: {reg.intercept_}",
    sep="\n",
)

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

