In [None]:
import yfinance as yp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import plotly.graph_objs as go
from plotly.offline import plot, init_notebook_mode


In [None]:
import os
import sys

# Make the local 'utils' directory importable for the helper imports that follow.
# Notebook kernels do not define `__file__` (the original expression raised
# NameError there), so fall back to the current working directory when it is
# missing. When the code runs as a plain script, `__file__` is still honoured.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()

utils_dir = os.path.join(base_dir, 'utils')
# Guard against piling up duplicate sys.path entries when the cell is re-run.
if utils_dir not in sys.path:
    sys.path.append(utils_dir)

In [None]:
# Project-local helper functions (presumably resolved through the 'utils'
# directory appended to sys.path in the previous cell — TODO confirm).
from create_feature_matrix import create_feature_matrix
from extend_feature_matrix import extend_feature_matrix
from split_data import split_data
# Set up plotly's offline notebook mode before any figure is drawn.
init_notebook_mode(connected=True)

In [None]:
# Download parameters: the symbol and the date range handed to yfinance.
ticker = 'GOOGL'
start_date = '2010-06-29'
end_date = '2014-06-24'

stock_data = yp.download(ticker, start=start_date, end=end_date)

# A failed download (bad symbol, network error, no data in range) comes back
# as an empty frame instead of an exception. Stop here with a clear message
# rather than letting the feature/model cells fail in a confusing way.
if stock_data.empty:
    raise ValueError(
        "No price data returned for {!r} between {} and {}.".format(
            ticker, start_date, end_date
        )
    )

In [None]:
# Build the feature matrix X from the downloaded price data.
X = create_feature_matrix(stock_data)

# Target: the 'Close' values on exactly the rows kept in X. A single .loc
# lookup replaces the chained ['Close'][X.index] indexing, so X.index is always
# interpreted as row labels (never as column keys).
y = stock_data.loc[X.index, 'Close']

In [None]:
# Derive two extra columns from the 'lag_1' and 'lag_2' columns and append
# them to X through the project helper.
lag_delta = X['lag_1'] - X['lag_2']
extra_features = {
    '(n-1 - n-2)^2': lag_delta ** 2,       # squared difference of lag_1 and lag_2
    'Volatility': lag_delta / X['lag_2'],  # same difference, relative to lag_2
}
X = extend_feature_matrix(X, extra_features)

In [None]:
# Partition the features and the target into train and test portions.
(X_train, X_test,
 y_train, y_test) = split_data(X, y)

In [None]:
# Standardise the features: the scaler is fitted on the training portion only
# and then applied to both the training and the test portion.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(part) for part in (X_train, X_test)
)

In [None]:
# Fit a linear regression on the scaled training features.
lm = LinearRegression()
lm.fit(X=X_train_scaled, y=y_train)

In [None]:
# Peek at the first five target values.
y.head(n=5)

In [None]:
# Train/test partition of X and y.
# NOTE(review): this repeats the earlier split cell verbatim — confirm whether
# both cells are needed.
[X_train, X_test, y_train, y_test] = split_data(X, y)

In [None]:
# Feature standardisation: fit on the training portion, apply to both portions.
# NOTE(review): this duplicates the earlier scaling cell — confirm whether both
# cells are needed.
scaler = StandardScaler().fit(X=X_train)
X_train_scaled, X_test_scaled = map(scaler.transform, (X_train, X_test))

In [None]:
# Train the linear model, then score both portions with it.
# NOTE(review): the model construction and fit repeat the earlier model cell.
lm = LinearRegression().fit(X_train_scaled, y_train)

# Predictions for the training rows and for the test rows, in that order.
predicted_values_train, predicted_values_test = (
    lm.predict(features) for features in (X_train_scaled, X_test_scaled)
)

In [None]:
# Stitch the train and test portions back together for plotting.
predicted_values = np.concatenate([predicted_values_train, predicted_values_test])
all_dates = np.concatenate([X_train.index, X_test.index])
actual_values = np.concatenate([y_train, y_test])

# The row order produced by split_data is not visible from here. Sort the
# combined arrays by date so a line trace over them is drawn left to right
# instead of jumping back and forth; the stable sort changes nothing when the
# concatenated dates are already in order.
date_order = np.argsort(all_dates, kind='stable')
all_dates = all_dates[date_order]
actual_values = actual_values[date_order]
predicted_values = predicted_values[date_order]

In [None]:
# Plot actual vs predicted values against date.
# Actual values are drawn as individual markers.
trace0 = go.Scatter(
    x=all_dates,
    y=actual_values,
    mode='markers',
    name='Actual'
)
# Model predictions are drawn as a connected line over the same dates.
trace1 = go.Scatter(
    x=all_dates,
    y=predicted_values,
    mode='lines',
    name='Predicted'
)

layout = go.Layout(
    title='Actual vs Predicted Stock Prices',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Stock Price')
)

fig = go.Figure(data=[trace0, trace1], layout=layout)
# plotly.offline.plot() writes a standalone HTML file and opens it in a
# browser. fig.show() renders through the active plotly renderer instead, so
# the figure appears inline in the notebook and no stray file is created.
fig.show()

In [None]:
# Summary statistics of the downloaded price data. Left as the cell's final
# expression so the notebook renders it as a table; the original computed it
# twice, discarding the first result and printing the second as plain text.
stock_data.describe()