# %%
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression

# %%
# Read the stock CSV into a DataFrame, using the "Company" column as the index
stock_df = pd.read_csv("Resources/data.csv", index_col="Company")

# %%
# Preview the first ten rows
stock_df.head(10)

# %%
# Remove the $ sign from the columns
# The pattern is a raw string so that "\$" is a regex-escaped dollar sign
# rather than an invalid Python escape sequence, and the replacement is an
# empty string so the symbol is actually removed instead of being swapped
# for a leading space (e.g. "$123.45" -> "123.45", not " 123.45").
stock_df = stock_df.replace({r'\$': ''}, regex=True)

# Review the changes
stock_df.head(5)

# %%
# Summarize the columns, their dtypes and non-null counts
stock_df.info()

# %%
# Count the missing values in every column and show the totals
null_counts = stock_df.isna().sum()
print(null_counts)

# %%
# Parse the Date column into pandas datetime values
stock_df['Date'] = pd.to_datetime(stock_df['Date'])

# Determine the earliest and latest dates present in the data
date_column = stock_df['Date']
oldest_date, newest_date = date_column.min(), date_column.max()

print(oldest_date)
print(newest_date)

# %%
# Order the rows by company (the index) and then chronologically by date
stock_df = stock_df.sort_values(by=['Company', 'Date'])
stock_df.head(5)

# %%
# Calculating the change between closing and opening prices per company
# Coerce both price columns to numbers; values that cannot be parsed become NaN
stock_df['Open'] = pd.to_numeric(stock_df['Open'], errors='coerce')
stock_df['Close/Last'] = pd.to_numeric(stock_df['Close/Last'], errors='coerce')

# Close of the preceding row within each company (NaN for a company's first row)
previous_close = stock_df.groupby('Company')['Close/Last'].shift(1)

# NOTE(review): this is the previous row's close minus the current row's open,
# not the same-row close minus open -- confirm that this is the intended "change".
stock_df['Change'] = previous_close - stock_df['Open']
stock_df.head(5)

# %%
# Discard the rows for which no Change value could be computed
stock_df = stock_df.dropna(subset=['Change'])
stock_df.head(5)

# %%
# # Drop Date column
# stock_df.drop(columns=['Date'], inplace=True)
# stock_df.head(5)

# %%
# Keep only the rows whose index label (the company) is AAPL
is_aapl = stock_df.index == 'AAPL'
appl_data = stock_df[is_aapl]

appl_data.head()

# %%
# Scatter the AAPL opening prices against the closing prices
appl_plot = appl_data.hvplot.scatter(x="Open", y="Close/Last", title="AAPL Open vs Close prices")
appl_plot

# %%
# Scatter opening prices against closing prices for every row of stock_df
# (all companies, not only AAPL)
stock_plot = stock_df.hvplot.scatter(x="Open", y="Close/Last", title="Open vs Close prices")
stock_plot

# %%
# Build the feature matrix X: the opening prices as a single-column 2-D array
X = stock_df["Open"].to_numpy().reshape(-1, 1)

# Peek at the first few feature rows
X[:5]

# %%
# X has one row per sample and a single feature column
X.shape

# %%
# The target y is the closing-price column
y = stock_df.loc[:, "Close/Last"]

# %%
# Instantiate a scikit-learn linear regression estimator
model = LinearRegression()

# %%
# Train the estimator on the opening prices (X) and closing prices (y)
model.fit(X, y)

# %%
# Report the fitted coefficient array (the slope of the line)
slope_report = f"Model's slope: {model.coef_}"
print(slope_report)

# %%
# Report the fitted intercept
intercept_report = f"Model's y-intercept: {model.intercept_}"
print(intercept_report)

# %%
# Report the best fit line as a formula in X
formula_report = f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X"
print(formula_report)

# %%
# Display the formula to predict the close price for an opening price of $30
# The model's only feature is the opening price, so the value substituted for
# X is an Open price in dollars, not a number of days.
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]} * 30")

# Predict the closing price for a stock that opens at $30
y_30 = model.intercept_ + model.coef_[0] * 30

# Display the prediction
print(f"Predicted stock closing price for a $30 opening price: ${y_30:.2f}")

# %%
# Run the fitted model over the training features
predicted_y_values = model.predict(X)

# %%
# Copy the original data and attach the predicted closing prices as a new column
df_close_predicted = stock_df.assign(close_predicted=predicted_y_values)

# Preview the result
df_close_predicted.head()

# %%
# Create a line plot of the predicted closing prices
# The regression maps Open -> Close/Last, so the fitted line is drawn against
# "Open" to share the x-axis of the scatter plot it is overlaid on.
best_fit_line = df_close_predicted.hvplot.line(
    x="Open",
    y="close_predicted",
    color="red"
)
best_fit_line

# %%
# Superpose the original data and the best fit line
stock_plot * best_fit_line

# %%
# Import the metric functions used below (mean squared error and R²) from scikit-learn
from sklearn.metrics import mean_squared_error, r2_score

# %%
# Evaluate the linear regression model on the data it was fitted to
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
r2 = r2_score(y, predicted_y_values)
score = model.score(X, y)
std = np.std(y)

# Report each metric on its own line
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")