In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    explained_variance_score,
)

import plotly.graph_objs as go
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Load your dataset
data = pd.read_csv('A.csv')  # Update this with your data path

# Prepare the features and target variable
features = ['Open', 'High', 'Low', 'Volume']  # Add more features if necessary
target = 'Close'

# Split the data into training and testing sets
X = data[features]
y = data[target]

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize models
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1.0),
    'Lasso Regression': Lasso(alpha=1.0),
    'Elastic Net Regression': ElasticNet(alpha=1.0, l1_ratio=0.5)
}

# Dictionary to hold predictions and evaluation metrics
predictions = {}
metrics = {}

# Train each model and make predictions
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    predictions[name] = y_pred

    # Calculate evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mse)
    evs = explained_variance_score(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100  # Calculate MAPE

    metrics[name] = {
        'MSE': mse,
        'MAE': mae,
        'R²': r2,
        'RMSE': rmse,
        'Explained Variance': evs,
        'MAPE': mape
    }

# Convert metrics to DataFrame for better visualization
metrics_df = pd.DataFrame(metrics).T

# Print the metrics DataFrame
print("\nModel Evaluation Metrics:")
print(metrics_df)

# Find the best model for each metric
best_models = metrics_df.idxmin()  # For minimization metrics (MSE, MAE, RMSE, MAPE)
best_models_r2 = metrics_df['R²'].idxmax()  # For maximization metric (R², Explained Variance)

# Display best models
print("\nBest Model Based on Evaluation Metrics:")
for metric in metrics_df.columns:
    if metric in ['MSE', 'MAE', 'RMSE', 'MAPE']:
        print(f"Best Model for {metric}: {best_models[metric]}")
    elif metric in ['R²', 'Explained Variance']:
        print(f"Best Model for {metric}: {best_models_r2}")

# Create individual plots for each model
for name, y_pred in predictions.items():
    # Create a new figure for each model
    fig = go.Figure()

    # Add actual vs predicted scatter plot
    fig.add_trace(go.Scatter(
        x=y_test,
        y=y_pred,
        mode='markers',
        name='Predicted',
        marker=dict(opacity=0.6),
        text=f'MSE: {metrics[name]["MSE"]:.2f}<br>MAE: {metrics[name]["MAE"]:.2f}<br>R²: {metrics[name]["R²"]:.2f}<br>MAPE: {metrics[name]["MAPE"]:.2f}%',
    ))

    # Add ideal fit line
    fig.add_trace(go.Scatter(
        x=[y_test.min(), y_test.max()],
        y=[y_test.min(), y_test.max()],
        mode='lines',
        name='Ideal Fit',
        line=dict(color='red', dash='dash')
    ))

    # Update layout
    fig.update_layout(
        title=f'{name}: Actual vs Predicted Close Price',
        xaxis_title='Actual Close Price',
        yaxis_title='Predicted Close Price',
        legend_title='Models',
        template='plotly'
    )

    # Show the plot
    fig.show()

# Plot stock prices over time
fig_stock_prices = go.Figure()

fig_stock_prices.add_trace(go.Scatter(
    x=data['Date'],
    y=data['Close'],
    mode='lines',
    name='Actual Close Price'
))

# Predict on the entire dataset for comparison
for name, model in models.items():
    model.fit(X_scaled, y)  # Train on full dataset
    y_full_pred = model.predict(X_scaled)
    fig_stock_prices.add_trace(go.Scatter(
        x=data['Date'],
        y=y_full_pred,
        mode='lines',
        name=f'{name} Predicted Close Price'
    ))

fig_stock_prices.update_layout(
    title='Stock Prices Over Time',
    xaxis_title='Date',
    yaxis_title='Close Price',
    legend_title='Models',
    template='plotly'
)

# Show the stock price plot
fig_stock_prices.show()


Model Evaluation Metrics:
                              MSE       MAE        R²      RMSE  \
Linear Regression        0.172402  0.213712  0.999504  0.415214   
Ridge Regression         0.244897  0.241982  0.999295  0.494871   
Lasso Regression         1.398900  0.874444  0.995975  1.182751   
Elastic Net Regression  10.072584  2.496441  0.971017  3.173733   

                        Explained Variance      MAPE  
Linear Regression                 0.999504  0.655024  
Ridge Regression                  0.999295  0.731592  
Lasso Regression                  0.995975  3.161327  
Elastic Net Regression            0.971019  9.295676  

Best Model Based on Evaluation Metrics:
Best Model for MSE: Linear Regression
Best Model for MAE: Linear Regression
Best Model for R²: Linear Regression
Best Model for RMSE: Linear Regression
Best Model for Explained Variance: Linear Regression
Best Model for MAPE: Linear Regression
