In [1]:
# Importing Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.api import OLS, add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor
import ipywidgets as widgets
from IPython.display import display
from io import BytesIO

In [3]:
# Step 1: Data Upload Function
def upload_data_ui():
    """Show a CSV upload control and preview whatever gets uploaded.

    Renders a single-file ``FileUpload`` widget (restricted to ``.csv``)
    together with an ``Output`` area. Each time the widget's ``value`` changes
    and is non-empty, the first uploaded file is parsed with ``pd.read_csv``,
    stored in the module-level ``user_df``, and its first rows are displayed.
    Any exception raised while extracting or parsing the file is reported in
    the output area instead of propagating.
    """
    uploader = widgets.FileUpload(accept='.csv', multiple=False)
    preview_area = widgets.Output()

    def handle_upload(change):
        global user_df
        uploaded = uploader.value
        if not uploaded:
            # Empty value: there is nothing to load.
            return

        try:
            # The widget value is either a dict of file records or an
            # indexable sequence of them; take the first record either way.
            if isinstance(uploaded, dict):
                file_record = list(uploaded.values())[0]
            else:
                file_record = uploaded[0]
            raw_bytes = file_record['content']

            # Parse the payload as CSV and publish it through the global.
            user_df = pd.read_csv(BytesIO(raw_bytes))

            # Replace any earlier preview/error with the new head rows.
            with preview_area:
                preview_area.clear_output()
                print("Dataset Preview:")
                display(user_df.head())
        except Exception as e:
            with preview_area:
                preview_area.clear_output()
                print(f"Error loading file: {e}")

    uploader.observe(handle_upload, names='value')
    display(uploader, preview_area)

# Module-level holder for the uploaded dataset. It starts as an empty
# DataFrame; the upload handler inside upload_data_ui() rebinds it (via
# `global user_df`) once a CSV has been read, and the regression UIs read it
# to build their column selectors. Re-running this cell resets it to empty.
user_df = pd.DataFrame()

# Render the upload widget and its preview/output area.
upload_data_ui()

FileUpload(value=(), accept='.csv', description='Upload')

Output()

In [9]:
# Step 3: Model Selection UI
def model_selection_ui():
    """Show a model chooser that launches the matching regression UI.

    Displays a dropdown (simple vs. multiple linear regression), a "Proceed"
    button, a "Clear" button and an output area. "Proceed" echoes the current
    choice and renders the corresponding configuration UI inside the output
    area; "Clear" empties the output area and prompts for a new selection.
    """
    # Label -> launcher. The lambdas defer name lookup until a click, so the
    # regression UI builders are resolved at the moment they are needed.
    launchers = {
        'Simple Linear Regression': lambda: simple_linear_regression_ui(),
        'Multiple Linear Regression': lambda: multiple_linear_regression_ui(),
    }

    model_selector = widgets.Dropdown(
        options=list(launchers),
        value='Simple Linear Regression',
        description='Select Model:',
    )
    proceed_button = widgets.Button(description="Proceed")
    clear_button = widgets.Button(description="Clear")
    panel = widgets.Output()

    def handle_proceed(_button):
        with panel:
            panel.clear_output()
            choice = model_selector.value
            print(f"You selected: {choice}")
            launch = launchers.get(choice)
            if launch is not None:
                launch()

    def handle_clear(_button):
        with panel:
            panel.clear_output()
            print("Selection cleared. Please choose a model again.")

    clear_button.on_click(handle_clear)
    proceed_button.on_click(handle_proceed)
    display(model_selector, proceed_button, clear_button, panel)

# UI for Simple Linear Regression
def simple_linear_regression_ui():
    """UI for configuring and running Simple Linear Regression.

    Reads the module-level ``user_df``. Both the target and the predictor
    dropdown list only its numeric columns, because ``LinearRegression``
    cannot fit non-numeric data. If fewer than two numeric columns exist, a
    notice is printed and no widgets are shown.

    Clicking "Run Simple Regression":
      * validates the selection (both chosen, distinct, still numeric columns
        of the current ``user_df``),
      * drops rows where either column is missing or non-finite,
      * fits ``LinearRegression`` on the remaining rows,
      * plots the data with the fitted line, and
      * prints the fitted equation and the in-sample R-squared.

    Returns:
        None. All results are rendered through widgets and printed output.
    """
    numeric_columns = user_df.select_dtypes(include=[np.number]).columns.tolist()
    if len(numeric_columns) < 2:
        # A target and a different predictor are both required.
        print("Simple Linear Regression needs at least two numeric columns. "
              "Please upload a suitable dataset first.")
        return

    target_selector = widgets.Dropdown(
        options=numeric_columns,
        description='Target:'
    )

    # Default to the second column so the initial selection is not
    # "regress a column on itself".
    predictor_selector = widgets.Dropdown(
        options=numeric_columns,
        value=numeric_columns[1],
        description='Predictor:'
    )

    run_button = widgets.Button(description="Run Simple Regression")
    output = widgets.Output()

    def on_run_click(b):
        with output:
            output.clear_output()
            target = target_selector.value
            predictor = predictor_selector.value

            if target is None or predictor is None:
                print("Please select both a target and a predictor variable.")
                return

            if target == predictor:
                print("Target and predictor must be different columns.")
                return

            # user_df may have been replaced by a new upload since the
            # selectors were built; make sure the choices are still usable.
            current_numeric = user_df.select_dtypes(include=[np.number]).columns
            if target not in current_numeric or predictor not in current_numeric:
                print("The selected columns are not numeric columns of the current dataset. "
                      "Rebuild this UI to refresh the selectors.")
                return

            # LinearRegression rejects NaN/inf, so fit on complete rows only.
            # Sorting by the predictor keeps the plotted line left-to-right.
            data = (
                user_df[[predictor, target]]
                .replace([np.inf, -np.inf], np.nan)
                .dropna()
                .sort_values(predictor)
            )
            if len(data) < 2:
                print("At least two complete rows are needed to fit the model.")
                return

            dropped = len(user_df) - len(data)
            if dropped:
                print(f"Ignored {dropped} row(s) with missing or non-finite values.")

            X = data[[predictor]]
            y = data[target]

            model = LinearRegression()
            model.fit(X, y)

            # In-sample predictions and goodness of fit
            y_pred = model.predict(X)
            r_squared = model.score(X, y)

            # Scatter plot of the data with the fitted regression line
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.scatter(X[predictor], y, alpha=0.7, label="Data")
            ax.plot(X[predictor], y_pred, color='red', label="Regression Line")
            ax.set_title("Simple Linear Regression")
            ax.set_xlabel(predictor)
            ax.set_ylabel(target)
            ax.legend()
            plt.show()

            # Display equation and R-squared. The sign is printed separately
            # so a negative intercept reads "- 3.00" rather than "+ -3.00".
            coef = model.coef_[0]
            intercept = model.intercept_
            sign = '+' if intercept >= 0 else '-'
            print(f"Equation: y = {coef:.2f}x {sign} {abs(intercept):.2f}")
            print(f"R-squared: {r_squared:.4f}")

    run_button.on_click(on_run_click)
    display(target_selector, predictor_selector, run_button, output)

# UI for Multiple Linear Regression
def multiple_linear_regression_ui():
    """UI for configuring and running Multiple Linear Regression.

    Reads the module-level ``user_df``. The target dropdown and the predictor
    multi-select list only its numeric columns, because the VIF computation
    and the OLS fit both operate on numeric arrays. If fewer than two numeric
    columns exist, a notice is printed and no widgets are shown.

    Clicking "Run Multiple Regression":
      * validates the selection (target chosen, at least one predictor, target
        not among the predictors, all still numeric columns of ``user_df``),
      * drops rows with missing or non-finite values in any selected column,
      * reports the variance inflation factor of every design-matrix column,
      * fits OLS with an added constant and prints the summary, and
      * plots residuals against fitted values.

    Returns:
        None. All results are rendered through widgets and printed output.
    """
    numeric_columns = user_df.select_dtypes(include=[np.number]).columns.tolist()
    if len(numeric_columns) < 2:
        # A target and at least one other column are required.
        print("Multiple Linear Regression needs at least two numeric columns. "
              "Please upload a suitable dataset first.")
        return

    target_selector = widgets.Dropdown(
        options=numeric_columns,
        description='Target:'
    )

    predictor_selector = widgets.SelectMultiple(
        options=numeric_columns,
        description='Predictors:'
    )

    run_button = widgets.Button(description="Run Multiple Regression")
    output = widgets.Output()

    def on_run_click(b):
        with output:
            output.clear_output()
            target = target_selector.value
            predictors = list(predictor_selector.value)

            if target is None or not predictors:
                print("Please select a target variable and at least one predictor variable.")
                return

            if target in predictors:
                # Regressing a column on itself gives a meaningless perfect fit.
                print("The target variable cannot also be a predictor.")
                return

            # user_df may have been replaced by a new upload since the
            # selectors were built; make sure the choices are still usable.
            current_numeric = user_df.select_dtypes(include=[np.number]).columns
            if any(col not in current_numeric for col in predictors + [target]):
                print("The selected columns are not numeric columns of the current dataset. "
                      "Rebuild this UI to refresh the selectors.")
                return

            # VIF and OLS cannot handle NaN/inf, so keep complete rows only.
            data = (
                user_df[predictors + [target]]
                .replace([np.inf, -np.inf], np.nan)
                .dropna()
            )
            # Need more observations than estimated parameters
            # (one per predictor plus the constant).
            if len(data) <= len(predictors) + 1:
                print("Not enough complete rows to fit the model with the selected predictors.")
                return

            dropped = len(user_df) - len(data)
            if dropped:
                print(f"Ignored {dropped} row(s) with missing or non-finite values.")

            X = data[predictors]
            y = data[target]

            # Check multicollinearity using VIF on the design matrix
            X_with_const = add_constant(X)
            design = X_with_const.values
            vif_data = pd.DataFrame({
                "Feature": X_with_const.columns,
                "VIF": [variance_inflation_factor(design, i) for i in range(design.shape[1])],
            })

            print("Variance Inflation Factor (VIF):")
            display(vif_data)

            # Fit the model using OLS
            model = OLS(y, X_with_const).fit()

            # Display OLS results
            print("OLS Regression Results:")
            print(model.summary())

            # In-sample predictions and residuals
            y_pred = model.predict(X_with_const)
            residuals = y - y_pred

            # Plot residuals against fitted values. Residuals are positively
            # correlated with the true values by construction, so plotting
            # against y would show a trend even for a well-specified model.
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.scatter(y_pred, residuals, alpha=0.7)
            ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
            ax.set_title("Residuals vs Fitted Values")
            ax.set_xlabel("Fitted Values")
            ax.set_ylabel("Residuals")
            plt.show()

    run_button.on_click(on_run_click)
    display(target_selector, predictor_selector, run_button, output)

# Render the model chooser. Its "Proceed" button builds the selected
# regression UI from the contents of user_df at that moment, so upload a
# dataset with the widget above before clicking it.
model_selection_ui()

Dropdown(description='Select Model:', options=('Simple Linear Regression', 'Multiple Linear Regression'), valu…

Button(description='Proceed', style=ButtonStyle())

Button(description='Clear', style=ButtonStyle())

Output()