# Linear Regression

### Import Libraries

In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Apply Seaborn's "whitegrid" style and "coolwarm" color palette.
# This is a module-level call, so it runs once when this cell/module is
# executed, before any of the plotting functions below are invoked.
sns.set(style="whitegrid", palette="coolwarm")

### 1. Prepare Data

In [None]:
def prepare_data():
    """
    Build the sample dataset used for the linear regression demo.

    Returns:
        A DataFrame with one row per student and three columns:
        'Student', 'Caffeine Intake (mg)' and 'Participation'.
    """
    columns = ['Student', 'Caffeine Intake (mg)', 'Participation']

    # One (student, caffeine in mg, participation) record per row.
    records = [
        ('Jeremy', 0, 0.5),
        ('Noah', 100, 1.5),
        ('Alyssa', 150, 2.5),
        ('Daniel', 40, 1.0),
        ('Sofia', 250, 4.0),
        ('Colin', 120, 2.0),
        ('Gracie', 300, 4.5),
        ('Anne', 175, 2.5),
        ('Jamal', 210, 3.0),
        ('Trisha', 80, 1.5),
    ]

    return pd.DataFrame(records, columns=columns)

### 2. Train Linear Regression Model

In [None]:
def train_model(X, y):
    """
    Fit an ordinary least-squares linear regression on the given data.

    Args:
        X: Feature matrix passed to ``LinearRegression.fit``.
        y: Target values passed to ``LinearRegression.fit``.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    # fit() returns the estimator itself, so it can be returned directly.
    return LinearRegression().fit(X, y)

### 3. Visualize Regression Line

In [None]:
def plot_regression_line(X, y, model):
    """
    Show the observed data as a scatter plot with the fitted line on top.

    Args:
        X: DataFrame containing the 'Caffeine Intake (mg)' feature column.
        y: Series of observed participation values.
        model: Fitted model whose ``predict`` supplies the line's y-values.
    """
    plt.figure(figsize=(10, 6))

    caffeine = X['Caffeine Intake (mg)']
    fitted = model.predict(X)

    # scatterplot draws on (and returns) the current Axes; reuse it for the rest.
    ax = sns.scatterplot(x=caffeine, y=y, color='blue', label='Data Points')
    ax.plot(X, fitted, color='green', label='Regression Line')

    ax.set_xlabel('Caffeine Intake (mg)')
    ax.set_ylabel('Participation per lesson')
    ax.set_title('Linear Regression: Caffeine Intake vs. Participation')
    ax.legend()

    # Start both axes at zero and leave some headroom past the largest value.
    ax.set_xlim(0, caffeine.max() + 50)
    ax.set_ylim(0, y.max() + 1)

    plt.show()  # Render the figure

### 4. Display Model Metrics

In [None]:
def display_model_metrics(X, y, model):
    """
    Print the fitted equation, MSE, and R² score for the linear regression model.

    The metrics are computed on the same (X, y) that is passed in, using the
    model's own predictions for X.

    Args:
        X: Feature matrix to predict on (single feature; only ``coef_[0]`` is shown).
        y: True target values corresponding to X.
        model: Fitted linear model exposing ``predict``, ``coef_`` and ``intercept_``.
    """
    predictions = model.predict(X)
    mse = mean_squared_error(y, predictions)
    r2 = r2_score(y, predictions)
    slope = model.coef_[0]
    intercept = model.intercept_

    # Participation changes by only hundredths per mg, so two decimals would
    # round away most of the slope; print four. Render the intercept with an
    # explicit sign so a negative value reads "- 0.38" rather than "+ -0.38".
    sign = "-" if intercept < 0 else "+"

    print("\nLinear Regression Model:")
    print(
        f"Equation: Participation = {slope:.4f} * Caffeine Intake (mg) "
        f"{sign} {abs(intercept):.2f}"
    )
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    print(f"R² Score: {r2:.2f}")

### 5. Predict and Visualize Participation

In [None]:
def predict_and_visualize_participation(model, X, y, feature_name):
    """
    Interactively predict participation from caffeine intake and plot each result.

    Repeatedly prompts for a caffeine amount, prints the model's prediction and
    draws the training data, the regression line and the entered point. The
    loop ends when the user interrupts it (Ctrl+C / kernel interrupt) or when
    input is exhausted (EOF).

    Args:
        model: Fitted regressor exposing ``predict``.
        X: DataFrame holding the training feature in column ``feature_name``.
        y: Series of training target values.
        feature_name: Name of the feature column in ``X``; also used as the
            column name of the single-row frame passed to ``model.predict``.
    """
    import math  # local import: only needed for the finiteness check below

    invalid_message = "Invalid input. Please enter a numeric value."
    train_x = X[feature_name]

    try:
        while True:
            raw = input("\nEnter caffeine intake (mg) to predict participation level: ")

            # Only the parse is guarded by ValueError, so errors raised by the
            # model or by plotting are not misreported as bad user input.
            try:
                caffeine = float(raw)
            except ValueError:
                print(invalid_message)
                continue

            # float() accepts "nan" and "inf"; they cannot be predicted or plotted.
            if not math.isfinite(caffeine):
                print(invalid_message)
                continue

            # Build the input with the training column name so the model sees
            # the same feature name it was fitted with.
            input_data = pd.DataFrame({feature_name: [caffeine]})
            predicted = model.predict(input_data)[0]
            print(f"Predicted participation level: {predicted:.2f}")

            # A straight line needs only two endpoints. Stretch them to reach
            # the user's value so an out-of-range point still sits on the line.
            line_x = pd.DataFrame({
                feature_name: [
                    min(train_x.min(), caffeine),
                    max(train_x.max(), caffeine),
                ]
            })

            # Plot the training data, the regression line, and the user's input
            plt.figure(figsize=(10, 6))
            sns.scatterplot(x=train_x, y=y, color='blue', label='Training Data')
            plt.plot(line_x[feature_name], model.predict(line_x), color='green', label='Regression Line')
            sns.scatterplot(x=[caffeine], y=[predicted], color='orange', s=100, label=f'User Input ({caffeine}, {predicted:.2f})')
            plt.xlabel('Caffeine Intake (mg)')
            plt.ylabel('Participation per lesson')
            plt.title('Linear Regression: Caffeine Intake vs. Participation')
            plt.legend()

            # Keep the usual zero-based view with headroom, but widen it when
            # the user's point would otherwise fall outside the axes.
            plt.xlim(min(0, caffeine), max(train_x.max(), caffeine) + 50)
            plt.ylim(min(0, predicted), max(y.max(), predicted) + 1)
            plt.show()  # Ensure the graph displays
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C at the prompt or while a figure is open, or end of input when
        # stdin is not interactive, both end the session cleanly.
        print("\nExiting the prediction tool.")

### Main Program

In [None]:
def main():
    """
    Run the full demo: build the data, fit the model, plot it, report the
    metrics, then start the interactive prediction loop.
    """
    feature = 'Caffeine Intake (mg)'

    # Load the sample data and split it into features and target.
    dataset = prepare_data()
    features = dataset[[feature]]  # double brackets keep this a DataFrame
    target = dataset['Participation']

    # Fit the regression on the full dataset.
    fitted_model = train_model(features, target)

    # Show the fit first, then print its metrics.
    plot_regression_line(features, target, fitted_model)
    display_model_metrics(features, target, fitted_model)

    # Hand over to the interactive predictor.
    predict_and_visualize_participation(
        fitted_model,
        features,
        target,
        feature_name=feature,
    )

# Run the full workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()