# Linear Regression

### Import Libraries

In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### 1. Prepare Data

In [None]:
# Prepare Data
def prepare_data():
    """Return the sample classroom dataset as a pandas DataFrame.

    The frame has one row per student and three columns: 'Student',
    'Caffeine Intake (mg)' and 'Participation'.
    """
    column_names = ['Student', 'Caffeine Intake (mg)', 'Participation']

    # One (name, caffeine in mg, participation) record per student.
    records = [
        ('Jeremy', 0, 0.5),
        ('Noah', 100, 1.5),
        ('Alyssa', 150, 2.5),
        ('Daniel', 40, 1.0),
        ('Sofia', 250, 4.0),
        ('Colin', 120, 2.0),
        ('Gracie', 300, 4.5),
        ('Anne', 175, 2.5),
        ('Jamal', 210, 3.0),
        ('Trisha', 80, 1.5),
    ]

    # Transpose the records into per-column lists so pandas infers each
    # column's dtype from a homogeneous list.
    columns = [list(values) for values in zip(*records)]
    return pd.DataFrame(dict(zip(column_names, columns)))

### 2. Train Linear Regression Model

In [None]:
# Calculate Linear Regression
def train_model(X, y):
    """Fit a one-feature least-squares line ``y = m * x + b``.

    Args:
        X: Array-like of feature values. Any shape is accepted; it is
            flattened to one dimension.
        y: Array-like of target values with the same number of elements
            as ``X``; also flattened.

    Returns:
        Tuple ``(m, b)`` holding the slope and the intercept.

    Raises:
        ValueError: If the inputs are empty, differ in length, or all x
            values are identical (the slope is undefined in that case).
    """
    x = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    if x.size != y.size:
        raise ValueError(
            f"X and y must have the same number of elements "
            f"(got {x.size} and {y.size})."
        )
    if x.size == 0:
        raise ValueError("Cannot fit a regression line to empty data.")
    if x.max() == x.min():
        # Zero variance in x would make the slope a division by zero.
        raise ValueError(
            "Cannot fit a regression line: all x values are identical."
        )

    # Mean-centred form of the least-squares solution. It is algebraically
    # the same as (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2) but avoids subtracting
    # two huge, nearly equal sums, which loses precision for large x.
    x_mean = x.mean()
    y_mean = y.mean()
    x_centered = x - x_mean

    m = np.sum(x_centered * (y - y_mean)) / np.sum(x_centered ** 2)
    b = y_mean - m * x_mean

    return m, b

### 3. Visualize Regression Line

In [None]:
# Visualize Regression Line
def plot_regression_line(X, y, m, b):
    """Show a scatter plot of the data with the fitted line on top.

    Args:
        X: Array of caffeine intake values (x axis).
        y: Array of participation values (y axis).
        m: Slope of the fitted line.
        b: Intercept of the fitted line.
    """
    _, axes = plt.subplots(figsize=(10, 6))

    # Raw observations plus the fitted line evaluated at the same x values.
    fitted = m * X + b
    axes.scatter(X, y, color='blue', label='Data Points')
    axes.plot(X, fitted, color='green', label='Regression Line')

    axes.set_title('Linear Regression: Caffeine Intake vs. Participation')
    axes.set_xlabel('Caffeine Intake (mg)')
    axes.set_ylabel('Participation per lesson')
    axes.legend()

    # Start both axes at the origin and leave headroom past the largest value.
    axes.set_xlim(0, X.max() + 50)
    axes.set_ylim(0, y.max() + 1)

    plt.show()

### 4. Display Model Metrics

In [None]:
# Calculate and display model metrics (equation, SSE, R²)
def display_model_metrics(X, y, m, b):
    """Print the fitted equation, the sum of squared errors and R².

    Args:
        X: Array-like of feature values; flattened to one dimension.
        y: Array-like of observed target values; flattened to one dimension.
        m: Slope of the fitted line.
        b: Intercept of the fitted line.

    R² is printed as ``nan`` when every ``y`` value is identical, because
    the total sum of squares is zero and the score is undefined.
    """
    # Flatten both inputs so that an (n, 1) X combined with an (n,) y cannot
    # silently broadcast into an (n, n) residual matrix.
    x_values = np.asarray(X, dtype=float).ravel()
    y_values = np.asarray(y, dtype=float).ravel()

    predictions = m * x_values + b
    sse = np.sum((y_values - predictions) ** 2)
    sst = np.sum((y_values - np.mean(y_values)) ** 2)

    # Guard the division: a constant target has zero total variance.
    r2 = 1 - (sse / sst) if sst > 0 else float("nan")

    print("\nLinear Regression Model:")
    print(f"Equation: Participation = {m:.2f} * Caffeine Intake (mg) + {b:.2f}")
    print(f"Sum of Squared Errors (SSE): {sse:.2f}")
    print(f"R² Score: {r2:.2f}")

### 5. Predict and Visualize Participation

In [None]:
# Predict Participation and Visualize User Input (Single Input)
def predict_and_visualize_participation(m, b, X, y):
    """Prompt once for a caffeine amount, predict participation and plot it.

    Args:
        m: Slope of the fitted line.
        b: Intercept of the fitted line.
        X: Array of training caffeine intake values.
        y: Array of training participation values.

    If the entered text is not a finite number, an error message is printed
    and nothing is plotted.
    """
    raw_value = input("\nEnter caffeine intake (mg) to predict participation level: ")

    # Only the parsing is guarded, so a ValueError raised later by the
    # plotting code is not misreported as bad user input.
    try:
        caffeine = float(raw_value)
    except ValueError:
        print("Invalid input. Please enter a numeric value.")
        return

    # float() accepts "nan" and "inf", which cannot be predicted or plotted.
    if not np.isfinite(caffeine):
        print("Invalid input. Please enter a numeric value.")
        return

    predicted = m * caffeine + b
    print(f"Predicted participation level: {predicted:.2f}")

    # Extend the drawn line so it reaches the user's point even when that
    # point lies outside the range of the training data.
    line_x = np.array([
        min(float(np.min(X)), caffeine),
        max(float(np.max(X)), caffeine),
    ])

    # Plot result with user input
    plt.figure(figsize=(10, 6))
    plt.scatter(X, y, color='blue', label='Training Data')
    plt.plot(line_x, m * line_x + b, color='green', label='Regression Line')
    plt.scatter([caffeine], [predicted], color='orange', s=100, label=f'User Input ({caffeine}, {predicted:.2f})')
    plt.xlabel('Caffeine Intake (mg)')
    plt.ylabel('Participation per lesson')
    plt.title('Linear Regression: Caffeine Intake vs. Participation')
    plt.legend()

    # Keep the usual origin-based limits, widening them only when the
    # predicted point would otherwise fall outside the visible area.
    x_lower = 0 if caffeine >= 0 else caffeine - 50
    y_lower = 0 if predicted >= 0 else predicted - 1
    plt.xlim(x_lower, max(float(np.max(X)), caffeine) + 50)
    plt.ylim(y_lower, max(float(np.max(y)), float(predicted)) + 1)
    plt.show()

### Main Program

In [None]:
# Main Program
def main():
    """Run the demo: load data, fit the line, plot, report, then predict."""
    dataset = prepare_data()

    # Double-bracket selection keeps each column two-dimensional, shape (n, 1).
    caffeine = dataset[['Caffeine Intake (mg)']].values
    participation = dataset[['Participation']].values

    # Fit the slope and intercept of the regression line.
    slope, intercept = train_model(caffeine, participation)

    # Show the fit, then print its metrics.
    plot_regression_line(caffeine, participation, slope, intercept)
    display_model_metrics(caffeine, participation, slope, intercept)

    # Ask the user for one value and plot its prediction.
    predict_and_visualize_participation(slope, intercept, caffeine, participation)


# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
