In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the CSV and give the modelled columns neutral names.
# rename() is chained so a new frame is produced instead of mutating in place,
# and errors="raise" turns a missing or misspelled source column into an
# immediate KeyError here rather than a confusing failure further down.
df = pd.read_csv("studper.csv").rename(
    columns={
        'Hours Studied': 'feature1',
        'Previous Scores': 'feature2',
        'Performance Index': 'target',
    },
    errors="raise",
)

# Visualize raw data in 3D.
# The frame uses generic column names, so map them back to readable axis and
# hover labels and give the figure a title so it can be understood on its own.
fig = px.scatter_3d(
    df,
    x='feature1',
    y='feature2',
    z='target',
    labels={
        'feature1': 'Hours Studied',
        'feature2': 'Previous Scores',
        'target': 'Performance Index',
    },
    title='Raw Data - 3D Scatter',
)
fig.show()

# Build the two-column predictor matrix and the response vector as NumPy arrays
x = df.loc[:, ['feature1', 'feature2']].values
y = df.loc[:, 'target'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear model on the training split (fit() returns the estimator
# itself, so construction and fitting are chained), then predict the test rows
lr = LinearRegression().fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Evaluation: print each test-set metric on its own line, in a fixed order
for metric_label, metric_fn in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R² Score:", r2_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

# Prepare regression plane: evaluate the fitted model on a regular grid that
# spans the observed min..max of both features
grid_size = 30
feature1_range, feature2_range = (
    np.linspace(df[column].min(), df[column].max(), grid_size)
    for column in ('feature1', 'feature2')
)
feature1_grid, feature2_grid = np.meshgrid(feature1_range, feature2_range)

# One (feature1, feature2) row per grid node, then fold the predictions back
# into the grid's 2-D shape so they can be drawn as a surface
grid_points = np.column_stack((feature1_grid.ravel(), feature2_grid.ravel()))
predicted_target = lr.predict(grid_points).reshape(feature1_grid.shape)

# Plot the regression plane with actual points: both traces are handed to the
# Figure constructor in draw order (observations first, fitted surface second)
fig = go.Figure(data=[
    # Actual data
    go.Scatter3d(
        x=df['feature1'],
        y=df['feature2'],
        z=df['target'],
        mode='markers',
        marker={'size': 4, 'color': 'blue'},
        name='Actual Data',
    ),
    # Regression surface
    go.Surface(
        x=feature1_grid,
        y=feature2_grid,
        z=predicted_target,
        opacity=0.5,
        colorscale='Viridis',
        name='Regression Plane',
    ),
])

# Layout: figure title and pixel size, plus readable titles for the three
# scene axes, then render the figure
fig.update_layout(
    title='Multiple Linear Regression - 3D Regression Plane',
    width=800,
    height=700,
    scene={
        'xaxis_title': 'Hours Studied',
        'yaxis_title': 'Previous Scores',
        'zaxis_title': 'Performance Index',
    },
)

fig.show()


MAE: 1.8269025376295696
MSE: 5.2419211865515205
R² Score: 0.985855014245765
