In [46]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [19]:
# Read the pickled dataset and unpack it into the four train/test objects.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at a file produced by a trusted pipeline.
with open('../data/students_performance.pkl', 'rb') as dataset_file:
    splits = pickle.load(dataset_file)
X_train, y_train, X_test, y_test = splits

In [20]:
# Peek at a single training row (index 10) to see how the features are encoded.
X_train[10]

array([1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1.])

In [21]:
# Baseline: ordinary least-squares linear regression with scikit-learn's
# default settings, fitted on the training split.
basic_linear_model = LinearRegression()
basic_linear_model.fit(X_train, y_train)

In [22]:
# Fitted intercept (bias term) of the linear model.
basic_linear_model.intercept_

68.39889554930878

In [25]:
# Fitted coefficients, one per input feature column.
basic_linear_model.coef_

array([ 2.16877931, -2.16877931, -3.02507753, -1.57716741, -1.07815555,
        2.1485205 ,  3.53187999,  0.33517955,  3.92134747, -3.90274064,
        4.30064456, -0.31692655, -4.33750439, -4.40211011,  4.40211011,
        4.09123865, -4.09123865])

In [26]:
# R² on the training split (LinearRegression.score returns the coefficient
# of determination).
basic_linear_model.score(X_train, y_train)

0.2562641971544254

In [27]:
# R² on the held-out test split; compare with the training score to gauge
# how well the fit generalizes.
basic_linear_model.score(X_test, y_test)

0.13844514079614745

In [29]:
# Predict the target for every test sample; `predictions` is reused by the
# scatter plot and the error metrics below.
predictions = basic_linear_model.predict(X_test)

In [None]:
# Compare the true targets with the model's predictions on the test split.
df = pd.DataFrame({'Real': y_test, 'Previsto': predictions})

fig = px.scatter(
    df,
    x='Real',
    y='Previsto',
    labels={'Real': 'Valor Real', 'Previsto': 'Valor Previsto'},
    title='Valores Reais vs Valores Previstos',
)

# Identity line (y = x) spanning the observed range of true values:
# a point lying on it is a perfect prediction.
real_min, real_max = df['Real'].min(), df['Real'].max()
fig.add_shape(
    type='line',
    x0=real_min,
    y0=real_min,
    x1=real_max,
    y1=real_max,
    line=dict(color='Red', dash='dash'),
)

fig.show()


In [30]:
# Error metrics for the test-set predictions. RMSE is derived from MSE so it
# is expressed in the same units as the target.
mse = mean_squared_error(y_test, predictions)
mae, rmse, r2 = (
    mean_absolute_error(y_test, predictions),
    np.sqrt(mse),
    r2_score(y_test, predictions),
)

# One metric per line, rounded to two decimals.
for report_line in (f"MAE: {mae:.2f}", f"RMSE: {rmse:.2f}", f"R²: {r2:.2f}"):
    print(report_line)

MAE: 10.34
RMSE: 12.93
R²: 0.14
