### K-Nearest Neighbors (KNN) Regression model

In [17]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor

#### Importing processed dataset 

In [45]:
# Load the pre-processed train/test split. The pickle is unpacked as a 4-tuple in
# the order: train features, train target, test features, test target.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open('../data/students_performance.pkl', 'rb') as dataset_file:
    (
        X_students_train,
        y_students_train,
        X_students_test,
        y_students_test,
    ) = pickle.load(dataset_file)

In [46]:
# Sanity check: the training features and target should have the same row count.
(X_students_train.shape, y_students_train.shape)

((850, 17), (850,))

In [47]:
# Sanity check: the test features and target should have the same row count.
(X_students_test.shape, y_students_test.shape)

((150, 17), (150,))

#### Training algorithm

In [83]:
# k-NN regressor using the 16 nearest training samples per prediction.
# 'minkowski' is scikit-learn's default metric; with the default p=2 it is the
# Euclidean distance, so unscaled features would dominate the neighbour search
# (assumes the pickled features are already scaled — TODO confirm).
knn_students = KNeighborsRegressor(
    n_neighbors=16,
    metric='minkowski',
)
# Kept as the cell's last expression so the fitted estimator is still displayed.
knn_students.fit(X_students_train, y_students_train)

In [84]:
# Predict the target for every held-out test sample with the fitted k-NN model.
predictions = knn_students.predict(X=X_students_test)

In [None]:
# Scatter plot of actual vs. predicted targets on the test set.
# pandas (pd) and plotly.express (px) come from the notebook's top import cell,
# so every dependency is visible in one place and the notebook re-runs top to bottom.
df = pd.DataFrame({
    'Real': y_students_test,
    'Previsto': predictions,
})

fig = px.scatter(
    df,
    x='Real',
    y='Previsto',
    labels={'Real': 'Valor Real', 'Previsto': 'Valor Previsto'},
    title='Valores Reais vs Valores Previstos',
)

# Dashed red y = x reference line across the range of the actual values:
# a perfect model would place every point on this diagonal.
real_min, real_max = df['Real'].min(), df['Real'].max()
fig.add_shape(
    type='line',
    x0=real_min,
    y0=real_min,
    x1=real_max,
    y1=real_max,
    line=dict(color='Red', dash='dash'),
)

fig.show()

In [85]:
# Score the test-set predictions with the usual regression metrics.
mae = mean_absolute_error(y_students_test, predictions)
mse = mean_squared_error(y_students_test, predictions)
rmse = np.sqrt(mse)  # back in the target's own units
r2 = r2_score(y_students_test, predictions)

# NOTE(review): the recorded run reports R² = 0.06, i.e. the model explains only
# about 6% of the target variance — barely better than always predicting the mean.
print(
    f"MAE: {mae:.2f}\n"
    f"MSE: {mse:.2f}\n"
    f"RMSE: {rmse:.2f}\n"
    f"R²: {r2:.2f}"
)

MAE: 10.87
MSE: 182.05
RMSE: 13.49
R²: 0.06
