In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from CSV; a comma is already read_csv's default separator.
d2 = pd.read_csv("d2_trabalhado.csv")

In [6]:
# Predictors: every column of d2 except the target column G3.
X = d2.drop(columns=['G3'])
# Target: the G3 column on its own.
y = d2['G3']

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Fit a linear regression on the training split. The bare fit(...) call stays
# as the cell's last expression so the notebook still displays the estimator.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

LinearRegression()

In [9]:
# Predict the target for the held-out test rows.
y_pred = regressor.predict(X=X_test)

- Avaliando o modelo

In [10]:
# Compare the held-out targets with the predictions: mean squared error and
# coefficient of determination.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [11]:
# Report both metrics, one per line.
print(f'Mean Squared Error: {mse}', f'R^2 Score: {r2}', sep='\n')

Mean Squared Error: 1.4470130554418417
R^2 Score: 0.8516145200543244


- Selecionando variáveis

In [33]:
# Reduced set of predictor columns used by the models in the following cells.
selected_features = [
    'G2',
    'famrel',
    'studytime',
    'health',
    'Dalc',
]

In [34]:
# Restrict the predictors to the selected columns; the target is still G3.
X, y = d2[selected_features], d2['G3']

In [35]:
# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training rows (fit returns the estimator itself), then predict
# the held-out rows.
regressor = LinearRegression().fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}', f'R^2 Score: {r2}', sep='\n')

Mean Squared Error: 1.2627269262347525
R^2 Score: 0.8705123355418114


## Regressão Polinomial

In [36]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Predictors are the selected columns; the target is G3.
X, y = d2[selected_features], d2['G3']

# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Degree-2 polynomial expansion. The transformer is fitted on the training
# rows only and then applied unchanged to the test rows.
poly = PolynomialFeatures(degree=2)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Linear regression on the expanded features (fit returns the estimator).
model = LinearRegression().fit(X_poly_train, y_train)
y_pred = model.predict(X_poly_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error: %.2f" % mse)
print("R^2 Score: %.2f" % r2)

Mean Squared Error: 1.29
R^2 Score: 0.87


## Lasso Regression

In [40]:
from sklearn.linear_model import Lasso

# Build the design matrix inside this cell instead of reusing whatever X and y
# a previously executed cell left in the kernel; otherwise re-running this cell
# after a later section would silently fit on a different feature set.
# These are the five columns the earlier linear-model cells select, in the
# same order, so the printed coefficients line up with this list.
lasso_features = ['G2', 'famrel', 'studytime', 'health', 'Dalc']
X = d2[lasso_features]
y = d2['G3']

# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# L1-regularised linear regression; alpha sets the strength of the penalty.
lasso = Lasso(alpha=0.1)

lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error: %.2f" % mse)
print("R^2 Score: %.2f" % r2)

# One coefficient per entry of lasso_features, in the same order.
print("Coeficientes:", lasso.coef_)

Mean Squared Error: 1.27
R^2 Score: 0.87
Coeficientes: [ 1.0191465 -0.         0.        -0.        -0.       ]


## Árvores de Decisão

In [114]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Feature set for the tree model.
# Alternative set also tried: ['G1', 'G2', 'schoolsup'].
selected_features = ['G2', 'famrel', 'schoolsup', 'failures']

X, y = d2[selected_features], d2['G3']

# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The seed on the tree keeps its fitting deterministic as well.
decision_tree = DecisionTreeRegressor(random_state=42)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error: %.2f" % mse)
print("R^2 Score: %.2f" % r2)

Mean Squared Error: 1.46
R^2 Score: 0.85


## Validação cruzada

In [120]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
import numpy as np

# Candidate feature subsets, each evaluated with 5-fold cross-validation.
feature_sets = [
    ['G2', 'famrel', 'schoolsup', 'failures'],
    ['famrel', 'G2', 'schoolsup', 'age'],
    ['G2', 'famrel', 'absences', 'Dalc'],
    ['G2', 'famrel', 'absences', 'romantic'],
    ['G2', 'famrel', 'absences', 'failures'],
    ['G2', 'schoolsup', 'freetime', 'famrel'],
    ['G2', 'Medu', 'studytime', 'failures'],
]

# The target is the same for every candidate, so select it once.
y = d2['G3']

for features in feature_sets:
    X = d2[features]
    decision_tree = DecisionTreeRegressor(random_state=42)

    # The 'neg_mean_squared_error' scorer returns negated MSE (higher is
    # better), so flip the sign to get the per-fold MSE.
    scores = cross_val_score(
        decision_tree,
        X,
        y,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    mse_scores = -scores

    # Mean and (population) standard deviation across the folds.
    mean_mse = mse_scores.mean()
    std_mse = mse_scores.std()

    print(f"Features: {features}")
    print(f"Mean Squared Error (Cross-Validation): {mean_mse:.2f} ± {std_mse:.2f}")
    print()


Features: ['G2', 'famrel', 'schoolsup', 'failures']
Mean Squared Error (Cross-Validation): 1.99 ± 0.97

Features: ['famrel', 'G2', 'schoolsup', 'age']
Mean Squared Error (Cross-Validation): 2.71 ± 1.22

Features: ['G2', 'famrel', 'absences', 'Dalc']
Mean Squared Error (Cross-Validation): 3.67 ± 1.02

Features: ['G2', 'famrel', 'absences', 'romantic']
Mean Squared Error (Cross-Validation): 3.25 ± 1.20

Features: ['G2', 'famrel', 'absences', 'failures']
Mean Squared Error (Cross-Validation): 3.25 ± 1.08

Features: ['G2', 'schoolsup', 'freetime', 'famrel']
Mean Squared Error (Cross-Validation): 3.02 ± 1.40

Features: ['G2', 'Medu', 'studytime', 'failures']
Mean Squared Error (Cross-Validation): 2.70 ± 1.25

