## **Criando Modelos de Regressão Linear**

In [68]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm

In [41]:
alunos = pd.read_csv('student-mat.csv')

**Análise Exploratória**

In [None]:
alunos.shape

In [None]:
alunos.columns

In [None]:
# Preview the first rows (this cell previously repeated `alunos.columns`).
alunos.head()

In [3]:
alunos['age'].describe()

count    395.000000
mean      16.696203
std        1.276043
min       15.000000
25%       16.000000
50%       17.000000
75%       18.000000
max       22.000000
Name: age, dtype: float64

In [4]:
alunos['absences'].describe()

count    395.000000
mean       5.708861
std        8.003096
min        0.000000
25%        0.000000
50%        4.000000
75%        8.000000
max       75.000000
Name: absences, dtype: float64

**Modelagem**

In [42]:
X = alunos[['age','absences','G1','G2']]
y = alunos[['G3']]

In [30]:
X

Unnamed: 0,age,absences,G1,G2
0,18,6,5,6
1,17,4,5,5
2,15,10,7,8
3,15,2,15,14
4,16,4,6,10
...,...,...,...,...
390,20,11,9,9
391,17,3,14,16
392,21,3,10,8
393,18,0,11,12


In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [51]:
print('Tamanho da base: %d' % X.shape[0])

Tamanho da base: 395


In [55]:
print('Quantidade de registros para treino: %d' % X_train.shape[0])

Quantidade de registros para treino: 276


In [54]:
print('Quantidade de registros para teste: %d' % X_test.shape[0])

Quantidade de registros para teste: 119


In [56]:
modelo = LinearRegression().fit(X_train, y_train)

**Interpretação do Modelo**

In [59]:
previsoes = modelo.predict(X_test)

In [61]:
MSE = mean_squared_error(y_test,previsoes)
print('%.2f' % MSE)

4.27


In [65]:
#R2
r2 = r2_score(y_test, previsoes)
print('%.2f' % r2)

0.81


In [66]:
def adjusted_r2(y_test, y_pred, X_train):
    """Return the adjusted R² of a set of predictions.

    Adjusted R² penalises plain R² for the number of predictors used:

        1 - (1 - R²) * (n - 1) / (n - p - 1)

    where ``n`` is ``len(y_test)`` and ``p`` is ``X_train.shape[1]``.

    Parameters
    ----------
    y_test : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, aligned with ``y_test``.
    X_train : object with a 2-D ``shape``
        Feature matrix; only its column count is read.

    Returns
    -------
    float
        The adjusted coefficient of determination.

    Raises
    ------
    ValueError
        If ``n - p - 1 <= 0``, where the statistic is undefined.
    """
    n_obs = len(y_test)
    n_features = X_train.shape[1]
    dof = n_obs - n_features - 1
    # Check before scoring: a zero denominator would raise a bare
    # ZeroDivisionError and a negative one would yield a meaningless number.
    if dof <= 0:
        raise ValueError(
            "adjusted R2 is undefined: need more observations than "
            "predictors + 1 (got n=%d, p=%d)" % (n_obs, n_features)
        )

    # r2_score comes from the notebook's top-level import cell.
    r2 = r2_score(y_test, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / dof

In [67]:
adjusted_r2(y_test,previsoes,X_train)

0.7991194322255301

In [70]:
# statsmodels' OLS does not fit an intercept on its own, so prepend a constant column.
X2 = sm.add_constant(X)
# Specify the model on the full dataset (X, y), then fit it.
est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
# Print the regression summary table.
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:                     G3   R-squared:                       0.829
Model:                            OLS   Adj. R-squared:                  0.827
Method:                 Least Squares   F-statistic:                     471.9
Date:                Tue, 12 Apr 2022   Prob (F-statistic):          5.79e-148
Time:                        01:25:31   Log-Likelihood:                -812.65
No. Observations:                 395   AIC:                             1635.
Df Residuals:                     390   BIC:                             1655.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0927      1.349      0.810      0.4

In [72]:
# RMSE is the square root of the test-set MSE. Taking the root explicitly avoids
# the `squared=False` flag, which newer scikit-learn releases deprecated and removed.
RMSE = mean_squared_error(y_test, previsoes) ** 0.5
print('%.2f' % RMSE)

2.07
