# Regression analysis

## Load data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the dataset from CSV; column 0 of the file becomes the DataFrame index.
df = pd.read_csv(filepath_or_buffer='final_principal.csv', index_col=0)

## Define the model

In [3]:
# Regressors shared by both specifications. The third column is presumably
# the interaction of the first two (its name suggests so) -- TODO confirm
# against the step that produced the CSV.
base_cols = ['Ano_2022', 'Entorno_tecnologico', '2022_x_entorno_tecnologico']

# Extra covariates that only the extended specification uses.
covariate_cols = [
    'Edad',
    'Estu_genero_f',
    'Cole_bilingue',
    'Fami_educacionmadre',
    'Fami_educacionpadre',
    'Cole_area_ubicacion_urbano',
    'Cole_naturaleza_no oficial',
]

# Design matrix of the simple model.
X = df.loc[:, base_cols]

# Design matrix of the model with covariates: base regressors first,
# followed by the covariates, in the order listed above.
X2 = df.loc[:, base_cols + covariate_cols]

# Dependent variable.
y = df.loc[:, 'Punt_global']

## Regression coefficients with scikit-learn

### Simple model

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
# Build the estimator first, then fit it on the simple design matrix;
# `fit` trains the estimator in place, so `model` is the fitted object.
model = LinearRegression(fit_intercept=True)
model.fit(X, y)

# Slope coefficients, labelled with the name of the regressor they belong to.
params = pd.Series(data=model.coef_, index=X.columns)
params

Ano_2022                       1.567935
Entorno_tecnologico           34.706726
2022_x_entorno_tecnologico     1.040196
dtype: float64

In [6]:
# Constant term of whichever regression `model` currently holds.
intercept = model.intercept_
print('Intercept:', intercept)

Intercept: 235.32303603882193


### Model with covariates

In [7]:
# Re-bind `model` to a fresh estimator fitted on the design matrix that
# includes the covariates; this replaces the previously fitted simple model.
model = LinearRegression(fit_intercept=True)
model.fit(X2, y)

# Slope coefficients, labelled with the name of the regressor they belong to.
params = pd.Series(data=model.coef_, index=X2.columns)
params

Ano_2022                      -0.692532
Entorno_tecnologico           15.104835
2022_x_entorno_tecnologico     2.381686
Edad                          -8.439519
Estu_genero_f                 -6.507558
Cole_bilingue                 19.808505
Fami_educacionmadre            1.531849
Fami_educacionpadre            1.246472
Cole_area_ubicacion_urbano     7.110228
Cole_naturaleza_no oficial    16.575893
dtype: float64

In [8]:
# Constant term of whichever regression `model` currently holds.
intercept = model.intercept_
print('Intercept:', intercept)

Intercept: 348.24714670760727
