In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler


In [3]:
import os

# Directory that holds the input CSV. It can be overridden with the
# GEIH_DATA_DIR environment variable so the notebook also runs on machines
# where the original author's Downloads folder does not exist; the default
# keeps the previous behavior.
DATA_DIR = os.environ.get("GEIH_DATA_DIR", r"C:\Users\dsala\Downloads")
os.chdir(DATA_DIR)

In [4]:
# Load the filtered GEIH 2018 sample from the current working directory.
base = pd.read_csv('GEIH2018_filtered_data.csv')

# Dependent variable (natural log of 'y_total_m_ha') and a squared-age column.
# Both live on `base` only; they are excluded from the predictor frame below.
base['Log_salario'] = np.log(base['y_total_m_ha'])
base['edad^2'] = base['age'] ** 2

# Drop the variables that are collinear with the dependent variable
columnas_excluidas = ['y_total_m_ha', 'y_total_m', 'ingtot', 'Log_salario', 'dominio', 'edad^2']
base_reducida = base.drop(columns=columnas_excluidas)

# Variables treated as categorical
categorical_vars = ['estrato1', 'p6210', 'relab', 'orden']

# Dummy-encode the categorical variables.
# NOTE(review): the recorded output lists 'estrato1^2', 'p6210^2', 'relab^2'
# and 'orden^2', so these columns appear to stay numeric here (get_dummies
# only encodes object/string/category columns unless `columns=` is passed).
# Later cells reference them by their original names -- confirm whether real
# dummies were intended before changing this.
X_categoricas = pd.get_dummies(base_reducida[categorical_vars], drop_first=True)

# Put the (encoded) categorical block back, after the remaining columns
resto_columnas = base_reducida.drop(columns=categorical_vars)
base_reducida = pd.concat([resto_columnas, X_categoricas], axis=1)

# Add the square of every remaining numeric, non-boolean column.
# The list is computed up front, so freshly added '^2' columns are not squared again.
# NOTE(review): for 0/1 indicators the square equals the original column
# (e.g. 'sex^2'), which presumably adds perfectly collinear regressors -- verify.
columnas_numericas = [
    column
    for column in base_reducida.columns
    if np.issubdtype(base_reducida[column].dtype, np.number)
    and not np.issubdtype(base_reducida[column].dtype, np.bool_)
]
for column in columnas_numericas:
    base_reducida[f'{column}^2'] = base_reducida[column] ** 2

# Check that the squared variables were added
print(base_reducida.head())


   directorio  secuencia_p  clase  age  sex  p6240  mes  p6426  p6870  p6920  \
0     4514331            1      1   36    1      1    1    166      9      1   
1     4514333            1      1   51    1      1    1     12      4      1   
2     4514334            1      1   45    1      1    1     15      3      1   
3     4514335            1      1   61    1      1    1    120      7      2   
4     4514339            1      1   35    0      1    1     36      9      1   

   ...  informal^2  cuentaPropia^2  microEmpresa^2  sizeFirm^2  college^2  \
0  ...           0               0               0          25          1   
1  ...           0               0               0           9          0   
2  ...           0               0               1           4          0   
3  ...           1               0               0          16          0   
4  ...           0               0               0          25          1   

   depto^2  estrato1^2  p6210^2  relab^2  orden^2  
0   

In [5]:
# Dependent variable: the 'Log_salario' column built on `base`
y = base['Log_salario']

# Fixed seed, reused later wherever a random_state is needed
seed = 202013547

# Hold out 30% of the rows as the test set; the remaining 70% is used for training
X_train, X_test, y_train, y_test = train_test_split(
    base_reducida,
    y,
    test_size=0.3,
    random_state=seed,
)

Backward elimination para reducir el número de variables en X

In [6]:
# Backward elimination on the training set: repeatedly fit OLS and drop the
# regressor with the largest p-value until every remaining p-value is <= 0.05.

# Add an intercept column to the training set.
# statsmodels leaves the frame untouched when it already contains a constant
# column, so 'const' may or may not exist afterwards; both cases are handled.
X_train = sm.add_constant(X_train)
columnas_fijas = ['const'] if 'const' in X_train.columns else []

# Candidate regressors. The intercept is never a candidate: it must not be
# eliminated, and keeping 'const' out of this list means the same names can be
# used to index X_test, which has no 'const' column.
variables_seleccionadas = [col for col in X_train.columns if col != 'const']

while len(variables_seleccionadas) > 0:

    modelo = sm.OLS(y_train, X_train[columnas_fijas + variables_seleccionadas]).fit()

    # p-values of the candidates only (the intercept is excluded from the ranking)
    pvalores = modelo.pvalues.drop(labels=columnas_fijas)

    # Largest p-value among the candidates
    max_pvalor = pvalores.max()

    # Drop the least significant variable while it is above the 5% level
    if max_pvalor > 0.05:
        variable_a_eliminar = pvalores.idxmax()
        variables_seleccionadas.remove(variable_a_eliminar)
        print(f"Eliminando {variable_a_eliminar} con p-valor de {max_pvalor:.4f}")
    else:
        # Every remaining candidate is significant: stop
        break

# Report the surviving variables (used by the following cells)
print(f"Variables finales seleccionadas: {variables_seleccionadas}")




Eliminando p6920^2 con p-valor de 0.7947
Eliminando directorio^2 con p-valor de 0.6579
Eliminando p6920 con p-valor de 0.6479
Eliminando mes^2 con p-valor de 0.6413
Eliminando clase con p-valor de 0.5927
Eliminando p6240 con p-valor de 0.5928
Eliminando depto con p-valor de 0.5928
Eliminando depto^2 con p-valor de 0.5927
Eliminando p7040^2 con p-valor de 0.7301
Eliminando informal con p-valor de 0.6876
Eliminando informal^2 con p-valor de 0.6876
Eliminando clase^2 con p-valor de 0.6434
Eliminando secuencia_p^2 con p-valor de 0.4373
Eliminando sizeFirm con p-valor de 0.2993
Eliminando sizeFirm^2 con p-valor de 0.2456
Eliminando secuencia_p con p-valor de 0.0555
Variables finales seleccionadas: ['directorio', 'age', 'sex', 'mes', 'p6426', 'p6870', 'p7040', 'p7070', 'maxEducLevel', 'totalHoursWorked', 'formal', 'cuentaPropia', 'microEmpresa', 'college', 'estrato1', 'p6210', 'relab', 'orden', 'age^2', 'sex^2', 'p6240^2', 'p6426^2', 'p6870^2', 'p7070^2', 'maxEducLevel^2', 'totalHoursWorked^

LASSO para reducir más las variables y empezar a probar modelos

In [7]:
# Lasso-based variable selection followed by an OLS refit on the survivors.

# Standardize the variables; the scaler is fitted on the training set only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[variables_seleccionadas])
X_test_scaled = scaler.transform(X_test[variables_seleccionadas])

# LassoCV picks the regularization strength by 5-fold cross-validation
lasso = LassoCV(cv=5, random_state=seed).fit(X_train_scaled, y_train)

# Variables that survived the Lasso: those with a non-zero coefficient
variables_lasso = np.array(variables_seleccionadas)[lasso.coef_ != 0]
print(f"Variables seleccionadas por Lasso: {variables_lasso.tolist()}")

# Unscaled design matrices restricted to the Lasso survivors
X_train_lasso = X_train[variables_lasso]
X_test_lasso = X_test[variables_lasso]

# Refit by OLS on the selected variables.
# has_constant='add' forces the intercept column in both matrices; the default
# ('skip') silently omits it when some column happens to be constant, which
# would make the train and test designs disagree.
modelo_lasso = sm.OLS(y_train, sm.add_constant(X_train_lasso, has_constant='add')).fit()

# Results of the post-Lasso model
print(modelo_lasso.summary())

# Predict on the test set
y_pred_lasso = modelo_lasso.predict(sm.add_constant(X_test_lasso, has_constant='add'))

# Evaluate the model (RMSE) and report it
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))
print(f"RMSE del modelo post-Lasso en el conjunto de prueba: {rmse_lasso:.4f}")



Variables seleccionadas por Lasso: ['directorio', 'age', 'sex', 'p6426', 'p6870', 'p7040', 'p7070', 'totalHoursWorked', 'cuentaPropia', 'microEmpresa', 'estrato1', 'relab', 'orden', 'age^2', 'sex^2', 'p6426^2', 'p6870^2', 'p7070^2', 'maxEducLevel^2', 'totalHoursWorked^2', 'formal^2', 'cuentaPropia^2', 'college^2', 'estrato1^2', 'p6210^2', 'relab^2', 'orden^2']
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.580
Method:                 Least Squares   F-statistic:                     497.6
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:00   Log-Likelihood:                -6795.6
No. Observations:                8978   AIC:                         1.364e+04
Df Residuals:                    8952   BIC:                         1.383e+04
Df Mo

Se volvió a correr un backward sobre las sobrevivientes de lasso para reducir aún más

In [8]:
# Second backward elimination, this time restricted to the Lasso survivors.
X_train_backward = X_train_lasso.copy()
X_test_backward = X_test_lasso.copy()

# Start from every surviving variable
variables_seleccionadas = list(X_train_backward.columns)

# Same procedure as before: refit, find the least significant regressor
# (intercept excluded) and drop it while its p-value exceeds 0.05
while variables_seleccionadas:
    X_train_selected = X_train_backward[variables_seleccionadas]
    modelo = sm.OLS(y_train, sm.add_constant(X_train_selected)).fit()

    pvalores = modelo.pvalues.drop('const')
    variable_eliminar = pvalores.idxmax()
    p_valor_max = pvalores.max()

    if not p_valor_max > 0.05:
        break

    print(f"Eliminando {variable_eliminar} con p-valor de {p_valor_max:.4f}")
    variables_seleccionadas.remove(variable_eliminar)

# Final fit on whatever variables are left
X_train_backward_final = X_train_backward[variables_seleccionadas]
modelo_final = sm.OLS(y_train, sm.add_constant(X_train_backward_final)).fit()

# Show the final results
print("Variables finales seleccionadas por Backward Elimination después de Lasso:")
print(variables_seleccionadas)
print(modelo_final.summary())

# Predict on the test set
X_test_backward_final = X_test_backward[variables_seleccionadas]
y_pred_backward = modelo_final.predict(sm.add_constant(X_test_backward_final))

# Evaluate the model (RMSE)
rmse_backward = np.sqrt(mean_squared_error(y_test, y_pred_backward))
print(f"RMSE del modelo final con Backward Elimination en el conjunto de prueba: {rmse_backward:.4f}")

# This model was not used in the end because it relied heavily on Lasso, which
# was not part of the assignment as such; instead, other specifications built
# from the surviving variables were tried.


Eliminando maxEducLevel^2 con p-valor de 0.3169
Eliminando directorio con p-valor de 0.2483
Variables finales seleccionadas por Backward Elimination después de Lasso:
['age', 'sex', 'p6426', 'p6870', 'p7040', 'p7070', 'totalHoursWorked', 'cuentaPropia', 'microEmpresa', 'estrato1', 'relab', 'orden', 'age^2', 'sex^2', 'p6426^2', 'p6870^2', 'p7070^2', 'totalHoursWorked^2', 'formal^2', 'cuentaPropia^2', 'college^2', 'estrato1^2', 'p6210^2', 'relab^2', 'orden^2']
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.581
Model:                            OLS   Adj. R-squared:                  0.580
Method:                 Least Squares   F-statistic:                     540.8
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:04   Log-Likelihood:                -6796.7
No. Observations:                8978   AIC:                   

Finalmente, se eliminaron las variables con alta correlación entre sí

In [9]:
# Absolute pairwise correlation matrix of the training predictors.
# NOTE(review): this uses X_train_backward (every Lasso survivor), not the
# subset kept by the second backward elimination; the recorded output still
# lists 'directorio' and 'maxEducLevel^2' -- confirm this is intended.
correlacion = X_train_backward.corr().abs()

# Flag a column when its correlation with any EARLIER column exceeds the
# threshold, so the first column of each highly correlated pair is the one kept
umbral_correlacion = 0.9
supera_umbral = np.tril(correlacion.to_numpy() > umbral_correlacion, k=-1)
columnas_eliminar = set(correlacion.columns[supera_umbral.any(axis=1)])

# Drop the highly correlated variables
X_train_backward_reducido = X_train_backward.drop(columns=columnas_eliminar)
print(f"Variables después de eliminar alta correlación: {X_train_backward_reducido.columns.tolist()}")


Variables después de eliminar alta correlación: ['directorio', 'age', 'sex', 'p6426', 'p6870', 'p7040', 'p7070', 'totalHoursWorked', 'cuentaPropia', 'microEmpresa', 'estrato1', 'relab', 'orden', 'p7070^2', 'maxEducLevel^2', 'formal^2', 'college^2']


Con las variables sobrevivientes de todos estos procesos se empiezan a probar los siguientes modelos

modelo 0

In [25]:
# Model 0: OLS on every variable that survived the correlation filter.
columnas_finales = X_train_backward_reducido.columns.tolist()

# Select the same variables in the test set
X_test_reducido = X_test_backward[columnas_finales]

# Add the intercept. has_constant='add' guarantees the column is present in
# both matrices (the default silently skips it if some column is constant).
X_train_reducido_const = sm.add_constant(X_train_backward_reducido, has_constant='add')
X_test_reducido_const = sm.add_constant(X_test_reducido, has_constant='add')

# Fit the linear regression
modelo_0 = sm.OLS(y_train, X_train_reducido_const).fit()

# Regression table
print(modelo_0.summary())

# Predict on the test set
y_pred = modelo_0.predict(X_test_reducido_const)

# Evaluate the model (RMSE). np.sqrt(MSE) is used instead of `squared=False`,
# which is deprecated/removed in recent scikit-learn releases, and matches the
# RMSE computation of the earlier cells.
rmse_0 = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE modelo 0 : {rmse_0:.4f}")


                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.540
Model:                            OLS   Adj. R-squared:                  0.539
Method:                 Least Squares   F-statistic:                     619.4
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        17:00:53   Log-Likelihood:                -7217.7
No. Observations:                8978   AIC:                         1.447e+04
Df Residuals:                    8960   BIC:                         1.460e+04
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                7.2311      0.341  



modelo 1

In [11]:
# Model 1: age, hours worked, sex, estrato1 and relab, plus quadratic terms
# in age and hours worked.
columnas_modelo_1 = ['age', 'totalHoursWorked', 'sex', 'estrato1', 'relab']
X_train_nl1 = X_train[columnas_modelo_1].copy()
X_test_nl1 = X_test[columnas_modelo_1].copy()

# Same quadratic terms, in the same order, for both partitions
for particion in (X_train_nl1, X_test_nl1):
    particion['age^2'] = particion['age'] ** 2
    particion['totalHoursWorked^2'] = particion['totalHoursWorked'] ** 2

# has_constant='add' forces the intercept column in both matrices (the default
# silently skips it when some column happens to be constant)
X_train_nl1_const = sm.add_constant(X_train_nl1, has_constant='add')
X_test_nl1_const = sm.add_constant(X_test_nl1, has_constant='add')

modelo_1 = sm.OLS(y_train, X_train_nl1_const).fit()
y_pred_1 = modelo_1.predict(X_test_nl1_const)

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_1 = np.sqrt(mean_squared_error(y_test, y_pred_1))

print("Modelo 1 Summary:")
print(modelo_1.summary())
print(f"RMSE modelo 1 : {rmse_1:.4f}")

Modelo 1 Summary:
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.379
Model:                            OLS   Adj. R-squared:                  0.378
Method:                 Least Squares   F-statistic:                     781.2
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:16   Log-Likelihood:                -8569.5
No. Observations:                8978   AIC:                         1.716e+04
Df Residuals:                    8970   BIC:                         1.721e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const             



Modelo 2

In [12]:
# Model 2: p7070, maxEducLevel, cuentaPropia, estrato1 and formal, plus
# quadratic terms in p7070 and maxEducLevel.
columnas_modelo_2 = ['p7070', 'maxEducLevel', 'cuentaPropia', 'estrato1', 'formal']
X_train_nl2 = X_train[columnas_modelo_2].copy()
X_test_nl2 = X_test[columnas_modelo_2].copy()

# Same quadratic terms, in the same order, for both partitions
for particion in (X_train_nl2, X_test_nl2):
    particion['p7070^2'] = particion['p7070'] ** 2
    particion['maxEducLevel^2'] = particion['maxEducLevel'] ** 2

# has_constant='add' forces the intercept column in both matrices (the default
# silently skips it when some column happens to be constant)
X_train_nl2_const = sm.add_constant(X_train_nl2, has_constant='add')
X_test_nl2_const = sm.add_constant(X_test_nl2, has_constant='add')

modelo_2 = sm.OLS(y_train, X_train_nl2_const).fit()
y_pred_2 = modelo_2.predict(X_test_nl2_const)

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_2 = np.sqrt(mean_squared_error(y_test, y_pred_2))

print("Modelo 2 Summary:")
print(modelo_2.summary())
print(f"RMSE Modelo 2: {rmse_2:.4f}")

Modelo 2 Summary:
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.433
Model:                            OLS   Adj. R-squared:                  0.433
Method:                 Least Squares   F-statistic:                     980.6
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:20   Log-Likelihood:                -8155.4
No. Observations:                8978   AIC:                         1.633e+04
Df Residuals:                    8970   BIC:                         1.638e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const              7.9142 



Modelo 3

In [13]:
# Model 3: age, p6426, sex, formal and orden, plus quadratic terms in age
# and p6426.
columnas_modelo_3 = ['age', 'p6426', 'sex', 'formal', 'orden']
X_train_nl3 = X_train[columnas_modelo_3].copy()
X_test_nl3 = X_test[columnas_modelo_3].copy()

# Same quadratic terms, in the same order, for both partitions
for particion in (X_train_nl3, X_test_nl3):
    particion['age^2'] = particion['age'] ** 2
    particion['p6426^2'] = particion['p6426'] ** 2

# has_constant='add' forces the intercept column in both matrices (the default
# silently skips it when some column happens to be constant)
X_train_nl3_const = sm.add_constant(X_train_nl3, has_constant='add')
X_test_nl3_const = sm.add_constant(X_test_nl3, has_constant='add')

modelo_3 = sm.OLS(y_train, X_train_nl3_const).fit()
y_pred_3 = modelo_3.predict(X_test_nl3_const)

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_3 = np.sqrt(mean_squared_error(y_test, y_pred_3))

print("Modelo 3 Summary:")
print(modelo_3.summary())
print(f"RMSE Modelo 3: {rmse_3:.4f}")


Modelo 3 Summary:
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.233
Model:                            OLS   Adj. R-squared:                  0.232
Method:                 Least Squares   F-statistic:                     389.3
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:25   Log-Likelihood:                -9515.5
No. Observations:                8978   AIC:                         1.905e+04
Df Residuals:                    8970   BIC:                         1.910e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.4324      0.077  



Modelo 4

In [14]:
# Model 4: estrato1, relab, p7040, formal and college, plus quadratic terms
# in estrato1 and relab.
columnas_modelo_4 = ['estrato1', 'relab', 'p7040', 'formal', 'college']
X_train_nl4 = X_train[columnas_modelo_4].copy()
X_test_nl4 = X_test[columnas_modelo_4].copy()

# Same quadratic terms, in the same order, for both partitions
for particion in (X_train_nl4, X_test_nl4):
    particion['estrato1^2'] = particion['estrato1'] ** 2
    particion['relab^2'] = particion['relab'] ** 2

# has_constant='add' forces the intercept column in both matrices (the default
# silently skips it when some column happens to be constant)
X_train_nl4_const = sm.add_constant(X_train_nl4, has_constant='add')
X_test_nl4_const = sm.add_constant(X_test_nl4, has_constant='add')

modelo_4 = sm.OLS(y_train, X_train_nl4_const).fit()
y_pred_4 = modelo_4.predict(X_test_nl4_const)

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_4 = np.sqrt(mean_squared_error(y_test, y_pred_4))

print("Modelo 4 Summary:")
print(modelo_4.summary())
print(f"RMSE Modelo 4: {rmse_4:.4f}")

Modelo 4 Summary:
                            OLS Regression Results                            
Dep. Variable:            Log_salario   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.408
Method:                 Least Squares   F-statistic:                     885.5
Date:                Sat, 14 Sep 2024   Prob (F-statistic):               0.00
Time:                        16:20:30   Log-Likelihood:                -8348.2
No. Observations:                8978   AIC:                         1.671e+04
Df Residuals:                    8970   BIC:                         1.677e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.1819      0.084  



Modelo 5

In [26]:
# Model 5: the regressors of model 1 (using the squared columns already
# present in X_train / X_test) plus three pairwise interactions.
columnas_modelo_5 = ['age', 'totalHoursWorked', 'sex', 'estrato1', 'relab', 'age^2', 'totalHoursWorked^2']
X_train_nl5 = X_train[columnas_modelo_5].copy()
X_test_nl5 = X_test[columnas_modelo_5].copy()

# Build the same interaction terms, in the same order, for both partitions
for destino, origen in ((X_train_nl5, X_train), (X_test_nl5, X_test)):
    destino['age_totalHoursWorked'] = origen['age'] * origen['totalHoursWorked']
    destino['sex_estrato1'] = origen['sex'] * origen['estrato1']
    destino['relab_age'] = origen['relab'] * origen['age']

# Fit the model. has_constant='add' forces the intercept column in both
# matrices (the default silently skips it when some column is constant).
modelo_5 = sm.OLS(y_train, sm.add_constant(X_train_nl5, has_constant='add')).fit()
y_pred_5 = modelo_5.predict(sm.add_constant(X_test_nl5, has_constant='add'))

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_5 = np.sqrt(mean_squared_error(y_test, y_pred_5))

print(f"RMSE modelo 5: {rmse_5:.4f}")

RMSE modelo 5: 0.6478




modelo 6

In [27]:

# Model 6: sex, formal, estrato1, orden and the squared education columns,
# plus three pairwise interactions.
columnas_modelo_6 = ['sex', 'formal', 'estrato1', 'orden', 'maxEducLevel^2', 'college^2']
X_train_nl6 = X_train[columnas_modelo_6].copy()
X_test_nl6 = X_test[columnas_modelo_6].copy()

# Build the same interaction terms, in the same order, for both partitions
for destino, origen in ((X_train_nl6, X_train), (X_test_nl6, X_test)):
    destino['sex_formal'] = origen['sex'] * origen['formal']
    destino['estrato1_orden'] = origen['estrato1'] * origen['orden']
    destino['maxEducLevel2_college2'] = origen['maxEducLevel^2'] * origen['college^2']

# Fit the model. has_constant='add' forces the intercept column in both
# matrices (the default silently skips it when some column is constant).
modelo_6 = sm.OLS(y_train, sm.add_constant(X_train_nl6, has_constant='add')).fit()
y_pred_6 = modelo_6.predict(sm.add_constant(X_test_nl6, has_constant='add'))

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_6 = np.sqrt(mean_squared_error(y_test, y_pred_6))

print(f"RMSE Modelo 6: {rmse_6:.4f}")

RMSE Modelo 6: 0.5988




modelo 7

In [28]:
# Model 7: age, hours worked, formal, estrato1 and p6426 with the squared age
# and hours columns, plus three interactions.
columnas_modelo_7 = ['age', 'totalHoursWorked', 'formal', 'estrato1', 'p6426', 'age^2', 'totalHoursWorked^2']
X_train_nl7 = X_train[columnas_modelo_7].copy()
X_test_nl7 = X_test[columnas_modelo_7].copy()

# Build the same interaction terms, in the same order, for both partitions
for destino, origen in ((X_train_nl7, X_train), (X_test_nl7, X_test)):
    destino['age_formal'] = origen['age'] * origen['formal']
    destino['totalHoursWorked2_estrato1'] = origen['totalHoursWorked^2'] * origen['estrato1']
    destino['p6426_age2'] = origen['p6426'] * origen['age^2']

# Fit the model. has_constant='add' forces the intercept column in both
# matrices (the default silently skips it when some column is constant).
modelo_7 = sm.OLS(y_train, sm.add_constant(X_train_nl7, has_constant='add')).fit()
y_pred_7 = modelo_7.predict(sm.add_constant(X_test_nl7, has_constant='add'))

# Test RMSE; np.sqrt(MSE) replaces `squared=False`, which is deprecated/removed
# in recent scikit-learn releases
rmse_7 = np.sqrt(mean_squared_error(y_test, y_pred_7))

print(f"RMSE Modelo 7: {rmse_7:.4f}")

RMSE Modelo 7: 0.5983




In [19]:
# Collect the test RMSE of every candidate model, in model order (0..7)
rmses = [rmse_0, rmse_1, rmse_2, rmse_3, rmse_4, rmse_5, rmse_6, rmse_7]
modelos = [f'Modelo {numero}' for numero in range(len(rmses))]

# Two-column comparison table: model label and its RMSE
tabla_rmse = pd.DataFrame({'Nombre del Modelo': modelos, 'RMSE': rmses})

In [36]:
# Rank the models from lowest to highest test RMSE (ascending is the default order)
tabla_rmse.sort_values('RMSE')

Unnamed: 0,Nombre del Modelo,RMSE
0,Modelo 0,0.559971
7,Modelo 7,0.598285
6,Modelo 6,0.598802
2,Modelo 2,0.616751
4,Modelo 4,0.629182
1,Modelo 1,0.647716
5,Modelo 5,0.647828
3,Modelo 3,0.703115


Con estos resultados se utilizan el modelo 0 y el 7 para el siguiente punto de LOOCV