In [105]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from xgboost import XGBRegressor
from comparation2 import comparation2
from sklearn.model_selection import GridSearchCV


In [106]:
# Cargar datos
oferta_file = "oferta_academica_unificada.csv"
real_file = "resumen_cupos_2025A.xlsx"
df = pd.read_csv(oferta_file)
df_real = pd.read_excel(real_file)


In [107]:
# Preprocesamiento
cols_needed = ['Materia', 'Total_Cupos', 'semestre_numerico', 'Total_Secciones', 'Residuos_Cupos']
df = df.dropna(subset=cols_needed)
df['Cupos_Usados'] = df['Total_Cupos'] - df['Residuos_Cupos'].fillna(0)
df_real['Cupos_Usados'] = df_real['Total_Cupos'] - df_real['Residuos_Cupos'].fillna(0)


In [108]:
# Codificar materia
le = LabelEncoder()
df['materia_codificada'] = le.fit_transform(df['Materia'])


In [109]:
# Features y target
features = ['materia_codificada', 'Total_Secciones', 'semestre_numerico', 'Residuos_Cupos']
target = 'Cupos_Usados'


In [110]:
# Separar entrenamiento y predicción (2025A)
semestre_pred = 4049  # 2025A
train = df[df['semestre_numerico'] < semestre_pred]
pred = df[df['semestre_numerico'] == semestre_pred]


In [111]:
# Escalar features y target
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train = scaler_X.fit_transform(train[features])
y_train = scaler_y.fit_transform(train[[target]]).ravel()
X_pred = scaler_X.transform(pred[features])


In [112]:
model = XGBRegressor(
    n_estimators=1000,      # Más árboles
    learning_rate=0.01,       # Más bajo
    max_depth=4,              # Menor profundidad
    subsample=0.7,            # Submuestreo
    colsample_bytree=1,     # Submuestreo de features
    min_child_weight=3,       # Regularización
    gamma=0,                 # Regularización
    random_state=42,
    n_jobs=-1
)
model.fit(X_train, y_train)


In [113]:
# Predecir y desescalar
y_pred_scaled = model.predict(X_pred).reshape(-1, 1)
y_pred_real = scaler_y.inverse_transform(y_pred_scaled)
pred = pred.copy()
pred['Cupos_Estimados'] = y_pred_real


In [114]:
# Guardar predicciones agrupadas por materia
df_guardar = pred[['Materia', 'Cupos_Estimados']].groupby('Materia').mean().reset_index()
df_guardar.to_excel("predicciones_cupos_proximo_semestre.xlsx", index=False)
print("✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx")


✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx


In [115]:
# Comparar resultados
comparation2() 

                                                                                  Materia  Cupos_Usados  Cupos_Estimados  Error_Absoluto  Desviacion_%
                                                                   ESTRUCTURAS DE DATOS I             1        38.693802       37.693802   3769.380188
                            SEMINARIO DE SOLUCION DE PROBLEMAS DE ESTRUCTURAS DE DATOS II            14       138.654892      124.654892    890.392085
                                                                  ESTRUCTURAS DE DATOS II             8        75.543732       67.543732    844.296646
                                                                               HIPERMEDIA            33       195.336670      162.336670    491.929303
                                                                           BASES DE DATOS            27       152.013535      125.013535    463.013091
                                                                               ALGORITMIA     