In [35]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from comparation2 import comparation2


In [36]:
# Cargar datos
oferta_file = "oferta_academica_unificada.csv"
real_file = "resumen_cupos_2025A.xlsx"
df = pd.read_csv(oferta_file)
df_real = pd.read_excel(real_file)


In [37]:
# Preprocesamiento
# Eliminar filas sin datos clave
cols_needed = ['Materia', 'Total_Cupos', 'semestre_numerico', 'Total_Secciones', 'Residuos_Cupos']
df = df.dropna(subset=cols_needed)
df['Cupos_Usados'] = df['Total_Cupos'] - df['Residuos_Cupos'].fillna(0)
df_real['Cupos_Usados'] = df_real['Total_Cupos'] - df_real['Residuos_Cupos'].fillna(0)


In [38]:
# Codificar materia
le = LabelEncoder()
df['materia_codificada'] = le.fit_transform(df['Materia'])


In [39]:
# Features y target
features = ['materia_codificada', 'Total_Secciones', 'semestre_numerico', 'Residuos_Cupos']
target = 'Cupos_Usados'


In [40]:

# Separar entrenamiento y predicción (2025A)
semestre_pred = 4049  # 2025A
train = df[df['semestre_numerico'] < semestre_pred]
pred = df[df['semestre_numerico'] == semestre_pred]


In [41]:
# Escalar features y target
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train = scaler_X.fit_transform(train[features])
y_train = scaler_y.fit_transform(train[[target]]).ravel()
X_pred = scaler_X.transform(pred[features])


In [42]:
# Entrenar modelo
model = RandomForestRegressor(
    n_estimators=500,
    max_depth=8,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=42,
)
model.fit(X_train, y_train)


In [43]:
# Predecir y desescalar
y_pred_scaled = model.predict(X_pred).reshape(-1, 1)
y_pred_real = scaler_y.inverse_transform(y_pred_scaled)
pred = pred.copy()
pred['Cupos_Estimados'] = y_pred_real


In [44]:
# Guardar predicciones agrupadas por materia
df_guardar = pred[['Materia', 'Cupos_Estimados']].groupby('Materia').mean().reset_index()
df_guardar.to_excel("predicciones_cupos_proximo_semestre.xlsx", index=False)
print("✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx")


✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx


In [45]:
# Comparar resultados
comparation2() 

                                                                                  Materia  Cupos_Usados  Cupos_Estimados  Error_Absoluto  Desviacion_%
                                                                   ESTRUCTURAS DE DATOS I             1        68.701072       67.701072   6770.107169
                            SEMINARIO DE SOLUCION DE PROBLEMAS DE ESTRUCTURAS DE DATOS II            14       172.614977      158.614977   1132.964122
                                                                  ESTRUCTURAS DE DATOS II             8        73.569778       65.569778    819.622226
                             SEMINARIO DE SOLUCION DE PROBLEMAS DE ESTRUCTURAS DE DATOS I             9        59.902960       50.902960    565.588444
                                                                               HIPERMEDIA            33       189.915743      156.915743    475.502250
                                                                           BASES DE DATOS     