In [162]:
# 📦 Importar librerías
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error


In [163]:
# 📁 Cargar archivos
df = pd.read_csv("oferta_academica_unificada.csv")
df_real = pd.read_excel("resumen_cupos_2025A.xlsx")


In [164]:
# 🎯 Crear columna Cupos_Usados (Target real para evaluación)
df_real["Cupos_Usados"] = df_real["Total_Cupos"] - df_real["Residuos_Cupos"].fillna(0)


In [165]:
# 🧹 Preprocesamiento
df = df.dropna(subset=['Materia', 'Total_Cupos', 'semestre_numerico'])
df['Cupos_Usados'] = df['Total_Cupos'] - df['Residuos_Cupos'].fillna(0)


In [166]:
# Codificar materias
le = LabelEncoder()
df['materia_codificada'] = le.fit_transform(df['Materia'])


In [167]:
# 📊 Separar entrenamiento (4042–4048) y predicción (4049 = 2025A)
df_train = df[df['semestre_numerico'] < 4049]
df_pred = df[df['semestre_numerico'] == 4049]


In [168]:
# Escalar features y target por separado
features = ['materia_codificada', 'Total_Secciones', 'semestre_numerico']
target = 'Cupos_Usados'

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit-transform para X y y en entrenamiento
X_train = scaler_X.fit_transform(df_train[features])
y_train = scaler_y.fit_transform(df_train[[target]])

# Transformar X del conjunto de prueba
X_pred = scaler_X.transform(df_pred[features])


In [169]:
# 🚀 Entrenar modelo
X_train = df_train_scaled[features]
y_train = df_train_scaled[target]

model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)


RandomForestRegressor(n_estimators=200, random_state=42)

In [170]:
# 🤖 Predicción y desescalado
y_pred_scaled = model.predict(X_pred).reshape(-1, 1)
y_pred_real = scaler_y.inverse_transform(y_pred_scaled)

df_pred['Cupos_Estimados'] = y_pred_real


  "X does not have valid feature names, but"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [171]:
# ✅ Agrupar por materia para exportar
df_guardar = df_pred[['Materia', 'Cupos_Estimados']].groupby('Materia').mean().reset_index()
df_guardar.to_excel("predicciones_cupos_proximo_semestre.xlsx", index=False)
print("✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx")
print(df_guardar.head())

✅ Archivo generado: predicciones_cupos_proximo_semestre.xlsx
                               Materia  Cupos_Estimados
0     ADMINISTRACION DE BASES DE DATOS       117.746329
1              ADMINISTRACION DE REDES       170.856803
2         ADMINISTRACION DE SERVIDORES       113.229901
3                           ALGORITMIA       116.877001
4  ALMACENES DE DATOS (DATA WAREHOUSE)       116.877001
