In [1]:
from pathlib import Path
import joblib
from typing import List

import pandas as pd
from sklearn.pipeline import Pipeline

from pipeline import (
    DropColumns,
    DropHighNAPercentage,
    NormalizeCurrency,
    OrdinalColumnMapper,
    DataframeOneHotEncoder,
    NanInputer,
    InfoDisplayer,
    MultiDataFramePipeline,
    MultiDataFrameMerger,
)
from utils import (
    show,
    show_null_percentages,
)

# Show every column when a DataFrame is rendered (no truncation of wide frames).
pd.options.display.max_columns = None

In [2]:
# Project folders are resolved relative to the current working directory:
# ROOT_DIR is its parent (presumably the repository root — confirm).
ROOT_DIR = Path.cwd().parent
DATA_DIR = ROOT_DIR.joinpath("data")

# Location of the source spreadsheet; the bare last expression displays
# whether the file is present.
DATA_PATH = DATA_DIR.joinpath("indices_institucional_2005_2022.xlsx")
DATA_PATH.exists()

True

In [3]:
# Build the base table in a single chain:
#   1. load the cached frame and move its index into regular columns,
#   2. keep rows that have an average (maths + language) score,
#   3. keep only "Pregrado" rows, then drop that now-constant column,
#   4. renumber the rows and reduce "Año" from a datetime to its year.
df = (
    joblib.load(DATA_DIR / "base_indices.pkl")
    .reset_index()
    .loc[lambda d: d["Promedio Puntaje (promedio matemáticas y lenguaje)"].notna()]
    .loc[lambda d: d["Pregrado/Posgrado"] == "Pregrado"]
    .drop(columns=["Pregrado/Posgrado"])
    .reset_index(drop=True)
    .assign(**{"Año": lambda d: d["Año"].dt.year})
)

In [4]:
# Load the auxiliary tables; the pickle is unpacked into exactly three objects
# (each is later passed to `prepare_for_merge` as a DataFrame).
# NOTE: joblib.load unpickles arbitrary objects — only load files you trust.
inmuebles, labs, docentes = joblib.load(DATA_DIR / "extra_data.pkl")


def prepare_for_merge(df_to_merge: pd.DataFrame, year_col: str) -> pd.DataFrame:
    """Return a copy of an auxiliary table with normalised merge keys.

    The result carries an integer ``"Año"`` column (taken from ``year_col``)
    and an integer ``"Cód. Sede"`` column (renamed from ``"idSede"`` when
    present). ``year_col``, ``"Tipo Institución"`` and ``"Cód. Institución"``
    are removed.

    Unlike the previous version, the input frame is left untouched, and the
    ``"Cód. Institución"`` column is no longer cast to ``int`` right before
    being discarded.

    Args:
        df_to_merge: Table to normalise. It is not modified.
        year_col: Name of the column holding the year. If it is already
            ``"Año"``, the column is cast in place and kept.

    Returns:
        A new DataFrame with the columns described above; ``"Año"`` is
        appended last unless it already existed.

    Raises:
        KeyError: If ``year_col``, ``"Tipo Institución"``,
            ``"Cód. Institución"`` or the sede column is missing.
        ValueError: If the year or sede values cannot be cast to ``int``
            (for example because they contain NaN).
    """
    # Never drop "Año" itself: when year_col == "Año" the old code deleted the
    # column it had just created.
    obsolete = [
        name
        for name in (year_col, "Tipo Institución", "Cód. Institución")
        if name != "Año"
    ]
    return (
        # .assign works on a copy, so the caller's frame is not mutated.
        df_to_merge.assign(**{"Año": lambda frame: frame[year_col].astype(int)})
        .drop(columns=obsolete)
        .rename(columns={"idSede": "Cód. Sede"})
        .assign(**{"Cód. Sede": lambda frame: frame["Cód. Sede"].astype(int)})
    )


# Normalise the merge keys of each auxiliary table; only the name of the
# year column differs between them.
inmuebles, labs, docentes = (
    prepare_for_merge(table, year_col=year_column)
    for table, year_column in (
        (inmuebles, "Año Información"),
        (labs, "Año Proceso"),
        (docentes, "Año Proceso"),
    )
)

### Main Data Pipeline

In [5]:
# DROP COLUMNS
# Columns passed to the `DropColumns` step of the main pipeline.
# NOTE(review): unusual spellings such as "Códgo SIES" and "(Norte aSur)"
# presumably mirror the raw column headers — check the data before "fixing".
COLUMNS_TO_DROP = [
    # dropped because they are not relevant
    "Nombre de la Sede",
    "Orden Geográfico de la Región (Norte aSur)",
    "Mención o Especialidad",
    "idgenerocarrera",
    "Códgo SIES",
    "Máximo Puntaje (promedio matemáticas y lenguaje)",
    "Máximo Puntaje NEM",
    "Máximo Puntaje Ranking",
    "Mínimo Puntaje (promedio matemáticas y lenguaje)",
    "Mínimo Puntaje NEM",
    "Mínimo Puntaje Ranking",
    # dropped as a data-mining decision
    "Grado Académico",
    "Cód. Institución",
    "Nombre Region",
    "Carrera Genérica",
    "Cód. Carrera",
    "Nombre Programa",
    "Nombre del Campus",
    "Título",
]

# DROP HIGH NA COLUMNS
# Passed as `exclude=` to `DropHighNAPercentage`.
# NOTE(review): "Nombre del Campus" is also listed in COLUMNS_TO_DROP, and the
# drop step runs first in the pipeline, so this exclusion looks redundant — confirm.
EXCLUDE_COLUMNS_OF_DROPHIGHNA = ["Nombre del Campus"]

# NORMALIZE CURRENCY
# Monetary columns handed to the `NormalizeCurrency` step.
CURRENCY_COLUMNS = [
    "Valor de matrícula",
    "Valor de arancel",
    "Valor del Título",
]

# ORDINAL ENCODER
# Each dict maps a raw category label to the integer code that replaces it.
# They are collected, in this order, into MAPPINGS further down.

# Clasificación1 — note that (d) and (e) deliberately share code 3.
m_class1 = {
    "(a) Universidades CRUCH": 0,
    "(b) Universidades Privadas": 1,
    "(c) Institutos Profesionales": 2,
    "(d) Centros de Formación Técnica": 3,
    "(e) Centros de Formación Técnica Estatales": 3,
    "(f) F.F.A.A.": 4,
}

# Clasificación2 — (f) and (g) share code 5.
# NOTE(review): the (g) key reads "statales" whereas m_class1 uses "Estatales";
# verify against the raw labels, a mismatch would leave that category unmapped.
m_class2 = {
    "(a) Universidades Estatales CRUCH": 0,
    "(b) Universidades Privadas CRUCH": 1,
    "(c) Univ. Privadas Adscritas SUA": 2,
    "(d) Universidades Privadas": 3,
    "(e) Institutos Profesionales": 4,
    "(f) Centros de Formación Técnica": 5,
    "(g) Centros de Formación Técnica statales": 5,
    "(h) F.F.A.A.": 6,
}

# Clasificación3
m_class3 = {"(a) Acreditada": 0, "(b) No Acreditada": 1}

# Clasificación4 — two labels carry the "(e)" prefix; they are distinct keys
# with distinct codes (4 and 5).
m_class4 = {
    "(a) Autónoma": 0,
    "(b) Licenciamiento": 1,
    "(c) Examinación": 2,
    "(d) Supervisión": 3,
    "(e) F.F.A.A.": 4,
    "(e) Cerrada": 5,
}

# Clasificación5
m_class5 = {"(a) Adscritas a Gratuidad": 0, "(b) No Adscritas/No Aplica": 1}

# Clasificación6
m_class6 = {
    "(a) Subsistema Universitario": 0,
    "(b) Subsistema Técnico Profesional": 1,
    "(c) No adscrito": 2,
    "(d) F.F.A.A.": 3,
}

# Tipo Institución
m_inst = {
    "Univ.": 0,
    "I.P.": 1,
    "C.F.T.": 2,
    "F.F.A.A.": 3,
}

# Tipo Programa
m_tipo_programa = {"Programa Regular": 0, "Programa Especial": 1}

# Tipo Carrera — codes are not listed in ascending order, and
# "Bachillerato" and "Plan Común o Ciclo Básico" share code 4.
m_tipo_carrera = {
    "Profesional con Licenciatura": 0,
    "Técnico Nivel Superior": 3,
    "Profesional": 1,
    "Licenciatura": 2,
    "Bachillerato": 4,
    "Plan Común o Ciclo Básico": 4,
}

# IngresoDirecto
m_tipo_ingreso = {"Ingreso Directo": 0, "No es Ingreso Directo": 1}

# Columns handed to the ordinal mapper: Clasificación1 … Clasificación6,
# followed by four further categorical columns.
COLUMNS_TO_MAP = [
    *(f"Clasificación{number}" for number in range(1, 7)),
    "Tipo Institución",
    "Tipo Programa",
    "Tipo Carrera",
    "IngresoDirecto",
]
# Mapping dicts listed in the same order as COLUMNS_TO_MAP (ten entries each).
# NOTE(review): presumably OrdinalColumnMapper pairs columns and mappings by
# position — keep both lists aligned when editing; confirm in pipeline.py.
MAPPINGS = [
    m_class1,
    m_class2,
    m_class3,
    m_class4,
    m_class5,
    m_class6,
    m_inst,
    m_tipo_programa,
    m_tipo_carrera,
    m_tipo_ingreso,
]

# ONE HOT ENCODER
# Categorical columns passed to `DataframeOneHotEncoder`; the processed frame
# shows them expanded into "<column>_<category>" indicator columns.
COLUMNS_TO_ONE_HOT = [
    "Nombre Institución",
    "Comuna donde se imparte la carrera o programa",
    "Area Conocimiento",
    "Horario",
]

In [6]:
# Main preprocessing chain for the base table. scikit-learn applies the steps
# in list order; every transformer comes from the local `pipeline` module.
# For debugging, an ("info_displayerN", InfoDisplayer(name="...")) step can be
# inserted between any two stages, and `verbose=True` can be passed to Pipeline.
data_pipeline = Pipeline(
    steps=[
        ("drop_columns", DropColumns(columns=COLUMNS_TO_DROP)),
        (
            "drop_high_na",
            DropHighNAPercentage(
                na_threshold=0.24,
                exclude=EXCLUDE_COLUMNS_OF_DROPHIGHNA,
            ),
        ),
        ("preprocess_tipo_moneda", NormalizeCurrency(columns=CURRENCY_COLUMNS)),
        (
            "ordinal_encoder",
            OrdinalColumnMapper(columns=COLUMNS_TO_MAP, mappings=MAPPINGS),
        ),
        (
            "one_hot_encoder",
            # optional cap, currently disabled: max_categories=30
            DataframeOneHotEncoder(columns=COLUMNS_TO_ONE_HOT, min_frequency=20),
        ),
        ("nan_inputer", NanInputer(n_neighbors=5, columns="auto")),
    ],
)

### Pipelines for the data to merge

In [7]:
def build_aux_pipeline(na_threshold: float = 0.3, n_neighbors: int = 5) -> Pipeline:
    """Return a fresh, unfitted pipeline for one auxiliary table.

    Replaces three copy-pasted, identical pipeline definitions. Each call
    creates new transformer instances, so pipelines built here never share
    fitted state.

    Args:
        na_threshold: Forwarded to ``DropHighNAPercentage(na_threshold=...)``.
        n_neighbors: Forwarded to ``NanInputer(n_neighbors=...)``.

    Returns:
        A ``Pipeline`` with the steps ``drop_columns``, ``drop_high_na`` and
        ``inputer``, in that order.
    """
    return Pipeline(
        steps=[
            (
                "drop_columns",
                DropColumns(
                    columns=["Nombre Institución", "Sede", "idInstitucion"],
                    # not every auxiliary table is expected to carry all three
                    # columns (presumably why errors are ignored — confirm)
                    errors="ignore",
                ),
            ),
            ("drop_high_na", DropHighNAPercentage(na_threshold=na_threshold, exclude=[])),
            ("inputer", NanInputer(n_neighbors=n_neighbors)),
        ]
    )


# One independent instance per auxiliary table, each fitted on its own data.
inmuebles_pipe = build_aux_pipeline()
labs_pipe = build_aux_pipeline()
docentes_pipe = build_aux_pipeline()

In [8]:
# for pipe in [data_pipeline, inmuebles_pipe, labs_pipe, docentes_pipe]:
#     display(pipe)

In [9]:
# Key columns handed to the merger.
MERGE_COLS = ["Año", "Cód. Sede"]

# One named sub-pipeline per input frame: "<label>_processor".
multi_pipe = MultiDataFramePipeline(
    transformers=[
        (f"{label}_processor", sub_pipeline)
        for label, sub_pipeline in (
            ("data", data_pipeline),
            ("inmuebles", inmuebles_pipe),
            ("labs", labs_pipe),
            ("docentes", docentes_pipe),
        )
    ]
)

pipe_merger = MultiDataFrameMerger(columns=MERGE_COLS)

# Full processor: run the per-frame pipelines, then merge their outputs.
data_processor = Pipeline(
    steps=[
        ("multi_pipe", multi_pipe),
        ("pipe_merger", pipe_merger),
    ]
)
    

In [10]:
# Fit and apply the full processor. The frames are listed in the same order as
# the transformers registered in `multi_pipe` (presumably matched by position
# — confirm in pipeline.py).
processed_data = data_processor.fit_transform([df, inmuebles, labs, docentes])

Fitting Pipeline...
Fitting Pipeline...
Fitting Pipeline...
Fitting Pipeline...
Returning 4 DataFrames.
Fitting MultiDataFrameMerger...


In [12]:
# Preview the merged result with the `show` helper from the local utils module.
show(processed_data)

(30607, 187)


Unnamed: 0,Año,Tipo Institución,Clasificación1,Clasificación2,Clasificación3,Clasificación4,Clasificación5,Clasificación6,Tipo Programa,Tipo Carrera,IngresoDirecto,Año Inicio Actividades,Duración (en semestres),Cód. Sede,Promedio Puntaje (promedio matemáticas y lenguaje),Puntaje de corte (primer seleccionado),Puntaje de corte (promedio de la carrera),Puntaje de corte (último seleccionado),Nº Alumnos Ingreso Via PSU o PDT,Valor de matrícula,Valor de arancel,Valor del Título,Vacantes,Matrícula primer año hombres,Matrícula primer año mujeres,Matrícula Primer Año,Matrícula total hombres,Matrícula total mujeres,Matrícula Total,Nombre Institución_C.F.T. DIEGO PORTALES,Nombre Institución_C.F.T. JOHN F. KENNEDY,Nombre Institución_C.F.T. SANTO TOMÁS,Nombre Institución_C.F.T. SIMÓN BOLIVAR,Nombre Institución_ESCUELA DE AVIACIÓN,Nombre Institución_I.P. AIEP,Nombre Institución_I.P. DEL VALLE CENTRAL,Nombre Institución_I.P. DIEGO PORTALES,Nombre Institución_I.P. ESCUELA MODERNA DE MÚSICA,Nombre Institución_I.P. GUILLERMO SUBERCASEAUX,Nombre Institución_I.P. LOS LEONES,Nombre Institución_I.P. SANTO TOMÁS,Nombre Institución_PONTIFICIA U. CATÓLICA DE CHILE,Nombre Institución_PONTIFICIA U. CATÓLICA DE VALPARAÍSO,Nombre Institución_U. ACADEMIA DE HUMANISMO CRISTIANO,Nombre Institución_U. ADOLFO IBÁÑEZ,Nombre Institución_U. ADVENTISTA DE CHILE,Nombre Institución_U. ALBERTO HURTADO,Nombre Institución_U. ANDRÉS BELLO,Nombre Institución_U. ARTURO PRAT,Nombre Institución_U. AUSTRAL DE CHILE,Nombre Institución_U. AUTÓNOMA DE CHILE,Nombre Institución_U. BERNARDO O`HIGGINS,Nombre Institución_U. BOLIVARIANA,Nombre Institución_U. CATÓLICA CARDENAL RAÚL SILVA HENRÍQUEZ,Nombre Institución_U. CATÓLICA DE LA SANTÍSIMA CONCEPCIÓN,Nombre Institución_U. CATÓLICA DE TEMUCO,Nombre Institución_U. CATÓLICA DEL MAULE,Nombre Institución_U. CATÓLICA DEL NORTE,Nombre Institución_U. CENTRAL DE CHILE,Nombre Institución_U. CHILENO-BRITÁNICA DE CULTURA,Nombre Institución_U. DE ANTOFAGASTA,Nombre Institución_U. 
DE ARTE Y CIENCIAS SOCIALES ARCIS,Nombre Institución_U. DE ATACAMA,Nombre Institución_U. DE AYSEN,Nombre Institución_U. DE CHILE,Nombre Institución_U. DE CONCEPCIÓN,Nombre Institución_U. DE LA FRONTERA,Nombre Institución_U. DE LA SERENA,Nombre Institución_U. DE LAS AMÉRICAS,Nombre Institución_U. DE LOS ANDES,Nombre Institución_U. DE LOS LAGOS,Nombre Institución_U. DE MAGALLANES,Nombre Institución_U. DE O`HIGGINS,Nombre Institución_U. DE PLAYA ANCHA DE CIENCIAS DE LA EDUCACIÓN,Nombre Institución_U. DE SANTIAGO DE CHILE,Nombre Institución_U. DE TALCA,Nombre Institución_U. DE TARAPACÁ,Nombre Institución_U. DE VALPARAÍSO,Nombre Institución_U. DE VIÑA DEL MAR,Nombre Institución_U. DEL ALBA,Nombre Institución_U. DEL BÍO-BÍO,Nombre Institución_U. DEL DESARROLLO,Nombre Institución_U. DEL MAR,Nombre Institución_U. DEL PACÍFICO,Nombre Institución_U. DIEGO PORTALES,Nombre Institución_U. FINIS TERRAE,Nombre Institución_U. GABRIELA MISTRAL,Nombre Institución_U. IBEROAMERICANA DE CIENCIAS Y TECNOLOGÍA,Nombre Institución_U. LA REPÚBLICA,Nombre Institución_U. MARÍTIMA DE CHILE,Nombre Institución_U. MAYOR,Nombre Institución_U. METROPOLITANA DE CIENCIAS DE LA EDUCACIÓN,Nombre Institución_U. SAN SEBASTIÁN,Nombre Institución_U. SANTO TOMÁS,Nombre Institución_U. SEK,Nombre Institución_U. TECNOLÓGICA METROPOLITANA,Nombre Institución_U. TÉCNICA FEDERICO SANTA MARÍA,Nombre Institución_U. 
UCINF,Nombre Institución_infrequent_sklearn,Comuna donde se imparte la carrera o programa_Antofagasta,Comuna donde se imparte la carrera o programa_Arica,Comuna donde se imparte la carrera o programa_Calama,Comuna donde se imparte la carrera o programa_Chillán,Comuna donde se imparte la carrera o programa_Concepción,Comuna donde se imparte la carrera o programa_Copiapó,Comuna donde se imparte la carrera o programa_Coquimbo,Comuna donde se imparte la carrera o programa_Coyhaique,Comuna donde se imparte la carrera o programa_Curicó,Comuna donde se imparte la carrera o programa_El Bosque,Comuna donde se imparte la carrera o programa_Estación Central,Comuna donde se imparte la carrera o programa_Huechuraba,Comuna donde se imparte la carrera o programa_Iquique,Comuna donde se imparte la carrera o programa_La Florida,Comuna donde se imparte la carrera o programa_La Pintana,Comuna donde se imparte la carrera o programa_La Serena,Comuna donde se imparte la carrera o programa_Las Condes,Comuna donde se imparte la carrera o programa_Linares,Comuna donde se imparte la carrera o programa_Los Ángeles,Comuna donde se imparte la carrera o programa_Macul,Comuna donde se imparte la carrera o programa_Maipú,Comuna donde se imparte la carrera o programa_Melipilla,Comuna donde se imparte la carrera o programa_Osorno,Comuna donde se imparte la carrera o programa_Ovalle,Comuna donde se imparte la carrera o programa_Peñalolén,Comuna donde se imparte la carrera o programa_Providencia,Comuna donde se imparte la carrera o programa_Puerto Montt,Comuna donde se imparte la carrera o programa_Punta Arenas,Comuna donde se imparte la carrera o programa_Quillota,Comuna donde se imparte la carrera o programa_Quilpué,Comuna donde se imparte la carrera o programa_Rancagua,Comuna donde se imparte la carrera o programa_Recoleta,Comuna donde se imparte la carrera o programa_San Felipe,Comuna donde se imparte la carrera o programa_San Fernando,Comuna donde se imparte la carrera o programa_San 
Joaquín,Comuna donde se imparte la carrera o programa_San Miguel,Comuna donde se imparte la carrera o programa_Santiago,Comuna donde se imparte la carrera o programa_Talca,Comuna donde se imparte la carrera o programa_Talcahuano,Comuna donde se imparte la carrera o programa_Temuco,Comuna donde se imparte la carrera o programa_Valdivia,Comuna donde se imparte la carrera o programa_Valparaíso,Comuna donde se imparte la carrera o programa_Victoria,Comuna donde se imparte la carrera o programa_Villarrica,Comuna donde se imparte la carrera o programa_Vitacura,Comuna donde se imparte la carrera o programa_Viña del Mar,Comuna donde se imparte la carrera o programa_Ñuñoa,Comuna donde se imparte la carrera o programa_infrequent_sklearn,Area Conocimiento_Administración y Comercio,"Area Conocimiento_Agricultura, Silvicultura, Pesca y Veterinaria",Area Conocimiento_Arte y Arquitectura,Area Conocimiento_Ciencias,Area Conocimiento_Ciencias Sociales,Area Conocimiento_Derecho,Area Conocimiento_Educación,"Area Conocimiento_FFAA, Orden y Seguridad",Area Conocimiento_Humanidades,Area Conocimiento_Salud,Area Conocimiento_Tecnología,Horario_Diurno,Horario_Otro,Horario_Vespertino,N° Inmuebles,M2 Terreno,M2 Construido_x,M2 Salas,Nº Oficinas,Nº Salas,Nº Laboratorios,M2 Construido_y,Nº de PC para alumnos,N° Computadores con Internet,N°DocentesJornadaMedia,N°DocentesJornadaHora,N°DocentesJornadaCompleta,N°HorasJornadaCompleta,N°HorasJornadaMedia,N°HorasJornadaHora,N°HorasProfJornadaHora,N°HorasProfJornadaMedia,N°HorasProfJornadaCompleta,N°ProfesionalJornadaHora,N°ProfesionalJornadaMedia,N°ProfesionalJornadaCompleta,N°Docentes,N°Horas,N°DocentesHombres,N°DocentesMujeres
2058,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1954.0,11.0,1001001.0,599.0,809.0,660.0,620.0,125.0,157900.0,4329400.0,75000.0,125.0,59.0,77.0,136.0,291.0,401.0,692.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,150.0,109043910.0,705759.0,50986.0,3500.0,708.0,1091.0,64706.0,5325.0,5231.0,680.0,1584.0,1727.0,75316.0,15040.0,14201.0,4117.0,4299.0,8129.0,464.0,194.0,190.0,3991.0,104557.0,2440.0,1551.0
2059,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013.0,10.0,1001001.0,630.0,750.0,680.0,654.0,85.0,157900.0,5465600.0,75000.0,67.0,44.0,49.0,93.0,178.0,204.0,382.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,150.0,109043910.0,705759.0,50986.0,3500.0,708.0,1091.0,64706.0,5325.0,5231.0,680.0,1584.0,1727.0,75316.0,15040.0,14201.0,4117.0,4299.0,8129.0,464.0,194.0,190.0,3991.0,104557.0,2440.0,1551.0
2060,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1924.0,10.0,1001001.0,682.0,811.0,718.0,698.0,378.0,157900.0,6094500.0,75000.0,370.0,281.0,228.0,509.0,1480.0,1054.0,2534.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,150.0,109043910.0,705759.0,50986.0,3500.0,708.0,1091.0,64706.0,5325.0,5231.0,680.0,1584.0,1727.0,75316.0,15040.0,14201.0,4117.0,4299.0,8129.0,464.0,194.0,190.0,3991.0,104557.0,2440.0,1551.0
2061,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013.0,10.0,1001001.0,637.0,768.0,677.0,651.0,164.0,157900.0,5465600.0,75000.0,137.0,92.0,81.0,173.0,386.0,355.0,741.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,150.0,109043910.0,705759.0,50986.0,3500.0,708.0,1091.0,64706.0,5325.0,5231.0,680.0,1584.0,1727.0,75316.0,15040.0,14201.0,4117.0,4299.0,8129.0,464.0,194.0,190.0,3991.0,104557.0,2440.0,1551.0
2062,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1915.0,10.0,1001001.0,597.0,777.0,657.0,600.0,73.0,157900.0,5059300.0,75000.0,150.0,44.0,34.0,78.0,378.0,273.0,651.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,150.0,109043910.0,705759.0,50986.0,3500.0,708.0,1091.0,64706.0,5325.0,5231.0,680.0,1584.0,1727.0,75316.0,15040.0,14201.0,4117.0,4299.0,8129.0,464.0,194.0,190.0,3991.0,104557.0,2440.0,1551.0


In [14]:
# Summary statistics for the merged frame (the output is very wide because
# display.max_columns is set to None at the top of the notebook).
processed_data.describe()

Unnamed: 0,Año,Tipo Institución,Clasificación1,Clasificación2,Clasificación3,Clasificación4,Clasificación5,Clasificación6,Tipo Programa,Tipo Carrera,IngresoDirecto,Año Inicio Actividades,Duración (en semestres),Cód. Sede,Promedio Puntaje (promedio matemáticas y lenguaje),Puntaje de corte (primer seleccionado),Puntaje de corte (promedio de la carrera),Puntaje de corte (último seleccionado),Nº Alumnos Ingreso Via PSU o PDT,Valor de matrícula,Valor de arancel,Valor del Título,Vacantes,Matrícula primer año hombres,Matrícula primer año mujeres,Matrícula Primer Año,Matrícula total hombres,Matrícula total mujeres,Matrícula Total,Nombre Institución_C.F.T. DIEGO PORTALES,Nombre Institución_C.F.T. JOHN F. KENNEDY,Nombre Institución_C.F.T. SANTO TOMÁS,Nombre Institución_C.F.T. SIMÓN BOLIVAR,Nombre Institución_ESCUELA DE AVIACIÓN,Nombre Institución_I.P. AIEP,Nombre Institución_I.P. DEL VALLE CENTRAL,Nombre Institución_I.P. DIEGO PORTALES,Nombre Institución_I.P. ESCUELA MODERNA DE MÚSICA,Nombre Institución_I.P. GUILLERMO SUBERCASEAUX,Nombre Institución_I.P. LOS LEONES,Nombre Institución_I.P. SANTO TOMÁS,Nombre Institución_PONTIFICIA U. CATÓLICA DE CHILE,Nombre Institución_PONTIFICIA U. CATÓLICA DE VALPARAÍSO,Nombre Institución_U. ACADEMIA DE HUMANISMO CRISTIANO,Nombre Institución_U. ADOLFO IBÁÑEZ,Nombre Institución_U. ADVENTISTA DE CHILE,Nombre Institución_U. ALBERTO HURTADO,Nombre Institución_U. ANDRÉS BELLO,Nombre Institución_U. ARTURO PRAT,Nombre Institución_U. AUSTRAL DE CHILE,Nombre Institución_U. AUTÓNOMA DE CHILE,Nombre Institución_U. BERNARDO O`HIGGINS,Nombre Institución_U. BOLIVARIANA,Nombre Institución_U. CATÓLICA CARDENAL RAÚL SILVA HENRÍQUEZ,Nombre Institución_U. CATÓLICA DE LA SANTÍSIMA CONCEPCIÓN,Nombre Institución_U. CATÓLICA DE TEMUCO,Nombre Institución_U. CATÓLICA DEL MAULE,Nombre Institución_U. CATÓLICA DEL NORTE,Nombre Institución_U. CENTRAL DE CHILE,Nombre Institución_U. CHILENO-BRITÁNICA DE CULTURA,Nombre Institución_U. DE ANTOFAGASTA,Nombre Institución_U. 
DE ARTE Y CIENCIAS SOCIALES ARCIS,Nombre Institución_U. DE ATACAMA,Nombre Institución_U. DE AYSEN,Nombre Institución_U. DE CHILE,Nombre Institución_U. DE CONCEPCIÓN,Nombre Institución_U. DE LA FRONTERA,Nombre Institución_U. DE LA SERENA,Nombre Institución_U. DE LAS AMÉRICAS,Nombre Institución_U. DE LOS ANDES,Nombre Institución_U. DE LOS LAGOS,Nombre Institución_U. DE MAGALLANES,Nombre Institución_U. DE O`HIGGINS,Nombre Institución_U. DE PLAYA ANCHA DE CIENCIAS DE LA EDUCACIÓN,Nombre Institución_U. DE SANTIAGO DE CHILE,Nombre Institución_U. DE TALCA,Nombre Institución_U. DE TARAPACÁ,Nombre Institución_U. DE VALPARAÍSO,Nombre Institución_U. DE VIÑA DEL MAR,Nombre Institución_U. DEL ALBA,Nombre Institución_U. DEL BÍO-BÍO,Nombre Institución_U. DEL DESARROLLO,Nombre Institución_U. DEL MAR,Nombre Institución_U. DEL PACÍFICO,Nombre Institución_U. DIEGO PORTALES,Nombre Institución_U. FINIS TERRAE,Nombre Institución_U. GABRIELA MISTRAL,Nombre Institución_U. IBEROAMERICANA DE CIENCIAS Y TECNOLOGÍA,Nombre Institución_U. LA REPÚBLICA,Nombre Institución_U. MARÍTIMA DE CHILE,Nombre Institución_U. MAYOR,Nombre Institución_U. METROPOLITANA DE CIENCIAS DE LA EDUCACIÓN,Nombre Institución_U. SAN SEBASTIÁN,Nombre Institución_U. SANTO TOMÁS,Nombre Institución_U. SEK,Nombre Institución_U. TECNOLÓGICA METROPOLITANA,Nombre Institución_U. TÉCNICA FEDERICO SANTA MARÍA,Nombre Institución_U. 
UCINF,Nombre Institución_infrequent_sklearn,Comuna donde se imparte la carrera o programa_Antofagasta,Comuna donde se imparte la carrera o programa_Arica,Comuna donde se imparte la carrera o programa_Calama,Comuna donde se imparte la carrera o programa_Chillán,Comuna donde se imparte la carrera o programa_Concepción,Comuna donde se imparte la carrera o programa_Copiapó,Comuna donde se imparte la carrera o programa_Coquimbo,Comuna donde se imparte la carrera o programa_Coyhaique,Comuna donde se imparte la carrera o programa_Curicó,Comuna donde se imparte la carrera o programa_El Bosque,Comuna donde se imparte la carrera o programa_Estación Central,Comuna donde se imparte la carrera o programa_Huechuraba,Comuna donde se imparte la carrera o programa_Iquique,Comuna donde se imparte la carrera o programa_La Florida,Comuna donde se imparte la carrera o programa_La Pintana,Comuna donde se imparte la carrera o programa_La Serena,Comuna donde se imparte la carrera o programa_Las Condes,Comuna donde se imparte la carrera o programa_Linares,Comuna donde se imparte la carrera o programa_Los Ángeles,Comuna donde se imparte la carrera o programa_Macul,Comuna donde se imparte la carrera o programa_Maipú,Comuna donde se imparte la carrera o programa_Melipilla,Comuna donde se imparte la carrera o programa_Osorno,Comuna donde se imparte la carrera o programa_Ovalle,Comuna donde se imparte la carrera o programa_Peñalolén,Comuna donde se imparte la carrera o programa_Providencia,Comuna donde se imparte la carrera o programa_Puerto Montt,Comuna donde se imparte la carrera o programa_Punta Arenas,Comuna donde se imparte la carrera o programa_Quillota,Comuna donde se imparte la carrera o programa_Quilpué,Comuna donde se imparte la carrera o programa_Rancagua,Comuna donde se imparte la carrera o programa_Recoleta,Comuna donde se imparte la carrera o programa_San Felipe,Comuna donde se imparte la carrera o programa_San Fernando,Comuna donde se imparte la carrera o programa_San 
Joaquín,Comuna donde se imparte la carrera o programa_San Miguel,Comuna donde se imparte la carrera o programa_Santiago,Comuna donde se imparte la carrera o programa_Talca,Comuna donde se imparte la carrera o programa_Talcahuano,Comuna donde se imparte la carrera o programa_Temuco,Comuna donde se imparte la carrera o programa_Valdivia,Comuna donde se imparte la carrera o programa_Valparaíso,Comuna donde se imparte la carrera o programa_Victoria,Comuna donde se imparte la carrera o programa_Villarrica,Comuna donde se imparte la carrera o programa_Vitacura,Comuna donde se imparte la carrera o programa_Viña del Mar,Comuna donde se imparte la carrera o programa_Ñuñoa,Comuna donde se imparte la carrera o programa_infrequent_sklearn,Area Conocimiento_Administración y Comercio,"Area Conocimiento_Agricultura, Silvicultura, Pesca y Veterinaria",Area Conocimiento_Arte y Arquitectura,Area Conocimiento_Ciencias,Area Conocimiento_Ciencias Sociales,Area Conocimiento_Derecho,Area Conocimiento_Educación,"Area Conocimiento_FFAA, Orden y Seguridad",Area Conocimiento_Humanidades,Area Conocimiento_Salud,Area Conocimiento_Tecnología,Horario_Diurno,Horario_Otro,Horario_Vespertino,N° Inmuebles,M2 Terreno,M2 Construido_x,M2 Salas,Nº Oficinas,Nº Salas,Nº Laboratorios,M2 Construido_y,Nº de PC para alumnos,N° Computadores con Internet,N°DocentesJornadaMedia,N°DocentesJornadaHora,N°DocentesJornadaCompleta,N°HorasJornadaCompleta,N°HorasJornadaMedia,N°HorasJornadaHora,N°HorasProfJornadaHora,N°HorasProfJornadaMedia,N°HorasProfJornadaCompleta,N°ProfesionalJornadaHora,N°ProfesionalJornadaMedia,N°ProfesionalJornadaCompleta,N°Docentes,N°Horas,N°DocentesHombres,N°DocentesMujeres
count,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0,30607.0
mean,2013.956285,0.050871,0.57696,1.603032,0.104747,0.015813,0.427876,0.313458,0.002418,0.406966,0.007678,1997.80707,9.400366,1077179.0,550.832097,675.713464,570.055739,511.306335,46.792792,181180.9,2785296.0,146200.0,56.058581,26.427125,30.262816,56.594341,107.189238,126.225177,233.406966,0.000555,0.000849,0.004607,0.000947,0.001046,0.006502,0.005816,0.002941,0.000359,0.002156,0.001046,0.003431,0.023753,0.014931,0.012579,0.007449,0.004411,0.014539,0.063515,0.009834,0.027902,0.040514,0.010618,0.003333,0.013428,0.014408,0.023459,0.012252,0.023589,0.023981,0.001503,0.019081,0.0,0.004541,0.00134,0.030875,0.008789,0.01944,0.019505,0.04842,0.014964,0.008429,0.011925,0.003169,0.025778,0.033848,0.01699,0.018427,0.025615,0.022348,0.008397,0.017806,0.020878,0.009606,0.009214,0.015683,0.011599,0.002418,0.004672,0.002908,0.000653,0.029111,0.011762,0.038749,0.052896,0.001928,0.010226,0.025452,0.008135,0.008168,0.043454,0.017643,0.000425,0.016173,0.073088,0.006632,0.008723,0.001666,0.008168,0.000653,0.000359,0.007155,0.018264,0.006698,0.002483,0.031855,0.046035,0.00147,0.00624,0.01614,0.005587,0.003659,0.008691,0.002189,0.004345,0.067566,0.021858,0.012285,0.001013,0.0,0.005326,0.005652,0.006502,0.001405,0.004051,0.01088,0.212598,0.035188,0.006306,0.067664,0.029242,0.067795,0.001568,0.000719,0.002483,0.069233,0.030843,0.002026,0.104355,0.036266,0.064234,0.043814,0.106185,0.043715,0.196295,0.001993,0.030255,0.182965,0.189924,0.927468,0.002156,0.070376,40.128729,2166638.0,84458.992681,9602.821283,500.74251,162.370373,198.641389,11413.93593,1046.80413,1103.146666,118.021858,568.108276,311.480968,13679.124285,2751.720064,4333.77332,2288.174797,1188.947953,3131.685595,303.966413,50.468618,71.208155,997.540465,20762.770673,593.338615,404.236384
std,5.266596,0.281761,0.61395,1.296359,0.306233,0.218884,0.494779,0.723001,0.049112,1.002917,0.087288,18.633066,1.902734,281531.0,62.377615,67.608082,59.555815,76.181935,49.446895,125365.6,1225213.0,229784.1,48.334561,34.834063,30.080145,55.007028,157.026388,136.320693,255.279986,0.023561,0.029134,0.067718,0.030767,0.032318,0.080372,0.07604,0.054148,0.018955,0.046387,0.032318,0.058472,0.15228,0.12128,0.11145,0.085989,0.066268,0.119701,0.243891,0.098681,0.164695,0.197164,0.102499,0.057633,0.115102,0.119169,0.151358,0.110011,0.151768,0.152994,0.038739,0.136811,0.0,0.067238,0.036576,0.172983,0.093337,0.138068,0.138295,0.214656,0.12141,0.091426,0.108552,0.056207,0.158476,0.180842,0.129234,0.134492,0.157987,0.147814,0.09125,0.132249,0.142977,0.097538,0.095546,0.124247,0.107072,0.049112,0.068194,0.053847,0.025555,0.16812,0.107815,0.193,0.22383,0.043864,0.100609,0.157495,0.08983,0.090009,0.20388,0.131652,0.020605,0.126142,0.260285,0.081171,0.092993,0.040787,0.090009,0.025555,0.018955,0.084287,0.133906,0.081567,0.04977,0.175618,0.209565,0.038316,0.078751,0.126016,0.074538,0.060382,0.09282,0.046737,0.065777,0.251004,0.146221,0.110156,0.03181,0.0,0.072783,0.07497,0.080372,0.037456,0.063522,0.103739,0.409153,0.184258,0.079159,0.251173,0.168486,0.251398,0.039571,0.026801,0.04977,0.253853,0.172894,0.044963,0.305726,0.186955,0.245173,0.204683,0.308079,0.204465,0.397201,0.044599,0.17129,0.386644,0.392247,0.259372,0.046387,0.255784,58.204333,14189870.0,115146.733939,10166.03214,701.911663,155.3423,278.61681,15086.637011,1538.930622,2366.050498,133.510573,522.348726,361.921666,16242.60499,3104.82474,4175.315257,2345.701851,1430.205629,3719.354359,301.658473,60.480271,73.088226,901.30715,21732.76941,565.262018,351.85915
min,2005.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1833.0,2.0,1001001.0,250.0,266.0,266.0,201.0,1.0,0.0,300000.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,20.0,4.0,1.0,3.0,1.0,20.0,1.0,1.0,1.0,1.0,1.0,33.0,20.0,3.0,2.0,20.0,34.0,1.0,1.0,1.0,4.0,22.0,1.0,1.0
25%,2009.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1993.0,9.0,1012001.0,515.0,634.0,540.0,452.0,16.0,115000.0,1910000.0,5500.0,30.0,8.0,10.0,25.0,27.0,35.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,27000.0,20899.0,3102.0,100.0,58.0,38.0,2341.0,248.0,248.0,32.0,158.0,81.0,3477.0,726.0,1254.0,599.0,308.0,972.0,82.0,13.0,24.0,374.0,6833.5,222.0,144.0
50%,2014.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2003.0,10.0,1029001.0,546.0,693.0,553.0,500.0,36.0,143000.0,2630000.0,102500.0,45.0,17.0,22.0,44.0,63.0,83.0,166.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,16.0,102041.0,52481.0,6221.0,300.0,103.0,103.0,6423.0,554.0,547.0,73.0,367.0,223.0,9678.0,1672.0,2755.0,1499.0,694.0,2294.0,209.0,30.0,53.0,668.0,14147.0,387.0,272.0
75%,2019.0,0.0,1.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2007.0,10.0,1042001.0,585.0,713.0,601.0,554.0,61.0,190000.0,3472000.0,176200.0,65.0,33.0,40.0,71.0,135.0,170.0,301.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,43.0,353772.0,100566.0,12546.0,589.0,227.0,262.0,14676.0,1087.0,1087.0,146.0,843.0,362.0,15633.0,3344.0,6111.0,3115.0,1383.0,3865.0,426.0,60.0,87.0,1401.0,26805.0,819.0,576.0
max,2022.0,3.0,4.0,6.0,1.0,4.0,1.0,3.0,1.0,4.0,1.0,2022.0,15.0,4061001.0,817.0,880.0,814.0,804.0,796.0,1735255.0,11507750.0,4261000.0,750.0,628.0,665.0,948.0,4221.0,1827.0,6048.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,305.0,109097100.0,705759.0,333060.0,4459.0,775.0,1703.0,110438.0,11401.0,52949.0,708.0,2435.0,1854.0,137500.0,15663.0,23156.0,13473.0,10428.0,64988.0,1504.0,474.0,458.0,3991.0,145377.0,2532.0,1551.0


In [11]:
# Inspect the configured pipelines: leaving `multi_pipe.transformers` as the
# last expression makes the notebook render its repr (a list of sklearn
# `Pipeline` objects with their named steps) as this cell's output.
# NOTE(review): `multi_pipe` is built in an earlier cell; presumably there is
# one Pipeline per input DataFrame — confirm against MultiDataFramePipeline.
multi_pipe.transformers

[Pipeline(steps=[('info_displayer1', InfoDisplayer(name='Original Data')),
                 ('drop_columns',
                  DropColumns(columns=['Nombre de la Sede', 'Orden Geográfico de la Región (Norte aSur)', 'Mención o Especialidad', 'idgenerocarrera', 'Códgo SIES', 'Máximo Puntaje (promedio matemáticas y lenguaje)', 'Máximo Puntaje NEM', 'Máximo Puntaje Ranking', 'Mínimo Puntaje (promedio matemáticas y lenguaje)'...
                  InfoDisplayer(name='After Ordinal Encoder')),
                 ('one_hot_encoder',
                  DataframeOneHotEncoder(columns=['Nombre Institución',
                                                  'Comuna donde se imparte la '
                                                  'carrera o programa',
                                                  'Area Conocimiento',
                                                  'Horario'],
                                         min_frequency=20)),
                 ('info_displayer6',
                