⚠️ **Aviso:** Este cuaderno NO requiere login ni API. Solo lee archivos locales.

# 03 - Modelo con preprocesado (imputación, one-hot y escalado) y SVM lineal

In [None]:

import os, pandas as pd

def load_table(base_name):
    """Load ``<base_name>.csv`` or, failing that, ``<base_name>.xlsx``.

    Both files are looked up relative to the current working directory,
    and the CSV variant takes precedence when both exist.

    Args:
        base_name: File name without extension (e.g. ``'train'``).

    Returns:
        A ``(table, path)`` tuple: the parsed pandas DataFrame and the
        relative path it was read from.

    Raises:
        FileNotFoundError: If neither the CSV nor the XLSX file exists.
    """
    # Ordered by preference: the first existing candidate wins.
    readers = (('csv', pd.read_csv), ('xlsx', pd.read_excel))
    for extension, reader in readers:
        candidate = os.path.join('.', f'{base_name}.{extension}')
        if os.path.exists(candidate):
            return reader(candidate), candidate
    raise FileNotFoundError(f'No se encontró {base_name}.csv ni {base_name}.xlsx en la carpeta del notebook.')

def detect_id(df):
    """Guess which column of ``df`` holds the row identifier.

    Column labels are compared case-insensitively. A column named exactly
    ``id`` always wins; otherwise the first column whose name contains an
    ID-like fragment is used; otherwise the first column is returned.

    Args:
        df: Object exposing a ``columns`` sequence (e.g. a pandas DataFrame).

    Returns:
        The original (not lower-cased) label of the chosen column.

    Raises:
        IndexError: If a pandas DataFrame without any columns is passed.
    """
    hints = ('documento', 'cedul', 'identific', 'codigo', 'código')
    heuristic_match = None
    for col in df.columns:
        # Labels are not guaranteed to be strings (header-less files yield
        # integers), so go through str() before lower-casing.
        name = str(col).lower()
        if name == 'id':
            # An exact match beats any substring hit seen earlier, e.g. a
            # 'CODIGO_...' feature column that precedes the real 'ID'.
            return col
        if heuristic_match is None and any(hint in name for hint in hints):
            heuristic_match = col
    if heuristic_match is not None:
        return heuristic_match
    return df.columns[0]

import numpy as np, pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVC

# Train a linear SVM on the local train table and write predictions for the
# test table to 'submission_03_svm.csv'.
train, _ = load_table('train')
test, _ = load_table('test')

ID_COL = detect_id(train)
TARGET = 'RENDIMIENTO_GLOBAL'
ALLOWED = ['bajo', 'medio-bajo', 'medio-alto', 'alto']

# Normalise label spelling (whitespace, case, space-vs-hyphen variants).
y = (train[TARGET].astype(str).str.strip().str.lower()
     .replace({'medio bajo': 'medio-bajo', 'medio alto': 'medio-alto'}))

# astype(str) turns missing targets into the literal 'nan', which would be
# learnt as a fifth class; train only on rows with a recognised label.
valid_rows = y.isin(ALLOWED)
if not valid_rows.any():
    raise ValueError(f'Ninguna fila de train tiene un valor válido en {TARGET}: se esperaba uno de {ALLOWED}.')
if not valid_rows.all():
    print(f'Aviso: se descartan {int((~valid_rows).sum())} filas de train con {TARGET} fuera de {ALLOWED}.')
y = y[valid_rows]

# The identifier is dropped from the features as well: it is removed from
# X_test below, and ColumnTransformer rejects a transform input that lacks a
# column it was fitted on.
X = train.loc[valid_rows].drop(columns=[TARGET]).drop(columns=[ID_COL], errors='ignore')

# Everything that is not numeric is treated as categorical, so 'str' and
# 'category' columns are one-hot encoded just like 'object' columns.
num_cols = [c for c in X.columns if pd.api.types.is_numeric_dtype(X[c])]
cat_cols = [c for c in X.columns if c not in num_cols]

preprocess = ColumnTransformer([
    ('cat', Pipeline([('imp', SimpleImputer(strategy='most_frequent')),
                      ('ohe', OneHotEncoder(handle_unknown='ignore'))]), cat_cols),
    # with_mean=False: no centering, only scaling to unit variance.
    ('num', Pipeline([('imp', SimpleImputer(strategy='median')),
                      ('sc', StandardScaler(with_mean=False))]), num_cols)
])

clf = LinearSVC(dual='auto', random_state=42, max_iter=5000)
pipe = Pipeline([('prep', preprocess), ('clf', clf)])
pipe.fit(X, y)

X_test = test.drop(columns=[ID_COL], errors='ignore')
pred = pipe.predict(X_test)

# Mirror the tolerance used for X_test: if the test table has no identifier
# column, fall back to its row index instead of raising KeyError.
test_ids = test[ID_COL] if ID_COL in test.columns else test.index
submission = pd.DataFrame({'ID': np.asarray(test_ids), 'RENDIMIENTO_GLOBAL': pred})
submission = submission[['ID', 'RENDIMIENTO_GLOBAL']]
submission.to_csv('submission_03_svm.csv', index=False, encoding='utf-8')
print('Guardado:', os.path.abspath('submission_03_svm.csv'))
display(submission.head(10))
