In [1]:
#!unzip Test.zip

In [1]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.transform import resize 
import joblib


In [2]:
# Collect the names of all PGM images found in the Test directory.
# The order is whatever os.listdir() returns; later cells pair data by position.
suffix = '.pgm'
kaggle_files = os.listdir('Test')
pgm_kaggle_files = list(filter(lambda name: name.endswith(suffix), kaggle_files))

In [3]:
# Number of .pgm files found in the Test directory.
len(pgm_kaggle_files)

8000

In [4]:
IMG_SIZE = (64, 64)  # Size every image is resized to


def _read_flat_patch(pgm_path):
    """Read one PGM file, resize it to IMG_SIZE and return it flattened to 1-D."""
    with open(pgm_path, 'rb') as pgmf:
        pixels = plt.imread(pgmf)
        return resize(pixels, IMG_SIZE, anti_aliasing=True).flatten()


# One flattened, resized patch per test image, in the order of pgm_kaggle_files.
kaggle_patches = [
    _read_flat_patch('Test/' + filename) for filename in tqdm(pgm_kaggle_files)
]

  0%|          | 0/8000 [00:00<?, ?it/s]

100%|██████████| 8000/8000 [00:02<00:00, 3277.62it/s]


In [5]:
# Shape of one flattened patch (64 * 64 = 4096 values for IMG_SIZE = (64, 64)).
kaggle_patches[1].shape

(4096,)

In [6]:
# Derive an id string from each file name by dropping its first 5 characters
# and its last 4 characters (the '.pgm' extension).
pgm_kaggle_files_id = [name[5:-4] for name in tqdm(pgm_kaggle_files)]

100%|██████████| 8000/8000 [00:00<00:00, 1786901.27it/s]


In [7]:
# Load your trained model here.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
model = joblib.load('models/XGBoost_optuna.joblib')

In [9]:
from skimage import feature

# Load and resize the images listed in pgm_kaggle_files.
#
# The existing file list is reused instead of listing the directory again:
# predictions are later paired with pgm_kaggle_files_id by position, and
# os.listdir() returns entries in arbitrary order, so a second listing is not
# guaranteed to line up with the id list. This also avoids binding a variable
# named `dir`, which would shadow the Python builtin.
suffix = '.pgm'
pgm_files = list(pgm_kaggle_files)  # same files, same order as the id list

X_kag = []
for filename in tqdm(pgm_files):
    path = 'Test/' + filename
    with open(path, 'rb') as pgmf:
        image = plt.imread(pgmf)
        img_resized = resize(image, IMG_SIZE, anti_aliasing=True)
        X_kag.append(img_resized)

# Stack the resized images into a single array, one image per row.
X = np.array(X_kag)
assert X.shape[0] == len(pgm_kaggle_files), 'expected one image per listed file'

100%|██████████| 8000/8000 [00:02<00:00, 3777.16it/s]


In [11]:
# Shape of the stacked image array: (number of images, height, width).
X.shape

(8000, 64, 64)

In [12]:
# Build the HOG feature matrix, one feature vector per resized image.
# The images are read from X_kag rather than from X: this cell overwrites X,
# so reading from X would make a second run apply hog() to the feature
# vectors produced by the first run instead of to the images.
X = np.array([feature.hog(im) for im in tqdm(X_kag, desc='Construyendo X')])

Construyendo X: 100%|██████████| 8000/8000 [00:03<00:00, 2336.96it/s]


In [13]:
# Inspect the HOG feature matrix.
X

array([[0.0115602 , 0.01434561, 0.06819321, ..., 0.00287777, 0.00091003,
        0.00593613],
       [0.05284609, 0.        , 0.0960887 , ..., 0.12875551, 0.        ,
        0.        ],
       [0.23157454, 0.04765686, 0.01664109, ..., 0.09664346, 0.12037459,
        0.02464515],
       ...,
       [0.07593272, 0.        , 0.03304156, ..., 0.00304399, 0.00481296,
        0.01361312],
       [0.12542525, 0.00989001, 0.06152377, ..., 0.00735016, 0.0288684 ,
        0.03341208],
       [0.13633066, 0.102894  , 0.05164999, ..., 0.12081661, 0.05118057,
        0.1205824 ]])

In [14]:
# Load the saved scaler (presumably fitted during training — confirm) and
# apply it to the HOG features; the result is stored separately in x_scaler.
scaler = joblib.load('models/scaler_optuna.joblib')
x_scaler = scaler.transform(X)

In [15]:
# Displays X, the unscaled HOG features.
# NOTE(review): x_scaler may have been the intended object to inspect — confirm.
X

array([[0.0115602 , 0.01434561, 0.06819321, ..., 0.00287777, 0.00091003,
        0.00593613],
       [0.05284609, 0.        , 0.0960887 , ..., 0.12875551, 0.        ,
        0.        ],
       [0.23157454, 0.04765686, 0.01664109, ..., 0.09664346, 0.12037459,
        0.02464515],
       ...,
       [0.07593272, 0.        , 0.03304156, ..., 0.00304399, 0.00481296,
        0.01361312],
       [0.12542525, 0.00989001, 0.06152377, ..., 0.00735016, 0.0288684 ,
        0.03341208],
       [0.13633066, 0.102894  , 0.05164999, ..., 0.12081661, 0.05118057,
        0.1205824 ]])

# PCA

In [16]:
# Load the saved PCA transformer.
pca_model = joblib.load('models/pca_optuna.joblib')

In [17]:
# Project the scaled features with the loaded PCA model.
X_pca_kag = pca_model.transform(x_scaler)

# Prediction
y_kag = model.predict(X_pca_kag)

In [18]:
# Prediction + ID
# Predictions and ids are paired by position, so both must have the same length.
assert len(y_kag) == len(pgm_kaggle_files_id), 'expected one prediction per test image'
y_kag_dic = dict(zip(pgm_kaggle_files_id, y_kag))
# A dict keeps a single entry per key: fail loudly if two files share an id
# instead of silently dropping a prediction.
assert len(y_kag_dic) == len(pgm_kaggle_files_id), 'duplicate ids in pgm_kaggle_files_id'

# Build the submission table with integer columns, ordered by id.
kaggle_hat = (
    pd.DataFrame(list(y_kag_dic.items()), columns=['id', 'target_feature'])
    .astype({'id': int, 'target_feature': int})
    .sort_values(by='id')
)

In [19]:
# Preview the first rows of the submission table (sorted by id).
kaggle_hat.head()

Unnamed: 0,id,target_feature
2069,0,0
2527,1,0
3030,2,0
2576,3,0
4060,4,0


In [20]:
from datetime import datetime

# Current local date and time at minute resolution; used to tag the
# submission file name.
fecha_hora = f"{datetime.now():%Y-%m-%d %H:%M}"

print(fecha_hora)

2025-06-22 18:47


In [21]:
# Save the predictions as a .csv file to upload to Kaggle.
# The output folder is created first because to_csv() does not create
# missing directories.
# NOTE(review): fecha_hora contains a space and ':'; ':' is not a valid
# character in Windows file names — consider a different timestamp format if
# this notebook must run there.
os.makedirs('CSV_samples', exist_ok=True)
submission_name = f'CSV_samples/XGBoost_{fecha_hora}'
kaggle_hat.to_csv(submission_name + '.csv', index=False)

# HOG

In [13]:
from skimage.feature import hog

In [19]:
IMG_SIZE = (64, 64)  # Size every image is resized to

# One HOG feature vector per test image, in the order of pgm_kaggle_files.
kaggle_patches = []
for pgm_name in tqdm(pgm_kaggle_files):
    with open('Test/' + pgm_name, 'rb') as handle:
        resized = resize(plt.imread(handle), IMG_SIZE, anti_aliasing=True)
    # Scale by 255 and cast to 8-bit before extracting HOG features
    # (presumably resize() yields floats in [0, 1] here — confirm).
    as_uint8 = (resized * 255).astype(np.uint8)
    kaggle_patches.append(hog(as_uint8, feature_vector=True))

100%|██████████| 8000/8000 [00:06<00:00, 1256.38it/s]


In [20]:
# Id of each test image: its file name without the first 5 characters and
# without the last 4 characters (the '.pgm' extension).
pgm_kaggle_files_id = [pgm_name[5:-4] for pgm_name in tqdm(pgm_kaggle_files)]

100%|██████████| 8000/8000 [00:00<00:00, 3805651.81it/s]


In [21]:
# Load the HOG-based model; this rebinds `model`, replacing the model loaded
# earlier in the notebook.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
model = joblib.load('models/hog_model.joblib')

In [22]:
# Prediction: kaggle_patches holds one HOG feature vector per test image.
y_kag = model.predict(kaggle_patches)

In [23]:
# Prediction + ID
# Predictions and ids are paired by position, so both must have the same length.
assert len(y_kag) == len(pgm_kaggle_files_id), 'expected one prediction per test image'
y_kag_dic = dict(zip(pgm_kaggle_files_id, y_kag))
# A dict keeps a single entry per key: fail loudly if two files share an id
# instead of silently dropping a prediction.
assert len(y_kag_dic) == len(pgm_kaggle_files_id), 'duplicate ids in pgm_kaggle_files_id'

# Build the submission table with integer columns, ordered by id.
kaggle_hat = (
    pd.DataFrame(list(y_kag_dic.items()), columns=['id', 'target_feature'])
    .astype({'id': int, 'target_feature': int})
    .sort_values(by='id')
)

In [24]:
# Preview the first rows of the submission table (sorted by id).
kaggle_hat.head()

Unnamed: 0,id,target_feature
2069,0,0
2527,1,0
3030,2,0
2576,3,0
4060,4,0


In [27]:
# Number of predictions per class.
kaggle_hat['target_feature'].value_counts()

target_feature
0    7561
1     439
Name: count, dtype: int64

In [28]:
from datetime import datetime

# Current local date and time at minute resolution; used to tag the
# submission file name.
fecha_hora = f"{datetime.now():%Y-%m-%d %H:%M}"

print(fecha_hora)

2025-06-16 19:38


In [29]:
# Save the predictions as a .csv file to upload to Kaggle.
# The output folder is created first because to_csv() does not create
# missing directories.
# NOTE(review): fecha_hora contains a space and ':'; ':' is not a valid
# character in Windows file names — consider a different timestamp format if
# this notebook must run there.
os.makedirs('CSV_samples', exist_ok=True)
submission_name = f'CSV_samples/hog_{fecha_hora}'
kaggle_hat.to_csv(submission_name + '.csv', index=False)