In [1]:
# One-time setup: uncomment to extract Test.zip (later cells read the images from Test/).
# !unzip Test.zip

In [2]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.transform import resize 


In [3]:
# Collect the names of the .pgm files found in the Test/ folder.
suffix = '.pgm'
kaggle_files = os.listdir('Test')
pgm_kaggle_files = list(filter(lambda name: name.endswith(suffix), kaggle_files))

In [4]:
# Sanity check: number of .pgm files found in Test/.
len(pgm_kaggle_files)

8000

In [5]:
IMG_SIZE = (64, 64)  # Size every image is resized to


def _read_patch(filename):
    """Read one image from Test/, resize it to IMG_SIZE and flatten it to 1-D."""
    path = 'Test/' + filename
    with open(path, 'rb') as pgmf:
        image = plt.imread(pgmf)
        return resize(image, IMG_SIZE, anti_aliasing=True).flatten()


# One flattened patch per file, in the same order as pgm_kaggle_files.
kaggle_patches = [_read_patch(filename) for filename in tqdm(pgm_kaggle_files)]

100%|██████████| 8000/8000 [00:04<00:00, 1967.39it/s]


In [6]:
# Shape of one flattened patch (the second one); 64 * 64 = 4096 values for IMG_SIZE = (64, 64).
kaggle_patches[1].shape

(4096,)

In [7]:
# Extract the id embedded in each file name by dropping the first 5 characters
# and the 4-character '.pgm' extension. The ids are cast to int further down.
# NOTE(review): assumes a fixed 5-character prefix — confirm against the names in Test/.
pgm_kaggle_files_id = [filename[5:-4] for filename in tqdm(pgm_kaggle_files)]

100%|██████████| 8000/8000 [00:00<00:00, 6633932.78it/s]


In [8]:
import joblib

In [9]:
# Load your own trained model here.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
model = joblib.load('models/knn_model.joblib')

In [10]:
from skimage import feature  # NOTE(review): not used in the visible cells

# Process the images listed in pgm_kaggle_files here and build the feature matrix.
#
# Iterate over pgm_kaggle_files — the very list the submission ids
# (pgm_kaggle_files_id) were derived from — instead of listing Test/ a second
# time. os.listdir returns entries in arbitrary order, so a second listing is
# not guaranteed to line up with the ids; reusing the list makes row i of X
# belong to pgm_kaggle_files_id[i] by construction. It also avoids shadowing
# the builtin `dir`.
X_kag = []

# Kept under its previous name in case a later cell still refers to it.
pgm_files = pgm_kaggle_files

for filename in tqdm(pgm_files):
    path = 'Test/' + filename
    with open(path, 'rb') as pgmf:
        image = plt.imread(pgmf)
        # Same preprocessing as before: resize to IMG_SIZE, then flatten to 1-D.
        img_resized = resize(image, IMG_SIZE, anti_aliasing=True)
        X_kag.append(img_resized.flatten())

# One row per image, one column per pixel.
X = np.array(X_kag)

100%|██████████| 8000/8000 [00:02<00:00, 3109.55it/s]


In [11]:
# Expect (number of images, values per flattened image).
X.shape

(8000, 4096)

In [12]:
# Preview of the feature matrix while it still holds float pixel values.
X

array([[0.47843137, 0.47843137, 0.47058824, ..., 0.1372549 , 0.13333333,
        0.13333333],
       [0.15686275, 0.15294118, 0.15294118, ..., 0.17254902, 0.16862745,
        0.16470588],
       [0.74901961, 0.75686275, 0.77254902, ..., 0.58823529, 0.50980392,
        0.41176471],
       ...,
       [0.4       , 0.4       , 0.4       , ..., 0.63529412, 0.63137255,
        0.62745098],
       [0.77647059, 0.78431373, 0.78431373, ..., 0.63921569, 0.63529412,
        0.63529412],
       [0.87058824, 0.85490196, 0.82352941, ..., 0.63529412, 0.40392157,
        0.34117647]])

In [13]:
# Rescale the float pixel values to 0-255 and store them as uint8.
# The conversion starts from X_kag rather than from X itself so the cell is
# idempotent: with `X = (X*255)...`, running the cell a second time would
# multiply the already-converted uint8 data again and silently wrap around.
# NOTE(review): astype truncates instead of rounding (e.g. 121.99 -> 121). Left
# unchanged because the saved models were presumably fitted on data
# preprocessed the same way — confirm against the training notebook.
X = (np.array(X_kag) * 255).astype(np.uint8)

In [14]:
# Preview of the feature matrix after the conversion to uint8.
X

array([[121, 121, 120, ...,  35,  34,  34],
       [ 40,  39,  39, ...,  44,  43,  42],
       [191, 193, 197, ..., 150, 130, 105],
       ...,
       [102, 102, 102, ..., 162, 161, 160],
       [198, 200, 200, ..., 163, 162, 162],
       [222, 218, 210, ..., 162, 103,  87]], dtype=uint8)

In [15]:
# Load the fitted transformer that is applied to X before predicting.
# NOTE(review): presumably the PCA fitted at training time — confirm. As with
# any joblib file, only load it if it comes from a trusted source.
pca_model = joblib.load('models/pca_model.joblib')

In [16]:
# Transform the pixel matrix with the loaded pca_model
X_pca_kag = pca_model.transform(X)

# Prediction
y_kag = model.predict(X_pca_kag)

In [17]:
# Predicción + ID
y_kag_dic = {pgm_kaggle_files_id[i]: y_kag[i] for i in range(len(pgm_kaggle_files_id))}
kaggle_hat = pd.DataFrame(list(y_kag_dic.items()), columns=['id', 'target_feature'])
kaggle_hat['id'] = kaggle_hat['id'].astype(int)
kaggle_hat['target_feature'] = kaggle_hat['target_feature'].astype(int)
kaggle_hat.sort_values(by='id', inplace=True)

In [18]:
# First rows of the submission table, which is sorted by id.
kaggle_hat.head()

Unnamed: 0,id,target_feature
2069,0,0
2527,1,1
3030,2,0
2576,3,0
4060,4,0


In [19]:
from datetime import datetime

# Minute-resolution timestamp of this run; used below to name the submission file.
fecha_hora = format(datetime.now(), "%Y-%m-%d %H:%M")

print(fecha_hora)

2025-06-14 00:54


In [20]:
# Guardamos en un .csv para subir a kaggle
submission_name = f'CSV_samples/knn_{fecha_hora}'
kaggle_hat.to_csv(submission_name + '.csv', index=False)