In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    found = [os.path.join(folder, name) for name in names]
    if found:  # skip folders without files so no blank line is printed
        print(*found, sep='\n')
        
from PIL import Image
        
# NOTE(review): this flag is not read by any other visible cell; presumably it was
# meant to switch between training and inference-only runs — confirm or remove.
INFERENCE = False

# Motivation

The motivation for this approach is to apply a pretrained image classifier to image representations of the provided tabular data. I'm trying this because Jeremy Howard of fast.ai gives several examples in his deep learning book where such an approach produced state-of-the-art results. So let's see how it performs on this problem.

In [None]:
# Load the competition's train and test tables.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/tabular-playground-series-feb-2022/train.csv",
        "/kaggle/input/tabular-playground-series-feb-2022/test.csv",
    )
)

In [None]:
# (rows, columns) of the train and test frames, shown as the cell output.
train_df.shape, test_df.shape

In [None]:
# Show the first ten rows of each frame under a short caption.
for caption, frame in (("TRAIN:", train_df), ("TEST:", test_df)):
    print(caption)
    display(frame.head(10))

# Save each row as an image

In [None]:
# Output folder for the training images. exist_ok=True keeps this cell
# re-runnable: a plain os.mkdir raises FileExistsError on the second run.
os.makedirs('train', exist_ok=True)

In [None]:
from fastai import *
from fastai.vision.all import *

In [None]:
def convert_df_to_3d(data, train_min_max=None):
    """Turn each row of a numeric DataFrame into a square, 3-channel image array.

    The columns are padded with constant ``extra_<k>`` columns (value 255) up to
    the next perfect square, min-max scaled per column to [0, 255], reshaped to
    ``side x side`` and repeated over three identical channels. For 286 input
    columns this adds 3 padding columns and produces 17x17 images.

    Args:
        data: DataFrame of numeric features, one image per row. The caller's
            frame is left untouched.
        train_min_max: optional ``(data_min, data_max)`` pair as returned by an
            earlier call (padding columns included). When given, these
            statistics are used instead of those of ``data``; pass the training
            statistics here when converting test data.

    Returns:
        ``(images, (data_min, data_max))`` where ``images`` is a float array of
        shape ``(n_rows, side, side, 3)`` with every value in [0, 255], and the
        second item holds the per-column statistics used for scaling.

    Raises:
        ValueError: if ``data`` has no columns.
    """
    n_features = data.shape[1]
    if n_features == 0:
        raise ValueError("data must contain at least one column")

    # Smallest square that can hold all features.
    side = int(np.sqrt(n_features))
    if side * side < n_features:
        side += 1

    # Work on a copy: the input is usually a slice of a larger frame and must
    # not be modified (doing so also triggers SettingWithCopy warnings).
    data = data.copy()
    for pad_idx in range(side * side - n_features):
        data[f'extra_{pad_idx}'] = 255

    # Min-max statistics: reuse the reference ones when provided.
    if train_min_max is not None:
        data_min, data_max = train_min_max
    else:
        data_min, data_max = data.min(axis=0), data.max(axis=0)

    # Constant columns (the padding in particular) have a zero range; dividing
    # by it would give NaN pixels, so scale them by 1 and let them map to 0.
    value_range = data_max - data_min
    value_range[value_range == 0] = 1

    scaled = (data - data_min) / value_range * 255

    # Rows scaled with reference statistics may fall outside [0, 255]; clip so
    # a later cast to uint8 cannot wrap around.
    scaled = scaled.clip(lower=0, upper=255)

    # (rows, side, side) -> (rows, side, side, 3) with identical channels.
    images = scaled.values.reshape((-1, side, side))[..., None]
    images = np.repeat(images, 3, axis=-1)

    return images, (data_min, data_max)

In [None]:
# Feature columns = everything except the first and the last column
# (presumably the row id and the target — confirm against the CSV header).
feats = train_df.columns[1:-1]
# Keep the min/max statistics so other data can be scaled the same way.
train_data, train_min_max = convert_df_to_3d(train_df[feats])

In [None]:
# Preview the first training row as an image; the pixel values are divided by
# 255 so that imshow receives floats in the 0..1 range.
train_data_0 = train_data[0]
plt.imshow(train_data_0 / 255)

In [None]:
# plt.imshow(convert_df_to_3d(train_df[feats].iloc[1:2], train_min_max)[0][0])

In [None]:
# train_df_0 = train_df[feats].iloc[1]
# # train_df_0 = train_df_0.append(pd.Series([0, 0, 0]))
# train_df_0 = (train_df_0 - train_min_max[0]) / (train_min_max[1] - train_min_max[0])
# train_0 = train_df_0.values.reshape((17, 17))
# plt.imshow(train_0, cmap='gray')

In [None]:
# Write one JPEG per training row, named "<target>_<row index>.jpg".
n_train = len(train_df)
for idx, target in enumerate(train_df.target):
    print(f'{idx+1}/{n_train}', end='\r')  # in-place progress counter
    pixels = train_data[idx].astype(np.uint8)
    Image.fromarray(pixels).convert('RGB').save(f'train/{target}_{idx}.jpg')

In [None]:
# path.ls()

In [None]:
# fname = (path).ls()[0]

In [None]:
# re.findall(r'(.+)_\d+.jpg$', fname.name)

In [None]:
# imgs = DataBlock(blocks = (ImageBlock, CategoryBlock),
#                 get_items=get_image_files,
#                 splitter=RandomSplitter(seed=42),
#                 get_y=using_attr(RegexLabeller(r'(.+)_\d+.jpg$'), 'name'),
#                 item_tfms=Resize(224),
# #                 batch_tfms=aug_transforms(size=224, min_scale=0.75)
#                 )

In [None]:
# dls = imgs.dataloaders(path)

In [None]:
# dls.show_batch(nrows=1, ncols=8)

In [None]:
# learn = cnn_learner(dls, resnet34, metrics=error_rate)
# learn.fine_tune(3)

In [None]:
# learn.save('img_model_v1')

In [None]:
# learn.load('img_model_v1')

In [None]:
# Output folder for the test images. exist_ok=True keeps this cell
# re-runnable: a plain os.mkdir raises FileExistsError on the second run.
os.makedirs('test', exist_ok=True)
# test_path = Path('test')

In [None]:
# Scale the test rows with the training min/max so both image sets share the
# same pixel scaling; the returned statistics are not needed again.
test_data, _ = convert_df_to_3d(test_df[feats], train_min_max)

In [None]:
# Write one JPEG per test row, named "<row index>.jpg".
n_test = len(test_data)
for idx, row_pixels in enumerate(test_data):
    print(f'{idx+1}/{n_test}', end='\r')  # in-place progress counter
    Image.fromarray(row_pixels.astype(np.uint8)).convert('RGB').save(f'test/{idx}.jpg')

In [None]:
# Bundle both image folders into a single gzip-compressed archive.
!tar -czf data.tar.gz 'train/' 'test/'

In [None]:
# List the working directory.
!ls

In [None]:
# Recursively delete the folder of training images.
!rm -r 'train'

In [None]:
# Recursively delete the folder of test images.
!rm -r 'test'

In [None]:
# Long listing of the working directory with human-readable file sizes.
!ls -lh

In [None]:
# test_dl = learn.dls.test_dl(test_path.ls())

In [None]:
# preds, labels, decoded = learn.get_preds(dl=test_dl, with_decoded=True)

In [None]:
# torch.unique(decoded)

In [None]:
# learn.dls.vocab

In [None]:
# preds = [learn.dls.vocab[i] for i in decoded]

In [None]:
# submission = pd.read_csv('/kaggle/input/tabular-playground-series-feb-2022/sample_submission.csv')
# submission['target'] = preds

In [None]:
# submission.head()

In [None]:
# submission.to_csv('submission.csv', index=False)