In [None]:
import os
import gc
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import sys
from PIL import Image
from joblib import Parallel, delayed

In [None]:
# Create the output folder for the converted images. `-p` keeps the cell
# re-runnable: it does not fail when the folder already exists and also
# creates any missing parent directory.
! mkdir -p ../working/images

In [None]:
class Config:
    """
        Filesystem locations shared by every cell of this notebook.

        All values are plain strings computed once, when the class body runs,
        starting from the parent of the current working directory.
    """
    # root folder: one level above the current working directory
    base_dir = os.path.abspath('../')

    # output side: the working folder and the sub-folder receiving the images
    working_dir = os.path.join(base_dir, 'working')
    images_dir = os.path.join(base_dir, 'working', 'images')

    # input side: folder that holds the dataset's .parquet files
    data_dir = os.path.join(base_dir, 'input/bengaliai-cv19')




In [None]:
def get_parquet_lists(data_dir=None):
    """
    List the .parquet files of a directory and split them into train and test.

    Only file names are collected; no file is opened or loaded.

    Args:
        data_dir: Directory to scan. When omitted (or None), ``Config.data_dir``
            is used.

    Returns:
        A ``(train_files, test_files)`` tuple. Each element is a list of file
        names (not full paths) sorted alphabetically. A name is put in the
        train list when it contains 'train' and in the test list when it
        contains 'test'; a name containing both ends up in both lists.

    Raises:
        OSError: if the directory cannot be listed (e.g. it does not exist).
    """
    if data_dir is None:
        data_dir = Config.data_dir

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order identical from one run to the next.
    parquet_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".parquet"))
    train_files = [f for f in parquet_files if 'train' in f]
    test_files = [f for f in parquet_files if 'test' in f]

    return train_files, test_files


def convert_files(file:str, height:int=137, width:int=236):
    """
    Convert every image stored in one .parquet file into its own .npy file.

    The parquet file is read from ``Config.data_dir``. It must contain an
    ``image_id`` column; all remaining columns of a row are treated as the
    pixel values of one image, reshaped to ``(height, width)`` and saved as
    ``<image_id>.npy`` inside ``Config.images_dir`` (created if missing).

    The conversion is best-effort: a failure on a single image, or on the
    whole file, is reported on stdout instead of being raised, so a loop over
    several files keeps going.

    Args:
        file: Name of the .parquet file, relative to ``Config.data_dir``.
        height: Number of rows of each saved image (default 137).
        width: Number of columns of each saved image (default 236).

    Returns:
        None.
    """
    try:
        print(f'[INFO] Loading {file}')

        df = pd.read_parquet(os.path.join(Config.data_dir, file), engine='pyarrow')

        images_ids = df.image_id.values
        # Pull the pixels out as one 2-D array up front: taking a row of a
        # NumPy array avoids building a pandas Series with .iloc per image.
        pixels = df.drop("image_id", axis=1).to_numpy()

        # Make sure the destination exists even if the mkdir cell was skipped.
        os.makedirs(Config.images_dir, exist_ok=True)

        print(f'[INFO] Working on {file}')

        progress = tqdm(images_ids, desc=f"Converting files in {file} to .npy format")
        for img_id, row in zip(progress, pixels):
            path = os.path.join(Config.images_dir, f'{img_id}.npy')
            try:
                np.save(path, row.reshape(height, width))
            except Exception as ex:
                # Keep going with the remaining images, but say which one failed.
                print(f'[ERROR] Could not save {img_id} from {file}: {ex}')
    except Exception as ex:
        # Report the failure instead of hiding it; Exception (not a bare
        # except) lets KeyboardInterrupt stop a long conversion.
        print(f'[ERROR] Failed to convert {file}: {ex}')

In [None]:
# Collect the names of the parquet files for both splits.
train_files, test_files = get_parquet_lists()

# Show them in processing order: train files first, then test files.
[*train_files, *test_files]

In [None]:
# Convert each parquet file in turn: train files first, then test files.
for file in [*train_files, *test_files]:
    convert_files(file)

# Run a garbage-collection pass now that the conversions are done; as the last
# expression of the cell, the value returned by gc.collect() is displayed.
gc.collect()

In [None]:
# Quietly (-q) and recursively (-r) pack the images folder into images.zip,
# which is written to the current working directory.
!zip -qr images.zip ../working/images/

In [None]:
# Marker showing that the cells above have finished running.
print("end")

In [None]:
# List the working folder (long format, hidden entries, human-readable sizes).
!ls -lah ../working/

# Generate download link

In [None]:
# NOTE(review): `!cd` runs in a temporary subshell, so it does not change the
# notebook's working directory for the cells that follow. If the change is
# meant to persist, `%cd ../working/` is the IPython way to do it - confirm
# first that images.zip is located there.
!cd ../working/

In [None]:
from IPython.display import FileLink

# Render a clickable link to the archive; the path is relative to the
# notebook's current working directory.
FileLink('images.zip')

Alternatively, use a plain HTML link:

<a href="images.zip"> Download File </a>