# Importing Modules

In [None]:
# Standard imports
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import trange
from colorama import Fore
from glob import glob
import json
from pprint import pprint
import time
import cv2
from enum import Enum
from IPython.display import display

import warnings
warnings.filterwarnings("ignore")

# Creating Image Datasets
- 3 datasets
    - 512 X 512 images
    - 224 X 224 images
    - 128 X 128 images

In [None]:
# Show the notebook's current working directory (displayed as the cell output).
os.getcwd()

In [None]:
# glob() returns matches in arbitrary, filesystem-dependent order; sort them so
# previews and processing order are reproducible between runs.
data_paths = sorted(glob("../input/petfinder-pawpularity-score/train/*.jpg"))
test_paths = sorted(glob("../input/petfinder-pawpularity-score/test/*.jpg"))

# Every train image followed by every test image.
total_paths = data_paths + test_paths

In [None]:
# !rm -rf ./petFinder_512_images
# !rm -rf ./petFinder_256_images
# !rm -rf ./petFinder_224_images

In [None]:
def buildGridImages(paths = None, nrows=5, ncols=4, img_size=128):
    """
    Display up to ``nrows * ncols`` images in a grid.

    Each image is read with OpenCV, resized to ``img_size`` x ``img_size``
    for display, and titled with its original ``( width X height)``.

    Parameters
    ----------
    paths : sequence of str, optional
        Image file paths. ``None`` or an empty sequence displays nothing.
    nrows, ncols : int
        Grid dimensions. When fewer than ``nrows * ncols`` paths are given,
        the remaining cells are left empty.
    img_size : int
        Side length, in pixels, of the square each image is resized to.

    Returns
    -------
    None
    """
    if paths is None or len(paths) == 0:
        print("No images to display")
        return

    # Never index past the end of ``paths`` when the grid has more cells
    # than there are images.
    n_cells = min(nrows * ncols, len(paths))

    plt.figure(figsize=(20, 12))
    for idx in range(n_cells):
        plt.subplot(nrows, ncols, idx + 1)
        plt.axis("off")

        img = cv2.imread(paths[idx])
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None.
            plt.title("(unreadable)")
            continue

        h, w = img.shape[:2]
        img = cv2.resize(img, (img_size, img_size))
        # OpenCV loads BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        plt.title(f"( {w} X {h})")
        plt.imshow(img)

    plt.tight_layout()
    plt.show()

# Preview every test image in a single row, each resized to 512 x 512 for display.
buildGridImages(test_paths, nrows=1, ncols=len(test_paths), img_size=512)

<div style="text-align:center">
    <img src="https://c.tenor.com/7NP9xWr6GHIAAAAM/face-palm-sheldon-cooper.gif" />
    <h4>WADU HEK !!!</h4>
</div>



# Creating dataset of the images

In [None]:
def createDifferentImageShapeDataset(total_paths : list, folder_name : str = "output", shapes = (512, 224, 128)):
    """
    Write square-resized copies of every image, one dataset per target size.

    For each ``shape`` in ``shapes`` the images are written to
    ``<cwd>/<folder_name>/<shape>_images/<train|test>/<file name>``, where
    ``train``/``test`` is the name of the directory the source image lives in.
    With the default ``shapes`` this produces:
        > 512 X 512 imgs
        > 224 X 224 imgs
        > 128 X 128 imgs

    Parameters
    ----------
    total_paths : list of str
        Paths of the source images.
    folder_name : str
        Name of the output directory created under the current working
        directory.
    shapes : iterable of int
        Side lengths, in pixels, of the square datasets to generate.

    Returns
    -------
    None
    """
    parent_dir = os.path.join(os.getcwd(), folder_name)
    os.makedirs(parent_dir, exist_ok=True)

    for shape in shapes:
        shape_dir = os.path.join(parent_dir, f"{shape}_images")
        for split in ("train", "test"):
            os.makedirs(os.path.join(shape_dir, split), exist_ok=True)

        for i in trange(len(total_paths), desc = f"Reshaping Images to {shape}...", bar_format="{l_bar}%s{bar:50}%s{r_bar}" % (Fore.CYAN, Fore.RESET), position = 0, leave = True):
            src_path = total_paths[i]
            dir_type = os.path.basename(os.path.dirname(src_path))
            img_name = os.path.basename(src_path)

            img = cv2.imread(src_path)
            if img is None:
                # cv2.imread signals a missing/corrupt file by returning None.
                print(f"[WARN] Could not read image: {src_path}")
                continue

            # Keep the image in OpenCV's native BGR order: cv2.imwrite expects
            # BGR, so converting to RGB first would save red/blue swapped.
            img = cv2.resize(img, (shape, shape), interpolation = cv2.INTER_NEAREST)

            dst_path = os.path.join(shape_dir, dir_type, img_name)
            if not cv2.imwrite(dst_path, img):
                print(f"[WARN] Could not write image: {dst_path}")

# Generate the resized datasets for all train and test images in the "petFinder" folder.
createDifferentImageShapeDataset(total_paths, "petFinder")

# Saving Image datasets

In [None]:
def saveModelsKaggle(dir_name : str, title: str, token_path = "../input/kaggletoken/kaggle.json", owner = "hotsonhonet"):
    """
    Publish the directory ``./<dir_name>`` as a Kaggle dataset via the Kaggle CLI.

    Steps: install the ``kaggle`` package, install the API token into
    ``~/.kaggle/kaggle.json``, run ``kaggle datasets init``, fill in the
    generated ``dataset-metadata.json`` and run ``kaggle datasets create``.

    Parameters
    ----------
    dir_name : str
        Directory (relative to the current working directory) to upload.
    title : str
        Dataset title. It is also used as the slug in the dataset id
        ``<owner>/<title>``, so it should be URL-friendly.
    token_path : str
        Path to the ``kaggle.json`` API token.
    owner : str
        Kaggle user name that the dataset id is created under.

    Returns
    -------
    None
        Problems are reported with ``print`` and end the function early; the
        success message is printed only when every Kaggle command succeeded.
    """
    import shutil
    import subprocess

    if not os.path.exists(token_path):
        print("Token doesn't exist")
        return

    dataset_dir = f"./{dir_name}"
    if not os.path.exists(dataset_dir):
        print("Directory doesn't exist")
        return

    def _run(cmd):
        """Run *cmd* without a shell; return True when it exits with status 0."""
        try:
            return subprocess.run(cmd).returncode == 0
        except OSError as err:
            print(f"[ERROR] Could not run {cmd[0]}: {err}")
            return False

    # Best effort: the CLI may already be installed, so a failed install
    # (e.g. no internet access) is not fatal by itself.
    if not _run(["pip", "install", "kaggle"]):
        print("[WARN] 'pip install kaggle' failed; trying the existing installation")

    # Install the token straight into the CLI's config directory instead of
    # staging a copy of the credentials in the working directory.
    kaggle_dir = os.path.join(os.path.expanduser("~"), ".kaggle")
    os.makedirs(kaggle_dir, exist_ok=True)
    installed_token = os.path.join(kaggle_dir, "kaggle.json")
    if os.path.abspath(token_path) != os.path.abspath(installed_token):
        shutil.copyfile(token_path, installed_token)
    os.chmod(installed_token, 0o600)

    if not _run(["kaggle", "datasets", "init", "-p", dataset_dir]):
        print("[ERROR] 'kaggle datasets init' failed")
        return

    # init leaves a metadata template in the dataset directory; set the
    # title and id in place.
    metadata_path = os.path.join(dataset_dir, "dataset-metadata.json")
    with open(metadata_path, 'r+') as file_:
        meta_data = json.load(file_)
        meta_data['title'] = f'{title}'
        meta_data['id'] = f'{owner}/{title}'
        file_.seek(0)
        json.dump(meta_data, file_, indent=4)
        # Drop leftover bytes if the new JSON is shorter than the old content.
        file_.truncate()

    if not _run(["kaggle", "datasets", "create", "-p", dataset_dir, "--dir-mode", "zip"]):
        print("[ERROR] 'kaggle datasets create' failed")
        return

    print("[INFO] Dataset saved successfully")

In [None]:
# saveModelsKaggle("petFinder", "petFinderImages", "../input/kaggletoken/kaggle.json")