https://www.kaggle.com/sermakarevich/download-resize-clean-12-hours-44gb

In [1]:
import logging 
import math
import os
import subprocess
from multiprocessing.pool import ThreadPool

from PIL import Image

In [2]:
import wget
import tarfile

In [3]:
def move_images_from_sub_to_root_folder(root_folder, subfolder):
    subfolder_content = os.listdir(subfolder)
    folders_in_subfolder = [i for i in subfolder_content if os.path.isdir(os.path.join(subfolder, i))]
    for folder_in_subfolder in folders_in_subfolder:
        subfolder_ = os.path.join(subfolder, folder_in_subfolder)
        move_images_from_sub_to_root_folder(root_folder, subfolder_)
    images = [i for i in subfolder_content if i not in folders_in_subfolder]
    for image in images:
        path_to_image = os.path.join(subfolder, image) 
        os.rename(f'{path_to_image}', f'{root_folder}/{image}')
    try:
        os.rmdir(subfolder)
    except:
        #removed all sufolders and exception for removing the main file
        return
        
def resize_folder_images(src_dir, dst_dir, size=224):
    if not os.path.isdir(dst_dir):
        logger.info("destination directory does not exist, creating destination directory.")
        os.makedirs(dst_dir)

    image_filenames=os.listdir(src_dir)
    for filename in image_filenames:
        dst_filepath = os.path.join(dst_dir, filename)
        src_filepath = os.path.join(src_dir, filename)
        new_img = read_and_resize_image(src_filepath, size)
        if new_img is not None:
            new_img = new_img.convert("RGB")
            new_img.save(dst_filepath)
            # if resize successfull, delete other 
            if os.path.normpath(src_filepath) != os.path.normpath(dst_filepath):
                os.remove(src_filepath)
    
    
def read_and_resize_image(filepath, size):
    img = read_image(filepath)
    if img:
        img = resize_image(img, size)
    return img


def resize_image(img, size):
    if type(size) == int:
        size = (size, size)
    if len(size) > 2:
        raise ValueError("Size needs to be specified as Width, Height")
    return resize_contain(img, size)


def read_image(filepath):
    try:
        img = Image.open(filepath)
        return img
    except (OSError, Exception) as e:
        return None


def resize_contain(image, size, resample=Image.LANCZOS, bg_color=(255, 255, 255, 0)):
    img_format = image.format
    img = image.copy()
    img.thumbnail((size[0], size[1]), resample)
    background = Image.new('RGBA', (size[0], size[1]), bg_color)
    img_position = (
        int(math.ceil((size[0] - img.size[0]) / 2)),
        int(math.ceil((size[1] - img.size[1]) / 2))
    )
    background.paste(img, img_position)
    background.format = img_format
    return background.convert('RGB')
    
def extract_tar(tar_file, path):
    opened_tar = tarfile.open(tar_file)
    
    if tarfile.is_tarfile(tar_file):
        opened_tar.extractall(path)
    else:
        print('invalid tar')
        
    opened_tar.close()


k_trainSize = 64
k_trainPath = f'train/images_{k_trainSize}'
k_trainPath_tars = 'train/tars'

def download_resize_clean(index):
        if not os.path.exists(k_trainPath_tars):
            os.makedirs(k_trainPath_tars, exist_ok=True)

        file_index = '{0:0>3}'.format(index)
        images_file_name = f'images_{file_index}.tar'
        images_folder = f"{k_trainPath}/images_{file_index}"
        
        if not os.path.exists(f'{k_trainPath_tars}/{images_file_name}'):
            images_tar_url = f'https://s3.amazonaws.com/google-landmark/train/{images_file_name}'
            print(f'Downloading: {images_file_name}')
            wget.download(images_tar_url, f'{k_trainPath_tars}/{images_file_name}')
        
        print(f'Unarchiving images into: {images_folder}')
        os.makedirs(images_folder, exist_ok=True)
        extract_tar(f'{k_trainPath_tars}/{images_file_name}', images_folder)
        print(f'Moving images ({images_file_name}) into their root folder ({images_folder})')
        move_images_from_sub_to_root_folder(images_folder, images_folder)
        print(f'Resize the images and remove the full sized ones and their containing folder ({images_folder})')
        resize_folder_images(src_dir=images_folder, dst_dir=k_trainPath, size=k_trainSize)
        os.rmdir(images_folder)

In [4]:
import numpy as np

In [5]:
ThreadPool().imap_unordered(download_resize_clean, np.arange(500))

<multiprocessing.pool.IMapUnorderedIterator at 0x181383ad828>

Unarchiving images into: train/images_64/images_000Unarchiving images into: train/images_64/images_001
Unarchiving images into: train/images_64/images_002

Unarchiving images into: train/images_64/images_003
Moving images (images_003.tar) into their root folder (train/images_64/images_003)
Moving images (images_001.tar) into their root folder (train/images_64/images_001)
Moving images (images_002.tar) into their root folder (train/images_64/images_002)
Moving images (images_000.tar) into their root folder (train/images_64/images_000)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_001)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_002)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_003)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_000)
Unarchiving images into: train/image

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_033)
Unarchiving images into: train/images_64/images_037
Moving images (images_034.tar) into their root folder (train/images_64/images_034)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_034)
Unarchiving images into: train/images_64/images_038
Moving images (images_035.tar) into their root folder (train/images_64/images_035)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_035)
Unarchiving images into: train/images_64/images_039
Moving images (images_036.tar) into their root folder (train/images_64/images_036)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_036)
Unarchiving images into: train/images_64/images_040
Moving images (images_037.tar) into their root folder (train/images_64/images_037)
Resize the images and remove the ful

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_068)
Moving images (images_067.tar) into their root folder (train/images_64/images_067)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_067)
Unarchiving images into: train/images_64/images_071
Unarchiving images into: train/images_64/images_072
Moving images (images_069.tar) into their root folder (train/images_64/images_069)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_069)
Moving images (images_070.tar) into their root folder (train/images_64/images_070)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_070)
Unarchiving images into: train/images_64/images_073
Unarchiving images into: train/images_64/images_074
Moving images (images_072.tar) into their root folder (train/images_64/images_072)
Resize the images and remove the ful

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_103)
Moving images (images_102.tar) into their root folder (train/images_64/images_102)
Moving images (images_101.tar) into their root folder (train/images_64/images_101)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_101)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_102)
Unarchiving images into: train/images_64/images_105
Moving images (images_104.tar) into their root folder (train/images_64/images_104)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_104)
Unarchiving images into: train/images_64/images_106
Unarchiving images into: train/images_64/images_107
Unarchiving images into: train/images_64/images_108
Moving images (images_105.tar) into their root folder (train/images_64/images_105)
Resize the images and remove the ful

Unarchiving images into: train/images_64/images_137
Unarchiving images into: train/images_64/images_138
Moving images (images_136.tar) into their root folder (train/images_64/images_136)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_136)
Unarchiving images into: train/images_64/images_139
Unarchiving images into: train/images_64/images_140
Moving images (images_137.tar) into their root folder (train/images_64/images_137)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_137)
Unarchiving images into: train/images_64/images_141
Moving images (images_139.tar) into their root folder (train/images_64/images_139)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_139)
Moving images (images_140.tar) into their root folder (train/images_64/images_140)
Resize the images and remove the full sized ones and their containing folder (train/images

Unarchiving images into: train/images_64/images_173
Moving images (images_170.tar) into their root folder (train/images_64/images_170)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_170)
Moving images (images_171.tar) into their root folder (train/images_64/images_171)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_171)
Unarchiving images into: train/images_64/images_174
Unarchiving images into: train/images_64/images_175
Moving images (images_172.tar) into their root folder (train/images_64/images_172)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_172)
Unarchiving images into: train/images_64/images_176
Moving images (images_173.tar) into their root folder (train/images_64/images_173)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_173)
Unarchiving images into: train/image

Unarchiving images into: train/images_64/images_207
Moving images (images_205.tar) into their root folder (train/images_64/images_205)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_205)
Moving images (images_204.tar) into their root folder (train/images_64/images_204)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_204)
Unarchiving images into: train/images_64/images_208
Moving images (images_206.tar) into their root folder (train/images_64/images_206)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_206)
Unarchiving images into: train/images_64/images_209
Unarchiving images into: train/images_64/images_210
Moving images (images_207.tar) into their root folder (train/images_64/images_207)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_207)
Unarchiving images into: train/image

Unarchiving images into: train/images_64/images_241
Moving images (images_238.tar) into their root folder (train/images_64/images_238)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_238)
Unarchiving images into: train/images_64/images_242
Moving images (images_239.tar) into their root folder (train/images_64/images_239)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_239)
Unarchiving images into: train/images_64/images_243
Moving images (images_240.tar) into their root folder (train/images_64/images_240)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_240)
Unarchiving images into: train/images_64/images_244
Moving images (images_241.tar) into their root folder (train/images_64/images_241)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_241)
Unarchiving images into: train/image

Unarchiving images into: train/images_64/images_275
Moving images (images_272.tar) into their root folder (train/images_64/images_272)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_272)
Moving images (images_273.tar) into their root folder (train/images_64/images_273)
Unarchiving images into: train/images_64/images_276
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_273)
Unarchiving images into: train/images_64/images_277
Moving images (images_274.tar) into their root folder (train/images_64/images_274)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_274)
Unarchiving images into: train/images_64/images_278
Moving images (images_275.tar) into their root folder (train/images_64/images_275)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_275)
Unarchiving images into: train/image

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_306)
Unarchiving images into: train/images_64/images_308
Moving images (images_307.tar) into their root folder (train/images_64/images_307)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_307)
Unarchiving images into: train/images_64/images_309
Unarchiving images into: train/images_64/images_310
Unarchiving images into: train/images_64/images_311
Moving images (images_308.tar) into their root folder (train/images_64/images_308)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_308)
Moving images (images_309.tar) into their root folder (train/images_64/images_309)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_309)
Unarchiving images into: train/images_64/images_312
Unarchiving images into: train/images_64/images_313
Moving images (

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_341)
Unarchiving images into: train/images_64/images_343
Moving images (images_342.tar) into their root folder (train/images_64/images_342)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_342)
Unarchiving images into: train/images_64/images_344
Moving images (images_340.tar) into their root folder (train/images_64/images_340)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_340)
Moving images (images_343.tar) into their root folder (train/images_64/images_343)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_343)
Unarchiving images into: train/images_64/images_345
Moving images (images_344.tar) into their root folder (train/images_64/images_344)
Resize the images and remove the full sized ones and their containing folder (train/imag

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_374)
Moving images (images_375.tar) into their root folder (train/images_64/images_375)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_375)
Moving images (images_376.tar) into their root folder (train/images_64/images_376)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_376)
Unarchiving images into: train/images_64/images_378
Unarchiving images into: train/images_64/images_379
Moving images (images_377.tar) into their root folder (train/images_64/images_377)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_377)
Unarchiving images into: train/images_64/images_380
Moving images (images_378.tar) into their root folder (train/images_64/images_378)
Resize the images and remove the full sized ones and their containing folder (train/imag

Resize the images and remove the full sized ones and their containing folder (train/images_64/images_408)
Moving images (images_409.tar) into their root folder (train/images_64/images_409)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_409)
Unarchiving images into: train/images_64/images_412
Unarchiving images into: train/images_64/images_413
Moving images (images_410.tar) into their root folder (train/images_64/images_410)
Moving images (images_411.tar) into their root folder (train/images_64/images_411)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_410)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_411)
Moving images (images_412.tar) into their root folder (train/images_64/images_412)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_412)
Moving images (images_413.tar) int

Unarchiving images into: train/images_64/images_444
Unarchiving images into: train/images_64/images_445
Unarchiving images into: train/images_64/images_446
Moving images (images_444.tar) into their root folder (train/images_64/images_444)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_444)
Moving images (images_446.tar) into their root folder (train/images_64/images_446)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_446)
Moving images (images_445.tar) into their root folder (train/images_64/images_445)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_445)
Unarchiving images into: train/images_64/images_447
Unarchiving images into: train/images_64/images_448
Unarchiving images into: train/images_64/images_449
Moving images (images_443.tar) into their root folder (train/images_64/images_443)
Resize the images and remove the full 

Moving images (images_476.tar) into their root folder (train/images_64/images_476)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_476)
Unarchiving images into: train/images_64/images_479
Unarchiving images into: train/images_64/images_480
Unarchiving images into: train/images_64/images_481
Moving images (images_478.tar) into their root folder (train/images_64/images_478)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_478)
Unarchiving images into: train/images_64/images_482
Moving images (images_479.tar) into their root folder (train/images_64/images_479)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_479)
Moving images (images_480.tar) into their root folder (train/images_64/images_480)
Resize the images and remove the full sized ones and their containing folder (train/images_64/images_480)
Moving images (images_481.tar) into 

In [None]:
# import time

# with ThreadPool() as tp:
#     print("\nimap_unordered")
#     imap_unordered_start = time.time()
#     for imap_un_result in tp.imap_unordered(download_resize_clean,  np.arange(500)):
#         print(f"{imap_un_result} took" 
#               f"{time.time() - imap_unordered_start:.0f} seconds")

#### Get training data csv

In [13]:
import requests
from tqdm import tqdm
import urllib
import os

def check_size(url):
    r = requests.get(url, stream=True)
    return int(r.headers['Content-Length'])

def download_file(url, filename: str, forced=False, bar=True):
    """
    Helper method handling downloading large files from `url` to `filename`. Returns a pointer to `filename`.
    """
    if os.path.exists(filename) and not forced:
        return
    
    try:
        chunkSize = 1024
        r = requests.get(url, stream=True)
        with open(filename, 'wb') as f:
            if bar:
                pbar = tqdm(unit="B", total=int(r.headers['Content-Length']))
            for chunk in r.iter_content(chunk_size=chunkSize): 
                if chunk: # filter out keep-alive new chunks
                    if bar: 
                        pbar.update (len(chunk))
                    f.write(chunk)
        return filename
    except Exception as e:
        print(e)
        return

In [14]:
download_file(url="https://s3.amazonaws.com/google-landmark/metadata/train.csv", filename="train.csv")

In [16]:
import pandas as pd
train = pd.read_csv("train.csv")
print(train.head())
print(train.shape)
print("Number of classes {}".format(len(train.landmark_id.unique())))

                 id                                                url  \
0  6e158a47eb2ca3f6  https://upload.wikimedia.org/wikipedia/commons...   
1  202cd79556f30760  http://upload.wikimedia.org/wikipedia/commons/...   
2  3ad87684c99c06e1  http://upload.wikimedia.org/wikipedia/commons/...   
3  e7f70e9c61e66af3  https://upload.wikimedia.org/wikipedia/commons...   
4  4072182eddd0100e  https://upload.wikimedia.org/wikipedia/commons...   

   landmark_id  
0       142820  
1       104169  
2        37914  
3       102140  
4         2474  
(4132914, 3)
Number of classes 203094


In [None]:
from collections import Counter
import operator
counts = dict(Counter(train.landmark_id))
display(np.array(train[train.landmark_id == max(counts.items(), key=operator.itemgetter(1))[0]].url[:5]).astype(str))
display(np.array(train[train.landmark_id == min(counts.items(), key=operator.itemgetter(1))[0]].url[:5]).astype(str))

Conclusion : need of over/under sampling some classes have very few appearences : e.g. 1 augment-duplicate the few sampled images (ideea also from here https://towardsdatascience.com/deep-learning-unbalanced-training-data-solve-it-like-this-6c528e9efea6) and then just duplicate in the training dataframe

In [17]:
from pandas import DataFrame
from PIL import Image, ImageFilter, ImageOps
from tqdm import tqdm

In [23]:
k_min_samples_per_landmark = 50 #heard 50 is a good number (50-250)
k_train_csv_file_name = "train.csv"

class Augmenter:
    def __init__(self, minNumberOfSamples: int = 50, folderRelativePathString: str = "", backup_csv_name: str = 'Augmenter.csv'):
        self.minNumberOfSamples = minNumberOfSamples
        self.folderRelativePathString = f'{folderRelativePathString}/' if (folderRelativePathString != "") else ""
        self.backup_csv_name = backup_csv_name

    # Augments and duplicates the samples in the dataframe
    # @ dataframe should have 2 at least collumns : landmark_id and id, the latter being the photo id as strings
    def generate_samples(self, trainDataframe: DataFrame) -> DataFrame:
        generators = [self.generate_grayscale,
                      self.generate_filtered_unsharp,
                      self.generate_filtered_gaussian_blur,
                      self.generate_mirrored,
                      self.generate_inverted,
                      self.generate_median,
                      self.generate_modal,
                      self.generate_red,
                      self.generate_green,
                      self.generate_blue,
                      #keep it as last resort - duplication
                      self.generate_duplicate]
        
        landmarks = trainDataframe.groupby('landmark_id').apply(lambda x: list(x.id)).to_dict()
        landmarks = sorted(landmarks.items(), key=lambda x: len(x[1]))
        
        for (landmark_id, photos_list) in tqdm(landmarks):
            counts_of_current_landmark = len(photos_list)
            
            if counts_of_current_landmark >= self.minNumberOfSamples:
                continue #to next landmark since this has enough samples
                
            # generate the augment/duplic-ations from the original images and not from duplications
            for augment_index in range(self.minNumberOfSamples - counts_of_current_landmark):
                original_image_name = generated_img_name = photos_list[augment_index % len(photos_list)]
                try:
                    original_image = Image.open(f'{self.folderRelativePathString}{original_image_name}.jpg') #already rgb
                    (generated_img, generated_img_name) = generators[augment_index % len(generators)](image=original_image,
                                                                                                      named=original_image_name)
                    try:
                        generated_img.save(f'{self.folderRelativePathString}{generated_img_name}.jpg')
                    except:
                        print(f'Err. Image save problem (generated: {generated_img_name})')   
                    finally:
                        try: #load the new image
                            generated_img = Image.open(f'{self.folderRelativePathString}{generated_img_name}.jpg')
                            # If successfull: append the generation and save.
                            trainDataframe = trainDataframe.append({'landmark_id': landmark_id, 'id': generated_img_name},
                                                                   ignore_index=True)
                            trainDataframe.to_csv(self.backup_csv_name, index=False)
                        except:
                            print(f'Err. Image open problem (generated: {generated_img_name})')
                except:
                    print(f'Err. Image open problem (original: {original_image_name})')
                    
        return trainDataframe
                
    # returns the name of the generated image
    def generate_grayscale(self, image: Image, named: str) -> (Image, str):
        augmented = image.convert('L')
        augment_name = f'{named}_gray'
        return (augmented, augment_name)
    
    def generate_filtered_unsharp(self, image: Image, named: str) -> (Image, str):
        augmented = image.filter(ImageFilter.UnsharpMask)
        augment_name = f'{named}_unsharp'
        return (augmented, augment_name)
    
    def generate_filtered_gaussian_blur(self, image: Image, named: str) -> (Image, str):
        augmented = image.filter(ImageFilter.GaussianBlur)
        augment_name = f'{named}_blur'
        return (augmented, augment_name)
    
    def generate_duplicate(self, image: Image, named: str) -> (Image, str):
        #just duplicate the entry
        return (image, named)
        
    def generate_red(self, image: Image, named: str) -> (Image, str):
        r, _, _ = image.split()
        return (r.convert("RGB"), f'{named}_r')
        
    def generate_green(self, image: Image, named: str) -> (Image, str):
        _, g, _ = image.split()
        return (g.convert("RGB"), f'{named}_g')
    
    def generate_blue(self, image: Image, named: str) -> (Image, str):
        _, _, b = image.split()
        return (b.convert("RGB"), f'{named}_b')
    
    def generate_median(self, image: Image, named: str) -> (Image, str):
        augmented = image.filter(ImageFilter.MedianFilter(size=3))
        return (augmented, f'{named}_median')
    
    def generate_modal(self, image: Image, named: str) -> (Image, str):
        augmented = image.filter(ImageFilter.ModeFilter(size=3))
        return (augmented, f'{named}_modal')
    
    def generate_mirrored(self, image: Image, named: str) -> (Image, str):
        augmented = ImageOps.mirror(image=image)
        return (augmented, f'{named}_mirror')
    
    def generate_inverted(self, image: Image, named: str) -> (Image, str):
        augmented = ImageOps.invert(image=image)
        return (augmented, f'{named}_negative')

df_to_augm = DataFrame([["142820", "0000ae056149919f"]], columns=['landmark_id', 'id'])
print(Augmenter(minNumberOfSamples=10).generate_samples(df_to_augm))

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.39it/s]


  landmark_id                         id
0      142820           0000ae056149919f
1      142820      0000ae056149919f_gray
2      142820   0000ae056149919f_unsharp
3      142820      0000ae056149919f_blur
4      142820    0000ae056149919f_mirror
5      142820  0000ae056149919f_negative
6      142820    0000ae056149919f_median
7      142820     0000ae056149919f_modal
8      142820         0000ae056149919f_r
9      142820         0000ae056149919f_g


In [29]:
k_train_augmented = "train_augmented.csv"
augmenter = Augmenter(minNumberOfSamples=50, folderRelativePathString=k_trainPath, backup_csv_name=k_train_augmented)

In [30]:
train = pd.read_csv(k_train_augmented, usecols=['landmark_id', 'id'])
train.head()

Unnamed: 0,id,landmark_id
0,6e158a47eb2ca3f6,142820
1,202cd79556f30760,104169
2,3ad87684c99c06e1,37914
3,e7f70e9c61e66af3,102140
4,4072182eddd0100e,2474


In [None]:
augmenter.generate_samples(train)

  0%|                                                                        | 7/203094 [48:53<23698:27:31, 420.09s/it]