In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing and Setting up

In [None]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())

import fastai
print(fastai.__version__)

from fastai.vision.all import *

from sklearn import metrics as skm
import logging, sys

from scipy import stats
import psutil

import re
import itertools

pd.options.mode.chained_assignment = None


In [None]:
pip install gputil

In [None]:
import GPUtil

# List of GPU objects reported by GPUtil; expected to be empty when no GPU is visible.
gpus = GPUtil.getGPUs()

# Test for emptiness explicitly instead of indexing inside a bare `except:`,
# which would also have hidden unrelated errors (including KeyboardInterrupt).
gpu_available = bool(gpus)
if gpu_available:
    gpu = gpus[0]
else:
    print("no gpu detected")

In [None]:
def get_size(bytes, suffix="B"):
    """
    Scale a byte count to a human-readable string with two decimals.

    e.g:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'

    Parameters:
        bytes: number of bytes (int or float).
        suffix: text appended after the unit prefix (default "B").

    Returns:
        A string such as '1.20MB'. Values of 1024 PB or more are reported
        in exa-units ("E") instead of falling off the loop and returning None.
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if bytes < factor:
            return f"{bytes:.2f}{unit}{suffix}"
        bytes /= factor
    # The value has been divided by 1024 six times at this point, i.e. it is in exa-units.
    return f"{bytes:.2f}E{suffix}"

In [None]:
import random
import string

# Default length of the random identifiers
N = 7


def randomstr(length=None):
    """
    Return a random string of uppercase ASCII letters and digits.

    Parameters:
        length: number of characters; when None (the default) the module-level
            N is used, looked up at call time as before.

    Returns:
        A str of `length` characters drawn with replacement via random.choices().
    """
    size = N if length is None else length
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=size))

In [None]:
# p = psutil.Process().cpu_percent(interval=1)
# cpupcnt = p.cpu_percent() / psutil.cpu_count()

In [None]:
# This function collects hardware details such as CPU, GPU and memory usage
def get_hardware_info():
    """
    Build a one-line summary of current CPU, GPU and virtual-memory usage.

    Returns:
        A string of "name=value, " pairs (every pair, including the last, is
        followed by ", "): the CPU percentage, then GPU figures when a GPU was
        detected, then virtual-memory figures formatted with get_size().

    With a single GPU the keys are "GPU Load", "GPU Memory Free",
    "GPU Memory Used" and "GPU Total". With several GPUs the keys carry the GPU
    position ("GPU0 Load", "GPU1 Load", ...) so that one GPU's figures no
    longer overwrite another's.
    """
    fulldata = {"CPU Percent": psutil.cpu_percent()}

    if gpu_available:
        # NOTE(review): GPUtil GPU objects appear to be snapshots taken when
        # getGPUs() is called, so query again to report current figures - confirm.
        current_gpus = GPUtil.getGPUs()
        several = len(current_gpus) > 1
        for position, device in enumerate(current_gpus):
            prefix = f"GPU{position}" if several else "GPU"
            fulldata[f"{prefix} Load"] = device.load
            fulldata[f"{prefix} Memory Free"] = device.memoryFree
            fulldata[f"{prefix} Memory Used"] = device.memoryUsed
            fulldata[f"{prefix} Total"] = device.memoryTotal

    svm = psutil.virtual_memory()
    fulldata["vMem percent"] = svm.percent
    fulldata["vMem total"] = get_size(svm.total)
    fulldata["vMem available"] = get_size(svm.available)
    fulldata["vMem used"] = get_size(svm.used)
    # The key used to be "vMem free=", which rendered as "vMem free==<value>".
    fulldata["vMem free"] = get_size(svm.free)

    return "".join(f"{name}={value}, " for name, value in fulldata.items())

In [None]:
## If GPU runs out of memory
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
print("Setting up Complete")

# Exploring the Data

After going through the dataset, the structure seems to be as follows:
```
nybg2020 - test - metadata.json
         |      \- images - 000 - 0.jpg
         |                |     |- 1.jpg
         |                |     \- ...
         |                \- ...    
         |                
         \- train - metadata.json
                  \- images - 000 - 00 - 437000.jpg
                           |     |    - ...
                           |     |- 01
                           |     \- ...
                           |- 001 - 00
                           |      \- ...
                           \- ...
```

Let's now pull up the metadata json files from train and test, and check their contents.

In [None]:
# Locations of the metadata files and of the image roots
train_metadata_path = r"../input/herbarium-2020-fgvc7/nybg2020/train/metadata.json"
test_metadata_path = r"../input/herbarium-2020-fgvc7/nybg2020/test/metadata.json"
train_path = Path('/kaggle/input/herbarium-2020-fgvc7/nybg2020/train/')
test_path = Path('/kaggle/input/herbarium-2020-fgvc7/nybg2020/test/')

# Read each file as UTF-8 (undecodable bytes are dropped) and parse the JSON
train_metadata = json.loads(Path(train_metadata_path).read_text(encoding="utf8", errors='ignore'))
test_metadata = json.loads(Path(test_metadata_path).read_text(encoding="utf8", errors='ignore'))

In [None]:
len(train_metadata['images']), len(train_metadata['annotations'])

In [None]:
# Number of entries in the training metadata's "images" list; used below as a default argument
total_images = len(train_metadata['images'])

In [None]:
train_metadata['annotations'][0], train_metadata['images'][0]

The structure of the metadata is as follows. `annotations` and `images` have the same number of elements. The labels (the dependent variable) are in `annotations`, while the file path of each image is in `images`.

```
metadata = {annotations:[{'category_id': 11524, 
                          'id': 818566, 
                          'image_id': 818566, 
                          'region_id': 1}, ...],
            categories: [{'family': 'Orchidaceae',
                          'genus': 'Aa',
                          'id': 0,
                          'name': 'Aa mathewsii (Rchb.f.) Schltr.'}, ...]
            images: [{'file_name': 'images/156/72/354106.jpg',
                      'height': 1000,
                      'id': 354106,
                      'license': 1,
                      'width': 661},...]}
         
```

In [None]:
# y = 0
# for x in range(len(train_metadata["annotations"])):
#     if not train_metadata["annotations"][x]["image_id"] == train_metadata["images"][x]["id"]:
#         print("The ordering is not the same!")
#         break
#     else: y+=1
# print("Ordering is the same for %d images." %y)
# print(y==total_images)

In [None]:
# Sample directories for the exploration code below (which is currently commented out)
path = r'/kaggle/input/herbarium-2020-fgvc7/nybg2020/train/images/000/'
subpath = r'/kaggle/input/herbarium-2020-fgvc7/nybg2020/train/images/010/05/'
# Same value as the test_path assigned in the metadata-loading cell above
test_path = Path('/kaggle/input/herbarium-2020-fgvc7/nybg2020/test/')

In [None]:

# imagelist = []
# subdir_imagelist = []

# # creates a list of all image file names in all subdirectories in int format
# for files in os.listdir(path):   
#     subdir = os.path.join(path, files)
#     if os.path.isdir(subdir):
#         imgs = os.scandir(subdir)
#         imagelist += [int(x.name.split('.')[0]) for x in list(imgs)]
        
# # creates a list of all image file names in int format
# if os.path.isdir(subpath):
#     subdir_imgs = os.scandir(subpath)
#     subdir_imagelist += [int(x.name.split('.')[0]) for x in list(subdir_imgs)]
    
# #imagelist, subdir_imagelist

In [None]:
#for entry in train_metadata['annotations']:
#    if entry['image_id'] in imagelist:
#        print(entry['category_id'])

In [None]:
# Build one dataframe from the "images" and "annotations" lists of train_metadata, because it is easier to search.
# NOTE(review): the merge key is "id" on both sides, which presumably relies on each
# annotation's "id" being equal to its "image_id" - confirm.
train_images_df = pd.DataFrame(train_metadata["images"])
train_annotations_df = pd.DataFrame(train_metadata["annotations"])
dataset = (
    train_images_df
    .merge(train_annotations_df, how="outer", on="id")
    .loc[:, ["id", "image_id", "category_id", "file_name"]]
    # img_in_cat = number of rows that share the row's category_id
    .assign(img_in_cat=lambda frame: frame.groupby("category_id")["category_id"].transform(len))
)

In [None]:
# Sorted list of every distinct category_id that appears in the annotations
total_category_list = sorted(train_annotations_df["category_id"].unique())
# creating a list with the indices of all the images we want in the subset
# random_species_subset = random.sample(range(total_species), 10000)

In [None]:
# Number of entries in the metadata's "categories" list (one entry per species)
total_species = len(train_metadata["categories"])
print("Total number of species is %d, Total number of images is %d" %(total_species, total_images))

In [None]:
print("Metadata has been imported. DataFrame with all values has been created.")

# Statistical Inference

In [None]:
# Get a dataframe images per species vs number of species
def get_stats(ds):
    """
    Summarise how many images each category has.

    Parameters:
        ds: DataFrame with a "category_id" column, one row per image.

    Returns:
        (hist, freq_list) where
        - hist is a DataFrame with columns "num_imgs" and "num_cats": for each
          distinct images-per-category count, the number of categories having
          that count, sorted by "num_imgs";
        - freq_list is the list of images-per-category counts, one entry per
          category, ordered by category_id.

    The counts come from value_counts() rather than from sampling one random
    row per group, so the function is deterministic and does not touch any
    random state.
    """
    # images per category, ordered by category_id
    imgs_per_category = ds["category_id"].value_counts().sort_index()
    freq_list = imgs_per_category.to_list()

    # categories per distinct image count, ordered by image count
    cats_per_count = imgs_per_category.value_counts().sort_index()
    hist = pd.DataFrame({
        "num_imgs": cats_per_count.index.to_list(),
        "num_cats": cats_per_count.to_list(),
    })
    return hist, freq_list

In [None]:
def desc_extract(percent=1, df=dataset):
    """
    Describe the images-per-category distribution of a random sample of df.

    Parameters:
        percent: percentage (0-100) of df's rows to sample.
        df: DataFrame with a "category_id" column, one row per image.

    Returns:
        A 4-tuple:
        - scipy.stats.describe() of the per-category image counts,
        - the histogram DataFrame from get_stats(),
        - the list of per-category image counts,
        - [categories per sampled image, number of categories, number of categories].

    Raises:
        ValueError: if percent selects no rows (this previously surfaced as a
            ZeroDivisionError).
    """
    img_num_by_pcnt = int(df.shape[0]*percent/100)
    if img_num_by_pcnt <= 0:
        raise ValueError("percent=%s selects no rows from a dataframe of %d rows" % (percent, df.shape[0]))
    sdataset = df.sample(img_num_by_pcnt)
    num_cats_in_pcnt = len(sdataset["category_id"].unique())
    # ratio of distinct categories to sampled images
    pcnt_species = num_cats_in_pcnt/img_num_by_pcnt
    hist_chart, freq_list = get_stats(sdataset)
    return stats.describe(freq_list), hist_chart, freq_list, [pcnt_species, len(freq_list), num_cats_in_pcnt]

In [None]:
def print_stats(pcnt=100, print_desc=True, dtf=dataset):
    """
    Report quantiles of the images-per-species distribution.

    Parameters:
        pcnt: percentage of dtf to sample (passed to desc_extract()).
        print_desc: when True, print the textual report.
        dtf: DataFrame with a "category_id" column, one row per image.

    Returns:
        ([10% quantile, q1, median, q3, 90% quantile, max, mean], report_text)
    """
    # Only the per-species image counts are needed from desc_extract()
    _, _, full_fl, _ = desc_extract(pcnt, dtf)
    q0, low10, q1, q2, q3, top10, q4 = (
        np.quantile(full_fl, level) for level in (0.0, 0.10, 0.25, 0.5, 0.75, 0.90, 1)
    )
    full_mean = np.mean(full_fl)
    line1 = "1/4 species have between %d and %d images \n1/4 species have between %d and %d images \n"%(q0, q1, q1, q2)
    line2 = "1/4 species have between %d and %d images \n1/4 species have between %d and %d images \n"%(q2, q3, q3, q4)
    # "10%" used to be misprinted as "%10"
    line3 = f"10% species have between 1 and {low10} images and 10% have between {top10} and {q4} images \n"
    line4 = "On average, each specie has %d images \n"%full_mean
    # The median is the 0.5 quantile (q2); this line used to print q3
    line5 = "Median number of images is %d \n"%q2
    fullprint = line1+line2+line3+line4+line5
    if print_desc:
        print(fullprint)
    return [low10, q1, q2, q3, top10, q4, full_mean], fullprint

In [None]:
# desclist = []
# for x in [0.1, 0.5, 1, 5, 10, 50, 100]:
#     st, hc, _, y = desc_extract(percent=x)
#     desclist.append(st)
#     normalized_stats = {"normalized mean":st.mean*100/x, "normalized variance":st.variance/((x/100)**2)}
#     print("\n Percent is: ", x, " Some stats: ",y, "\n Description: ", st, "\n stats normalized to 100%", normalized_stats, "\n")

In [None]:
# print_stats(100)

In [None]:
# Plot number of images per species vs number of species
def plot_bar(min_num=0, num_to_check=total_images, train_metadata=train_metadata):
    """
    Bar-plot the number of species against the number of images per species.

    Parameters:
        min_num: number of leading bars to skip. This is a positional offset
            into the sorted list of distinct images-per-species counts, not a
            minimum image count.
        num_to_check: number of annotations to draw at random (without
            replacement) from train_metadata["annotations"].
        train_metadata: metadata dict holding an "annotations" list whose
            entries have a "category_id".

    Returns:
        The plotted DataFrame with columns "Number of Images" and
        "Number of Species".

    The earlier version also built an images dataframe and drew one random
    sample per category without using either; those steps were removed, so
    random.sample() is now called once instead of twice.
    """
    tr_anno_df = pd.DataFrame(random.sample(train_metadata["annotations"], num_to_check))
    # images per species in the sample
    imgs_per_species = tr_anno_df["category_id"].value_counts()
    # number of species for each distinct image count, sorted by image count
    frequency_series = imgs_per_species.value_counts().sort_index()
    graph_x = frequency_series.index.to_list()[min_num:]
    graph_y = frequency_series.to_list()[min_num:]
    graph_data = pd.DataFrame(list(zip(graph_x, graph_y)), columns=["Number of Images", "Number of Species"])
    graph_data.plot(x="Number of Images", y="Number of Species", kind="bar")
    return graph_data

In [None]:
# The data for the entire dataset is saved as frequency_table
# graph_data.to_csv("frequency_table.csv")
# pd.read_csv("frequency_table.csv")

In [None]:
# The following function returns the categories that have MORE than min_num images
# (strict comparison), judged by the precomputed "img_in_cat" column of df.
# num_to_check is accepted but not used.

def find_species(min_num=10, num_to_check=total_images, df=dataset):
    """
    Return the category ids whose image count exceeds min_num.

    Parameters:
        min_num: threshold; a category qualifies when img_in_cat > min_num.
        num_to_check: unused.
        df: DataFrame with "img_in_cat" and "category_id" columns.

    Returns:
        A list of distinct category ids (order is that of a Python set).
    """
    big_enough = df["img_in_cat"] > min_num
    distinct_cats = set(df.loc[big_enough, "category_id"].to_list())
    return list(distinct_cats)

In [None]:
# Efficient code for fetching images
def image_fetcher(min_images = 10, min_species = 100, max_img_per_species = -1, dataset=dataset, exclude_categories=[]):
    """
    Select a subset of images from dataset.

    Parameters:
        min_images: only categories with more than this many images are
            eligible (see find_species). 0 makes every category eligible.
        min_species: number of categories to pick at random. A negative value,
            or a value equal to total_species, selects every category; in that
            case min_images and exclude_categories are ignored.
        max_img_per_species: cap on the images kept per category (larger
            categories are randomly sampled down). A value <= 0 means no cap.
        dataset: DataFrame with "id", "image_id", "file_name", "category_id"
            and "img_in_cat" columns.
        exclude_categories: category ids that must not be picked (only applied
            when not all categories are selected). Never mutated.

    Returns:
        {"dataframe": selected rows, "num images": row count,
         "num species": number of distinct category ids}

    Raises:
        Exception: when fewer than min_species categories have more than
            min_images images.

    Changes from the earlier version:
    - selecting all species with max_img_per_species <= 0 (the default) used
      to fail inside DataFrame.sample(); it now returns every image;
    - find_species() is given the dataset argument instead of silently using
      the global one;
    - the helper column is added with assign() rather than by writing into a
      slice of dataset.
    """
    def cap_group(group):
        # Keep small groups whole; sample larger ones down to the cap
        if group.shape[0] <= max_img_per_species:
            return group
        return group.sample(max_img_per_species)

    # Are we collecting all species?
    all_species = (min_species == total_species or min_species < 0)

    # Candidate categories: everything, or only those with enough images
    if min_images == 0 or all_species:
        category_list = total_category_list
    else:
        category_list = find_species(min_images, df=dataset)
        # NOTE(review): this check runs before exclude_categories is applied,
        # so fewer than min_species categories can still be returned below.
        if len(category_list)<min_species:
            raise Exception("There aren't enough species with images more than min_images")

    if not all_species:
        # Drop excluded categories, then pick min_species of the rest at random
        category_list = np.setdiff1d(np.array(category_list), np.array(exclude_categories))
        random.shuffle(category_list)
        selected_category_list = category_list[0:min_species]
        df = dataset.loc[dataset["category_id"].isin(selected_category_list)]
        if max_img_per_species>0:
            # Group on a copy of the column so "category_id" stays available as data
            df = df.assign(cat_id=df["category_id"])
            df = df.groupby("cat_id")[["id","image_id", "file_name", "category_id"]].apply(cap_group)
            df = df.reset_index()
    elif max_img_per_species>0:
        # All species: ignore min_images and keep at most max_img_per_species per category
        df = dataset.groupby("category_id")[["image_id", "file_name"]].apply(cap_group)
        df = df.reset_index()
    else:
        # All species, no cap: every image that has a category
        df = dataset.dropna(subset=["category_id"]).loc[:, ["category_id", "image_id", "file_name"]]
        df = df.reset_index(drop=True)

    selected_images = df.shape[0]
    num_species = len(df["category_id"].unique())
    return {"dataframe": df, "num images":selected_images, "num species":num_species}

    

In [None]:
# out = image_fetcher(0, 30, -1)
# print_stats(100, True, out["dataframe"])

In [None]:
# print_stats(100)

In [None]:
# Build the subset in five tiers. Each image_fetcher call is given
# (min_images, min_species, max_img_per_species, dataset[, exclude_categories]).
# catsq accumulates the category ids already picked so that later tiers skip them.
# NOTE(review): the thresholds presumably follow the quartiles of the
# images-per-species distribution - confirm.
q0imgs = image_fetcher(0, 30, 2, dataset)["dataframe"]
catsq = list(q0imgs["category_id"].unique())
q1imgs = image_fetcher(0, 70, 4, dataset, catsq)["dataframe"]
# to_list() adds one entry per row, so catsq contains repeated ids from here on
catsq += q1imgs["category_id"].to_list()
q2imgs = image_fetcher(3, 100, 9, dataset, catsq)["dataframe"]
catsq += q2imgs["category_id"].to_list()
q3imgs = image_fetcher(8, 100, 27, dataset, catsq)["dataframe"]
catsq += q3imgs["category_id"].to_list()
q4imgs = image_fetcher(26, 100, 1500, dataset, catsq)["dataframe"]

In [None]:
# Stack the five tiers into one frame. ignore_index=True gives the result a fresh
# 0..n-1 index; without it each tier's own 0..k index is kept, leaving duplicate labels.
train_df = pd.concat([q0imgs, q1imgs, q2imgs, q3imgs, q4imgs], ignore_index=True)[["image_id", "id", "file_name", "category_id"]]

In [None]:
train_num_cats = len(train_df["category_id"].unique())

In [None]:
print(f"A small representative subset has been created. \nIt has {train_num_cats} species and {train_df.shape[0]} images ")

Time to create models and run experiments!

# Creating the DataLoader

We need to extract a subset of images to train the model, as training on the entire set would be extremely time consuming. 
The test set's metadata only contains the images dictionary. Thus, the "images" dictionary of the metadata.json is going to be the input.
To train the model, we need to identify a subset of images, and feed in both the images and annotation dictionaries of that subset. 

There are probably multiple ways of doing this with fastaiv2 library. However, we are going to take a straightforward approach here. This involves creating a new pandas dataframe which contains 3 columns, "image-id", "image-path" and "category-id". 



## The subset dataframe
We will create a dataframe of ~100 species and all the corresponding images. We'll try to restrict the images to ~10k

In [None]:
# fetch a set of images with min 250 species and min 10000 images
# all of these species have at least 2 image samples
# out = image_fetcher(0, 3000, 5)

In [None]:
# out["num images"], out["num species"]

In [None]:
# train_df = out["dataframe"]

### Functions of the DataLoader

`get_x` takes in rows from the dataframe and returns the file path of the corresponding image

`get_y` takes in rows from the dataframe and returns the label

`splitter` takes in the entire dataframe and returns two lists; one with the indices of training datapoints and the second with the indices of valid datapoints

In [None]:
def train_labeller(df, prop_valid=0.2):
    """
    Add a boolean "in_training" column that marks the train/validation split.

    One randomly chosen image of every category is always placed in the
    training set; each remaining image goes to the training set when
    random.random() > prop_valid.

    Parameters:
        df: DataFrame with "category_id" and "image_id" columns. It is
            modified in place (the column is added) and also returned.
        prop_valid: approximate proportion of the remaining images to put in
            the validation set (default 0.2, the previously hard-coded value).

    Returns:
        The same DataFrame object, with the "in_training" column set.
    """
    # One guaranteed training image per category; a set gives O(1) membership tests
    anchors = set(df.groupby("category_id")["image_id"].apply(lambda s: s.sample(1)).to_list())
    df["in_training"] = [
        (image_id in anchors) or (random.random() > prop_valid)
        for image_id in df["image_id"]
    ]
    return df

In [None]:
df_to_train = train_labeller(train_df)

In [None]:
df_to_train = image_fetcher(0, -1, 5)["dataframe"]

In [None]:
df_to_train = train_labeller(df_to_train)

In [None]:
df_to_train

In [None]:

def get_x(r):
    """Return the image path for row r: train_path joined with the row's "file_name"."""
    relative_name = r["file_name"]
    return train_path / relative_name

def get_y(r):
    """Return the label of row r, i.e. its "category_id" value."""
    label = r["category_id"]
    return label

def data_splitter(df):
    """
    Split df into training and validation row positions using "in_training".

    Parameters:
        df: DataFrame with a boolean "in_training" column and a "category_id"
            column (see train_labeller).

    Returns:
        (train_idx, valid_idx): two lists of integer row positions.

    Raises:
        Exception: if a category appears in the validation rows but not in the
            training rows.

    Both lists are positional. The earlier version took the training entries
    from the index labels and the validation entries from positions, which
    only agree when the index is exactly 0..n-1.
    """
    in_training = df["in_training"].to_numpy(dtype=bool)
    train_idx = np.flatnonzero(in_training).tolist()
    valid_idx = np.flatnonzero(~in_training).tolist()

    ## Debug: every category seen in valid must also be present in train
    categories = df["category_id"].to_numpy()
    valid_cats = set(categories[~in_training].tolist())
    train_cats = set(categories[in_training].tolist())
    if not valid_cats.issubset(train_cats):
        raise Exception("There is a category in valid which is not present in train")

    return train_idx, valid_idx

In [None]:
# # get one row from the dataframe and show all the details
# one_row = train_df.iloc[5]
# imgpath = get_x(one_row)
# cat_id = get_y(one_row)
# sampleimg = Image.open(imgpath)
# print(cat_id)
# show_image(sampleimg)

# Automating Experimentation
There are too many experiments to run. The following code makes that easy.

In [None]:
# To quickly iterate over different learners, with different batch sizes, aug transforms, etc.
# AIM: To set up a loop which will iterate over different things I want to try, calculate the results and print them out

class trainExperiments():
    """
    Configure, run and log one training experiment.

    An experiment is created from an id and an optional run_params dict (keys:
    "dataset", "cbs", "nn", "metrics", "bs", "item_tfms", "batch_tfms"; any
    missing key falls back to the class-level default). run_experiment() then
    takes a list of train_setup dicts and executes them in order;
    continue_experiment() runs further setups on the existing learner.

    Keys read from a train_setup dict: "method" ("fit", "all_batches",
    "fit_one_cycle" or "fine_tune"), "n" (epochs), "lr", and optionally
    "freeze", "append", "bs", "item_tfms", "batch_tfms".

    One row per training loop is appended to a shared CSV file
    (experiment_records + ".csv"); per-epoch metrics go through fastai's
    CSVLogger.
    """
    # Class-level defaults, used for any key missing from run_params
    default_nn = resnet18
    default_dataset = df_to_train
    default_cbs = []
    default_metrics = F1Score(average="macro")
    default_bs = 64
    default_item_tfms = [Resize(600)]
    default_batch_tfms = [*aug_transforms(size=600, min_scale=0.75)]
    # Base name (without ".csv") of the file that collects all experiment records
    experiment_records = "ExpRecords"
    
    def __init__(self, experiment_num, run_params = {}):
        """
        Parameters:
            experiment_num: identifier used in log names and record rows.
            run_params: dict of overrides for the class-level defaults.
        """
        self.learner = None
        self.dataset = run_params.get("dataset", self.default_dataset)
        # Own copy of the callback list: log_initiate() appends to it, and the
        # class-level default (or the caller's list) must not grow as a side effect.
        self.cbs = [*run_params.get("cbs", self.default_cbs)]
        self.net = run_params.get("nn", self.default_nn)
        self.metrics = run_params.get("metrics", self.default_metrics)
        self.bs = run_params.get("bs", self.default_bs)
        self.item_tfms = run_params.get("item_tfms", self.default_item_tfms)
        self.batch_tfms = run_params.get("batch_tfms", self.default_batch_tfms)
        self.exp_num = experiment_num
        self.log_name = "Experiment"+str(self.exp_num)
        self.text_log = None
        # Counter of training loops started so far
        self.iter = 0
        self.record_in_one_file = True
        self.record_file_name = self.experiment_records
    
    def log_after_epoch(self, *args, **kwargs):
        """Callback body run after every epoch; currently only prints a marker."""
        print("One epoch complete")
#         with open(self.log_name+"log.txt", "a+") as log_file:
#             log_file.write(get_hardware_info())
#             log_file.write("--------------\n")
            
    def epoch_writer_wrap(self):        
        """Wrap log_after_epoch in a fastai Callback."""
        return Callback(after_epoch=self.log_after_epoch)
        
    def log_initiate(self, train_settings):
        """
        Register the logging callbacks and create the record file if needed.

        Appends a CSVLogger and the per-epoch callback to self.cbs, and writes
        the header row when the record CSV does not exist yet. train_settings
        is accepted but not used.
        """
        fields = ['Experiment ID', 'Train Loop', 'Architecture', 'Dataset Size', 'Number of Species', 'Method', 'Epochs', 'Learning Rate', 'Batch Size', 'Item Tfms', 'Batch Tfms', 'Callbacks']
        self.log_name = "Experiment "+str(self.exp_num)
        self.csvlog = CSVLogger(fname=self.log_name, append=True)
        self.cbs.append(self.csvlog)
        self.cbs.append(self.epoch_writer_wrap())
        self.record_file_name = self.record_file_name if self.record_in_one_file else self.log_name + "log"
        if not os.path.isfile(self.record_file_name+".csv"):
            with open(self.record_file_name+".csv", "w") as log_file:
                csvwriter = csv.writer(log_file)
                csvwriter.writerow(fields)
        
    def log_train_loop(self, train_setup):
        """
        Append the description of one training loop to the record CSV.

        The first row carries the experiment fields; when there are several
        transforms, the following rows leave those fields empty and list one
        item/batch transform pair each.
        """
        fpl = [self.exp_num, self.iter, self.net.__name__, self.dataset.shape[0], len(self.dataset["category_id"].unique()), train_setup["method"], train_setup["n"], train_setup["lr"], self.bs]
        str_fpl = [str(y) for y in fpl]
        fpl_none = [None]*9
        # Transform descriptions are extracted from the string form of the transform lists
        item_tfms_list = re.findall(r'\w+\s+--\s+.+}:', str(self.item_tfms))
        batch_tfms_list = re.findall(r'\w+\s+--\s+.+}:', str(self.batch_tfms))
        first_record = True
        with open(self.record_file_name+".csv", "a+") as log_file:
            csvwriter = csv.writer(log_file)
            if(len(item_tfms_list)==0 and len(batch_tfms_list)==0):
                # This branch used to reference an undefined name (ftr_fpl)
                csvwriter.writerow(str_fpl+[None, None, str(self.cbs)])
            else:
                for i in itertools.zip_longest(item_tfms_list, batch_tfms_list):
                    if first_record:
                        first_record = False
                        csvrow = str_fpl+[i[0], i[1], str(self.cbs)]
                    else: csvrow = fpl_none+[i[0], i[1], str(self.cbs)]
                    csvwriter.writerow(csvrow)
    
    def get_dls(self):
        """Build DataLoaders for self.dataset with the current transforms and batch size."""
        dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_x=get_x,
                   get_y=get_y,
                   item_tfms = self.item_tfms,
                   splitter = data_splitter,
                   batch_tfms=self.batch_tfms)
        return dblock.dataloaders(self.dataset, batch_size=self.bs)
    
    def update_learner_dls(self, train_setup):
        """
        Apply the data settings of train_setup and rebuild the learner's DataLoaders.

        "bs" replaces the batch size. "item_tfms" / "batch_tfms" replace the
        current transforms, or extend them when train_setup["append"] is true.
        Extending builds a new list, so neither the class defaults nor lists
        supplied by the caller are modified.
        """
        self.iter += 1
        append = train_setup.get("append", False)
        if "bs" in train_setup: self.bs = train_setup["bs"]
        if "item_tfms" in train_setup:
            if append:
                self.item_tfms = [*self.item_tfms, *train_setup["item_tfms"]]
            else:    
                self.item_tfms = train_setup["item_tfms"]
        if "batch_tfms" in train_setup:
            if append:
                self.batch_tfms = [*self.batch_tfms, *train_setup["batch_tfms"]]
            else:    
                self.batch_tfms = train_setup["batch_tfms"]
        self.learner.dls = self.get_dls()
    
    def create_learner(self):
        """Create the cnn_learner for the configured architecture and attach the callbacks."""
        self.learner = cnn_learner(dls=self.get_dls(), arch=self.net, metrics=self.metrics)
        self.learner.add_cbs(self.cbs)
        
    def train_loop(self, train_setup={}):
        """
        Log and run one training loop described by train_setup.

        Returns None immediately for an empty dict. When the "freeze" key is
        present, the learner is frozen before training and unfrozen afterwards.
        """
        # fit, one_batch, fit_one_cycle, fine_tune
        if(len(train_setup)==0): return None
        self.log_train_loop(train_setup)
        n_epoch = train_setup["n"]
        lr = train_setup["lr"]
        method = train_setup["method"]
        use_freeze = "freeze" in train_setup
        if use_freeze: self.learner.freeze()
        if method == "fit":
            self.learner.fit(n_epoch, lr)
        elif method == "all_batches":
            self.learner.all_batches()
        elif method == "fit_one_cycle":
            self.learner.fit_one_cycle(n_epoch, lr)
        elif method == "fine_tune":
            self.learner.fine_tune(n_epoch, lr)
        if use_freeze: self.learner.unfreeze()
                
    def run_experiment(self, train_settings=[{}]):
        """Set up logging, create the learner and run every setup in train_settings."""
        self.log_initiate(train_settings)
        # create_learner() builds the DataLoaders itself; the separate get_dls()
        # call that used to precede it had its result discarded.
        self.create_learner()
        for train_setup in train_settings:
            self.update_learner_dls(train_setup)
            self.train_loop(train_setup)
        self.learner.remove_cbs(self.cbs)
        
    def continue_experiment(self, train_settings=[{}]):
        """
        Run further setups on the learner created by run_experiment().

        Raises:
            Exception: if no learner exists yet.
        """
        # Check first: the callbacks used to be attached before this test, so a
        # missing learner failed with an AttributeError instead of this message.
        if self.learner is None:
            raise Exception("Run an experiment first")
        self.learner.add_cbs(self.cbs)
        for train_setup in train_settings:
            self.update_learner_dls(train_setup)
            self.train_loop(train_setup)
        self.learner.remove_cbs(self.cbs)

# Example of the train_settings format accepted by trainExperiments.run_experiment():
# a list of train_setup dicts (an empty dict is skipped by train_loop).
# Note: "change_dls" is not read by any code in this notebook.
structure = [{"method":"fit_one_cycle",  "n":9, "lr": 0.02, "append": False, "change_dls": False, "item_tfms":[], "batch_tfms":[], "bs":32}, {}]

In [None]:
# List of experiments; each entry is [run_params, train_settings]:
# run_params goes to trainExperiments(...), train_settings to run_experiment(...).
learning_program = [   
    [{"nn":resnet34, "dataset":df_to_train, "bs":60, "item_tfms":[Resize(600)], "batch_tfms":[*aug_transforms(size=224, min_scale=0.1, max_rotate=90)]}, 
     [ 
        {"method":"fine_tune", "lr":0.003, "n":7, "bs":60}
     ]
    ]
]

In [None]:
# Run the first experiment of learning_program under a random experiment id,
# then save the trained learner under the name 'finalout'.
print("Starting the training process!")
ds1 = learning_program[0]
le = trainExperiments(randomstr(), ds1[0])
le.run_experiment(ds1[1])
le.learner.save('finalout')

In [None]:
# for x in learning_program:
#     le = trainExperiments(randomstr(), x[0])
#     le.run_experiment(x[1])

In [None]:
le.learner.show_training_loop()

In [None]:
# le.learner.lr_find()

In [None]:
# Deliberate stop so that "Run All" does not continue into the clean-up and
# archive cells below. A bare `break` outside a loop is a SyntaxError with a
# confusing message; raise an explicit, self-explaining error instead.
raise RuntimeError("Stopping here on purpose: run the remaining cells manually.")

In [None]:
# del le.learner
# gc.collect()
# torch.cuda.empty_cache()

In [None]:
gc.collect()
torch.cuda.empty_cache()

In [None]:
import shutil
import tempfile

# Build the zip in a scratch directory and move it into place afterwards.
# Writing './expzip.zip' directly while archiving /kaggle/working puts the
# half-written archive inside the directory being archived whenever the
# current directory is /kaggle/working.
with tempfile.TemporaryDirectory() as staging_dir:
    staged_zip = shutil.make_archive(os.path.join(staging_dir, 'expzip'), 'zip', '/kaggle/working')
    shutil.move(staged_zip, './expzip.zip')


In [None]:

# [{"nn":resnet34, "dataset":df_to_train, "bs":64, "item_tfms":[Resize(600)], "batch_tfms":[*aug_transforms(size=256, min_scale=0.1, max_rotate=90)]}, 
#  [ 
#     {"method":"fine_tune", "lr":0.003, "n":12, "bs":64}
#  ]
# ]