## First of All, credit goes to [Yaroslav Isaienkov](https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling) for the great notebook to study and inspiration 

> ### His Original NB @ https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling/notebook
> 1st run gave a 0.60 accuracy on LB w/ no changes


### The Format of This Notebook Will be as Follows:
*        Markup Code-Block Label
1.                   The Original Code Block
1.                   The Annotated Code Block
1.                 **[Bonus] And the occasional demonstration of what functions are doing in the main code block in a separate code block**

### The Goal/Motivation
> I dived head first into this notebook for several long hours trying to annotate this notebook so that I could improve my own skills and understanding

> By posting the culmination of my annotations of this notebook so far, the idea is that others will be able to glean more than they would otherwise (especially beginners)

> And lastly, I left a few questions within the annotations that I couldn't find the answers to. Feedback from those who know what I am missing to connect the dots is SUPER appreciated

<div style="color:white;
           display:fill;
           border-radius:5px;
           background-color:#5642C5;
           font-size:110%;
           font-family:Verdana;
           letter-spacing:0.5px">

<p style="padding: 5px;
              color:white;">
                    Annotations Begin Here: 
</p>               
</div>


## imports

In [None]:
import os
#for file management
import json
#for standardized data storage
import glob
#for precise file selection(low verbosity)
import random
#pseudo-random number generator
import collections
#has premade datastructure objects that can be implemented
import numpy as np
#linear algebra
import pandas as pd
#succinct array and data handling
import pydicom
#pure-python library for reading and writing dicom files
from pydicom.pixel_data_handlers.util import apply_voi_lut
#helper that applies the VOI LUT / windowing stored in the dicom header to the pixel data (imported here but not called in the cells shown below)
import cv2
#image data handler
import matplotlib.pyplot as plt
#data visualization library
import seaborn as sns
#data visualization library

## dataloading

In [None]:
train_df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
#load data into train_df
train_df

## Unannotated

In [None]:
def load_dicom(path):
    """Read one DICOM file and return its pixel data rescaled to uint8 (0-255)."""
    pixels = pydicom.read_file(path).pixel_array
    # shift the values so the darkest pixel becomes 0
    pixels = pixels - np.min(pixels)
    # a constant image has a maximum of 0 after the shift; skip the division then
    if np.max(pixels) != 0:
        pixels = pixels / np.max(pixels)
    return (pixels * 255).astype(np.uint8)


def visualize_sample(
    brats21id, 
    slice_i,
    mgmt_value,
    types=("FLAIR", "T1w", "T1wCE", "T2w")
):
    """Show one slice of every MRI type of a patient side by side.

    brats21id is the patient id, slice_i the relative position (0-1) of the
    slice inside each series, mgmt_value the label printed in the title.
    """
    plt.figure(figsize=(16, 5))
    patient_path = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/", 
        str(brats21id).zfill(5),
    )
    for position, mri_type in enumerate(types, start=1):
        slice_files = glob.glob(os.path.join(patient_path, mri_type, "*"))
        # order the files by the slice number at the end of the file name
        slice_files = sorted(slice_files, key=lambda x: int(x[:-4].split("-")[-1]))
        chosen_file = slice_files[int(len(slice_files) * slice_i)]
        image = load_dicom(chosen_file)
        plt.subplot(1, 4, position)
        plt.imshow(image, cmap="gray")
        plt.title(f"{mri_type}", fontsize=16)
        plt.axis("off")

    plt.suptitle(f"MGMT_value: {mgmt_value}", fontsize=16)
    plt.show()

## Annotated

In [None]:
#load one dicom file from a given path and return its pixels as an 8-bit grayscale image
def load_dicom(path):
    #variable dicom = the dataset (header + pixel data) that pydicom parsed from the file
    #dcmread is the documented pydicom reader; read_file is only a legacy alias for it
    dicom = pydicom.dcmread(path)
    #variable data = pixel data of the slice, returned as a numpy array
    data = dicom.pixel_array
    #shift every pixel by the minimum so that the darkest pixel becomes 0
    data = data - np.min(data)
    #after the shift the maximum is 0 only when every pixel had the same value (a blank slice);
    #in that case we must not divide, otherwise we would divide by zero
    max_value = np.max(data)
    if max_value != 0:
        #normalize data(part 1): dividing by the largest pixel value gives fractions between 0 and 1
        data = data / max_value
    #normalize data(part 2): scale the fractions to 0-255 and store them as uint8
    #why uint8? 0-255 in one byte per pixel is the standard format for grayscale images, it is what
    #image tools such as cv2 and matplotlib handle as an ordinary picture, and it takes far less memory
    #than the float array produced by the division above
    data = (data * 255).astype(np.uint8)
    #normalizing every slice the same way is not strictly required just to look at an image, but it makes
    #slices comparable with each other and lets the function be reused later for the model input
    return data


def visualize_sample(
    brats21id, #patient id taken from the BraTS21ID column (a number such as 1, zero-padded to "00001" below)
    slice_i, #relative position of the slice to show: 0.0 = first slice, 0.5 = middle slice
    mgmt_value, #label taken from the MGMT_value column, only used for the figure title
    types=("FLAIR", "T1w", "T1wCE", "T2w") #the sub-folders of a patient, one per MRI type, each holding that type's slices
):
    #generates a matplot figure with given width and height
    plt.figure(figsize=(16, 5))
    #os.path.join glues its arguments together from left to right, so the dataset folder has to come FIRST
    #and the patient folder second, giving e.g. "../input/.../train/00001"
    #str(brats21id) turns the numeric id into text and .zfill(5) pads it on the left with zeros until it
    #is 5 characters long (1 -> "00001"), which is how the patient folders are named
    patient_path = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/",
        str(brats21id).zfill(5),
    )

    #types is a tuple; enumerate(types, 1) hands back (1, "FLAIR"), (2, "T1w"), ... so i is the
    #1-based position of the subplot and t is the name of the MRI type / sub-folder
    for i, t in enumerate(types, 1):
        #glob.glob returns the path of every file inside patient_path/t
        #the key= function is applied to each path x before sorting:
        #   x[:-4]          drops the last 4 characters of the path, i.e. the ".dcm" extension
        #   .split("-")[-1] keeps what follows the last dash, i.e. the slice number ("Image-12" -> "12")
        #   int(...)        turns it into a number so that Image-2 sorts before Image-10
        #the trailing comma after key=... is only a style choice, it does not change anything
        t_paths = sorted(
            glob.glob(os.path.join(patient_path, t, "*")),
            key=lambda x: int(x[:-4].split("-")[-1]),
        )
        #pick ONE slice per type: slice_i is a fraction of the series length (0.5 -> the middle slice)
        #min(...) keeps slice_i=1.0 from pointing one past the last slice
        slice_index = min(int(len(t_paths) * slice_i), len(t_paths) - 1)
        #load_dicom is the helper defined in this notebook; it returns the slice as a 2d uint8 array
        data = load_dicom(t_paths[slice_index])

        #subplot(rows, columns, position): one row with one column per MRI type, position i counts from 1
        plt.subplot(1, len(types), i)
        #plt.imshow draws the 2d array as an image using the color map given in cmap
        plt.imshow(data, cmap="gray")
        #title of this subplot = name of the MRI type, written with an f-string (python 3.6+)
        plt.title(f"{t}", fontsize=16)
        #hides the x and y axis (ticks and frame) around the image
        plt.axis("off")

    #one title for the whole figure that shows the MGMT value of this patient, 1(present) or 0(not present)
    plt.suptitle(f"MGMT_value: {mgmt_value}", fontsize=16)
    #shows the plot
    plt.show()
    
'''
note: the load_dicom called above is the small python helper defined at the top of this cell (pydicom + numpy), it is not a library function.
an unrelated function with the same name exists in the R package RIA; for those interested its signature is shown below, see the docs here: https://rdrr.io/cran/RIA/src/R/load_dicom.R 

load_dicom <- function(filename, mask_filename = NULL, keep_mask_values = 1, switch_z = TRUE, 
                       crop_in = TRUE, replace_in = TRUE, center_in = TRUE,  zero_value = NULL, min_to = -1024,
                       header_add = NULL, header_exclude = NULL, verbose_in = TRUE,
                       recursive_in = TRUE, exclude_in = "sql",
                       mode_in = "integer", transpose_in = TRUE, pixelData_in = TRUE,
                       mosaic_in = FALSE, mosaicXY_in = NULL, sequence_in = FALSE, ...
)
'''

## [Quick Demonstration] of the sorted comprehension in the 'visualize_sample' function for those confused:

> consider an array of the following contents and length

In [None]:
#toy list of five zero-padded strings, sliced in the next cell
new  =['00001','00002','00003','00004','00005']

> if we use the [:-4] slice we will get the following

In [None]:
new[:-4] #everything except the last four elements of the list

> Therefore [:-4] keeps everything *except* the last 4 items. On the list above only '00001' is left; on a string it drops the last 4 characters, e.g. 'Image-12.dcm'[:-4] gives 'Image-12'

> In the sort key, x is one file path (a string), so x[:-4] strips the '.dcm' extension, .split("-")[-1] keeps the text after the last dash (the slice number) and int() turns that text into a number

>sorted() therefore orders the slice files numerically (Image-2 before Image-10) instead of alphabetically, which is thereby 

>a list that has all of the paths to the slices within the given type, in slice-number order

see the next code cell for optional confirmation of the sorted comprehension analysis

In [None]:
''' Run this cell for confirmation of what values are being pulled out with our sorted comprehension'''
types=("FLAIR", "T1w", "T1wCE", "T2w")
#patient_path only exists inside visualize_sample, so it is defined here (patient 00000) to make
#this cell runnable on its own
patient_path = os.path.join(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/",
    "00000",
)
for i, t in enumerate(types, 1):
    #types is a tuple; i counts 1, 2, 3, 4 and t is the matching MRI type name
    #(i is not needed in this demo, it is kept so the loop looks like the one in visualize_sample)

    #glob.glob lists every file inside patient_path/t
    #key= is applied to each path x before sorting:
    #   x[:-4] drops the ".dcm" extension, .split("-")[-1] keeps the slice number after the last dash,
    #   int() makes it numeric so the files come out in slice-number order
    #the trailing comma after key=... is only a style choice
    t_paths = sorted(
        glob.glob(os.path.join(patient_path, t, "*")), 
        key=lambda x: int(x[:-4].split("-")[-1]),
    )

#t_paths is overwritten on every pass of the loop, so what is displayed here is the sorted list of
#slice paths of the LAST type only (T2w)
t_paths

## Unannotated

In [None]:
def load_dicom(path):
    """Read one DICOM file and return its pixel data rescaled to uint8 (0-255)."""
    pixels = pydicom.read_file(path).pixel_array
    # shift the values so the darkest pixel becomes 0
    pixels = pixels - np.min(pixels)
    # a constant image has a maximum of 0 after the shift; skip the division then
    if np.max(pixels) != 0:
        pixels = pixels / np.max(pixels)
    return (pixels * 255).astype(np.uint8)


def visualize_sample(
    brats21id, 
    slice_i,
    mgmt_value,
    types=("FLAIR", "T1w", "T1wCE", "T2w")
):
    """Show one slice of every MRI type of a patient side by side.

    brats21id is the patient id, slice_i the relative position (0-1) of the
    slice inside each series, mgmt_value the label printed in the title.
    """
    plt.figure(figsize=(16, 5))
    patient_path = os.path.join(
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/", 
        str(brats21id).zfill(5),
    )
    for position, mri_type in enumerate(types, start=1):
        slice_files = glob.glob(os.path.join(patient_path, mri_type, "*"))
        # order the files by the slice number at the end of the file name
        slice_files = sorted(slice_files, key=lambda x: int(x[:-4].split("-")[-1]))
        chosen_file = slice_files[int(len(slice_files) * slice_i)]
        image = load_dicom(chosen_file)
        plt.subplot(1, 4, position)
        plt.imshow(image, cmap="gray")
        plt.title(f"{mri_type}", fontsize=16)
        plt.axis("off")

    plt.suptitle(f"MGMT_value: {mgmt_value}", fontsize=16)
    plt.show()

#sample visualizations: draw 10 distinct random row positions and plot the middle slice of each patient
for row_position in random.sample(range(train_df.shape[0]), 10):
    sample_row = train_df.iloc[row_position]
    visualize_sample(
        brats21id=sample_row["BraTS21ID"],
        mgmt_value=sample_row["MGMT_value"],
        slice_i=0.5,
    )

## Annotated

In [None]:
def create_animation(ims):
    #ims is a sequence of 2d image arrays (for example the list returned by load_dicom_line),
    #one array per frame of the animation

    #the import cell of this notebook only brings in matplotlib.pyplot, so the animation
    #sub-module is imported here to keep this function runnable on its own
    from matplotlib import animation

    #generates a figure with given width and height
    fig = plt.figure(figsize=(6, 6))
    #hides the x and y axis so that only the image is visible
    plt.axis('off')
    #draws the first image with a grey color map and keeps the returned image object in im,
    #the animation then only has to swap the pixel data held by this one object
    im = plt.imshow(ims[0], cmap="gray")

    def animate_func(i):
        #set_array() replaces the pixel data shown by im with frame number i, nothing else is redrawn
        im.set_array(ims[i])
        #FuncAnimation expects the list of artists that changed (it only uses it when blit=True)
        return [im]

    #FuncAnimation(fig, func, frames, interval) calls func once per frame:
    #   frames=len(ims) means func receives i = 0, 1, ..., len(ims)-1, i.e. one call per slice, so the
    #                   animation scrolls through the whole stack of slices
    #   interval        is the delay between two frames in MILLISECONDS; 1000//24 = 41 ms per frame,
    #                   which is roughly 24 frames per second (the default would be 200 ms)
    #documentation: https://matplotlib.org/stable/api/_as_gen/matplotlib.animation.FuncAnimation.html#matplotlib.animation.FuncAnimation
    return animation.FuncAnimation(fig, animate_func, frames = len(ims), interval = 1000//24)

## Unannotated

In [None]:
def load_dicom_line(path):
    """Load every non-blank slice found in the folder `path`, ordered by slice number."""
    slice_files = glob.glob(os.path.join(path, "*"))
    slice_files.sort(key=lambda x: int(x[:-4].split("-")[-1]))
    # load lazily, one file at a time, and keep only images that contain a non-zero pixel
    loaded = (load_dicom(filename) for filename in slice_files)
    return [image for image in loaded if not image.max() == 0]

## Annotated

In [None]:
#builds the list of images that can be passed as ims to the create_animation function above
def load_dicom_line(path):
    #same listing + numeric sort as in visualize_sample, but for the single folder given in path:
    #collect every file in that folder and order the files by the slice number in the file name
    t_paths = glob.glob(os.path.join(path, "*"))
    t_paths = sorted(t_paths, key=lambda x: int(x[:-4].split("-")[-1]))
    #images will hold the pixel arrays (not the file names) of the slices we keep
    images = []
    #turn every file path into its pixel array
    for filename in t_paths:
        data = load_dicom(filename)
        #a maximum of 0 means the whole slice is black (blank dcm), such slices are left out
        if data.max() != 0:
            images.append(data)

    return images

## [Quick Demonstration] What the " if data.max() == 0: " did in the background

In [None]:
import numpy as np
#quick note that the array.max() function works for numpy arrays, not regular python lists
#three tiny stand-in "slices": the first and the last are completely black, the middle one has one bright pixel
slices = [
    np.zeros((2, 2), dtype=np.uint8),
    np.array([[0, 7], [0, 0]], dtype=np.uint8),
    np.zeros((2, 2), dtype=np.uint8),
]
new = []
for array in slices:
    #the largest pixel of an all-black slice is 0, so that slice is skipped
    if array.max() == 0:
        continue
    new.append(array)
#only the middle slice survives
print(len(new), "of", len(slices), "slices kept")
print(new)

<h1> modeling imports </h1>

In [None]:
#he imports a reimplementation of the EfficientNet library that has pretrained models and saves the file path to the variable package_path
package_path = "../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/"
#he imports the sys module. the sys module gives access to interpreter settings; sys.path in particular is the list of folders 
#that python searches whenever an import statement is executed
import sys 
#this effectively acts as an import statement except it is grabbing from a file within kaggle that I am guessing kaggle doesn't usually provide(thus
#necessitating this line to use the library of choice)
#another way to phrase it: you are placing the python module of choice into the same path that you are currently working with so that can access said functions universally
sys.path.append(package_path)

#the time library is a c-based library for python. The author uses time() frmom this library frequently in order to 
#return the time in seconds since the epoch as a floating point number. A good way to watch your training progress quantitatively 
import time

#torch is the pytorch library that acts as gpu level linear algebra
import torch

# the nn module is very extensive. it contains functions for initializing container, conv layer, pooling layers, padding layers, non-linear activations, and more
# i feel the need to highlight the importance of this module a bit more: this module eliminates any vectorization by the kaggler themself, but it wouldn't be a 
# bad idea to learn how the underlying principles work both from a backend code and statistics perspective. for this I recommend the nn from scratch by 
# sentdex(https://www.youtube.com/watch?v=Wo5dMEP_BbI&list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3&ab_channel=sentdex) and 
# statquest(https://www.youtube.com/user/joshstarmer) respectively
from torch import nn 

#the module data from pytorch provides easily accessible functions that allow for batch creation(especially useful for large dataset handling) and other handy
#dataloading utility.
#documentation:https://pytorch.org/docs/stable/data.html
from torch.utils import data as torch_data

#sklearn model selection module that provides us with functions to easily split data into training data and testing data
#documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
from sklearn import model_selection as sk_model_selection

#imports the module with the torch nn library that is responsible for conv nets, pooling functions, loss functions, and more as also mentioned in torch nn import above
from torch.nn import functional as torch_functional

#efficientnet pytorch library import for faster conv net training speeds(i suppose that in order to get that edge you should aim to pay attention to groups like
#these's progress on their model creation and updates)
#documentation/website: https://pypi.org/project/efficientnet-pytorch/
!pip install --upgrade efficientnet-pytorch
import efficientnet_pytorch

#imports the stratified k fold functionality from the sklearn library(allows for easier paramaterization of data organization within splits)
#documentation: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
from sklearn.model_selection import StratifiedKFold

## Unannotated

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators with `seed`."""
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_library in (np.random.seed, torch.manual_seed):
        seed_library(seed)
    if not torch.cuda.is_available():
        return
    # GPU-only settings
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

## Annotated

In [None]:
#this function sets every random number generator the notebook uses (python, numpy, pytorch cpu + gpu)
#to the same seed so that two runs of the notebook produce the same numbers (reproducibility)
def set_seed(seed):
    #python's own pseudo-random number generator (used by random.sample etc.)
    #"Use a random.seed() function with other random module functions to reproduce their output again and again."
    #source-> https://pynative.com/python-random-seed/
    random.seed(seed)

    #PYTHONHASHSEED fixes the seed used for hash() of str, bytes and datetime objects.
    #python only reads this variable when an interpreter starts, so setting it here does not change
    #the hashes of the kernel that is already running; it is picked up by python processes started afterwards
    #source-> https://askinglot.com/what-is-pythonhashseed
    os.environ["PYTHONHASHSEED"] = str(seed)

    #makes the random numbers predictable, the generated random numbers will be the same for the same seed value for np functions(see next code block for example)
    #src: https://stackoverflow.com/questions/21494489/what-does-numpy-random-seed0-do
    np.random.seed(seed)
    #makes the random numbers predictable, the generated random numbers will be the same for the same seed value for torch functions
    torch.manual_seed(seed)
    #if we have a gpu(s) available to us
    if torch.cuda.is_available():
        #Sets the seed for generating random numbers on all GPUs. It's safe to call this function if CUDA is not available
        torch.cuda.manual_seed_all(seed)
        #CuDNN picks the same algorithms from the set they have available.
        #source: (user:tom) https://discuss.pytorch.org/t/what-is-the-differenc-between-cudnn-deterministic-and-cudnn-benchmark/38054/2
        torch.backends.cudnn.deterministic = True
        #benchmark mode lets cuDNN time several algorithms and pick the fastest one, and that pick can
        #differ from run to run; the pytorch reproducibility notes recommend turning it off as well
        torch.backends.cudnn.benchmark = False

#set every seed available for pseudo random number generation to the integer value 42
set_seed(42)

## [Quick Demonstration] of numpy.random.seed()

In [None]:
import numpy as numpy
#seed the global numpy generator, then draw four uniform random numbers
numpy.random.seed(0)
numpy.random.rand(4)

In [None]:
#re-seeding with the same value restarts the sequence, so the same four random values come out again
numpy.random.seed(0)
numpy.random.rand(4)

## Unannotated

In [None]:
df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
# Stratify on the labels of the frame that is actually being split, so the
# label column always lines up row by row with the data passed in.
df_train, df_valid = sk_model_selection.train_test_split(
    df, 
    test_size=0.2, 
    random_state=42, 
    stratify=df["MGMT_value"],
)

## Annotated

In [None]:
#sets the df variable to the contents of the train_labels.csv(we create a dataframe that we will be training with)
df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
#using the previously imported sklearn model selection library to split our dataframe into a training part and a validation part
df_train, df_valid = sk_model_selection.train_test_split(
    df, #chosen dataframe
    test_size=0.2, #fraction of the rows that is held out as validation data (the remaining 80% is used for training)
    random_state=42, #seed for the shuffle that happens before the split, so the same rows land in the same part on every run
    #(the shuffle itself is what protects us from any ordering bias in the csv)
    stratify=df["MGMT_value"], #keep the proportion of 0 and 1 labels the same in both parts so that neither part is skewed;
    #the labels are taken from df itself (not from the earlier train_df copy) so they always line up with the rows being split
)

## [Context For the Next Few Code Blocks] 
In the following code block we will be creating a pytorch Dataset (the object that a pytorch DataLoader reads from). it should be able to generate a dataset that takes the features we want and sets
up a pytorch compatible dataset.

the main concept is that we are creating ids and corresponding data and then making that into a dictionary (it will also return the 'tensor' channel ID(our data path) and the label)

The reason we are making the dataset afresh from what we were initially given has to do with the fact that we want:
* modularity
* batching
* multiprocessing
* pytorch dataset utility functions

## Unannotated

In [None]:
class DataRetriever(torch_data.Dataset):
    """Map-style dataset: one item is the 3-channel image and the label of one patient."""

    def __init__(self, paths, targets):
        # paths holds the patient ids, targets the matching labels
        self.paths = paths
        self.targets = targets

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        _id = self.paths[index]
        patient_path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{str(_id).zfill(5)}/"
        channels = []
        for mri_type in ("FLAIR", "T1w", "T1wCE"):  # "T2w" is left out
            slice_files = glob.glob(os.path.join(patient_path, mri_type, "*"))
            slice_files.sort(key=lambda x: int(x[:-4].split("-")[-1]))
            n_slices = len(slice_files)
            if n_slices >= 10:
                # evenly spaced slices, skipping the first and the last tenth
                step = n_slices // 10
                picked = range(step, n_slices - step, step)
            else:
                picked = range(n_slices)

            frames = [
                cv2.resize(load_dicom(slice_files[k]), (256, 256)) / 255
                for k in picked
            ]
            # per-pixel mean over the picked slices -> one image per MRI type
            channels.append(np.mean(frames, axis=0))

        label = torch.tensor(self.targets[index], dtype=torch.float)

        return {"X": torch.tensor(channels).float(), "y": label}

## Annotated

In [None]:
#characterizes a pytorch dataset such that it can be handled like one(src: https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel)
class DataRetriever(torch_data.Dataset):#parent class is a map-style dataset that implements 
                                        #"__getitem__() and __len__() protocols, and represents a map from (possibly non-integral) indices/keys to data samples"
    def __init__(self, paths, targets):
        #paths = the patient ids (they are turned into folder paths in __getitem__), targets = the matching labels
        self.paths = paths
        self.targets = targets
          
    def __len__(self):
        #the total number of samples (patients) in this dataset
        return len(self.paths)
    
    def __getitem__(self, index):
        #builds ONE sample: the image data and the label of the patient at position index
        _id = self.paths[index]
        #patient_path is set using an f string: the id is zero-padded to 5 characters to create a str that represents a folder of
        #our data set, i.e. "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00066/"
        patient_path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{str(_id).zfill(5)}/"
        channels = []
        for t in ("FLAIR", "T1w", "T1wCE"): # author comment [ "T2w" ] -> the fourth MRI type is simply left out, only these three are used
            #all slice files of the current type, ordered by the slice number in the file name
            t_paths = sorted(
                glob.glob(os.path.join(patient_path, t, "*")), 
                key=lambda x: int(x[:-4].split("-")[-1]),
            )
            # author comment [ start, end = int(len(t_paths) * 0.475), int(len(t_paths) * 0.525) ] 

            #as kaggle user @Aman Arora explains here: https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling/comments#1388761
            #the following sequence takes a small, evenly spaced subset of the slices of each type, averages those slices into
            #one image and does so for 3 of the 4 image types; the three averaged images are the 'channels' used for training.
            #this dataset class takes paths and targets because it serves the labelled training data

            #x = number of slice files found for the current type
            x = len(t_paths)
            if x < 10:
                r = range(x) #fewer than 10 slices: use every one of them
            else:
                #d = a tenth of the series length: 1 for 10-19 slices, 2 for 20-29, etc...
                d = x // 10
                #range(start, stop(not included), step): start one tenth in, stop one tenth before the end, step one tenth
                #-> the first and last tenth of the series are skipped and usually 8 or 9 evenly spaced slices remain
                #   (the count is not always the same, e.g. 19 slices give 17 picks, but that does not matter because
                #   the picks are averaged into a single image below)
                r = range(d, x - d, d)
            channel = []
            # for i in range(start, end + 1):
            for i in r:
                #load the slice as a uint8 image, resize it to 256 x 256 pixels with cv2.resize and divide by 255
                #so that every pixel value lies between 0 and 1
                channel.append(cv2.resize(load_dicom(t_paths[i]), (256, 256)) / 255)
                
            #axis=0 is the slice axis: the mean is taken pixel by pixel over all picked slices, leaving ONE 256 x 256 image per type
            channel = np.mean(channel, axis=0)
            #the averaged images of the three MRI types are collected like the 3 color channels of an ordinary picture,
            #i.e. a 2d representation of 3d data
            channels.append(channel)
            
        #the label of this patient as a float tensor
        y = torch.tensor(self.targets[index], dtype=torch.float)
        
        #np.stack first joins the three images into one (3, 256, 256) array; handing torch.tensor a plain list of
        #numpy arrays works too but is very slow and makes pytorch print a warning
        return {"X": torch.tensor(np.stack(channels)).float(), "y": y}
        #returns a dictionary with the channels as a torch tensor under "X" and the label under "y"
        #returns the channels array in torch.tensor formatting and the y value

## [Quick Demonstration] 
#### Breaking down the following logic;

channel.append(cv2.resize(load_dicom(t_paths[i]), (256, 256)) / 255)

In [None]:
# Step 1: decode one raw DICOM slice into a pixel array.
# The path has the same form as the entries collected in `t_paths`.
image_data = load_dicom(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00046/T2w/Image-300.dcm'
)
size, dimensions, shape = image_data.size, image_data.ndim, image_data.shape
print(f"size: {size}", f"dimensions: {dimensions}", f"shape: {shape}")

In [None]:
# Step 2: resize the raw pixel array to 256x256.
image_data = cv2.resize(image_data, (256, 256))
# Re-read every property from the resized array. `size` has to be recomputed
# here, otherwise the value left over from the earlier cell (the pre-resize
# element count) would be printed next to the new shape.
size = image_data.size
dimensions = image_data.ndim
shape = image_data.shape
print("size: "+ str(size), "dimensions: "+ str(dimensions), "shape: "+ str(shape))

In [None]:
# Step 3: scale the resized pixel values by 1/255.
image_data = (image_data / 255 )
# Recompute all three properties from the current array so the printed line
# never mixes in a stale `size` from an earlier cell. The division changes the
# values (and dtype), not the number of elements or the shape.
size = image_data.size
dimensions = image_data.ndim
shape = image_data.shape
print("size: "+ str(size), "dimensions: "+ str(dimensions), "shape: "+ str(shape))

In [None]:
print(image_data, image_data[0])# prints the whole 2D array, then its first row; after the division by 255 the values are presumably in [0, 1] (assumes load_dicom returns 0-255 pixel data - TODO confirm)

## [Quick Demonstration] what is in t_paths?

In [None]:
# Side note: inspect what `t_paths` holds for a single patient.
patient_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000'  # temporary value, for inspection only


def _slice_number(path):
    # ".../Image-12.dcm" -> 12: drop the 4-character extension, keep the number after the last "-"
    return int(path[:-4].split("-")[-1])


for t in ("FLAIR", "T1w", "T1wCE"):  # "T2w"
    t_paths = glob.glob(os.path.join(patient_path, t, "*"))
    t_paths.sort(key=_slice_number)
# The print sits after the loop, so only the list built in the last iteration
# (the T1wCE folder of patient 00000) is shown, ordered by slice number.
print(t_paths)

## [Quick Context] for next annotation code block;

In [None]:
df_train["BraTS21ID"].values

In [None]:
df_train["MGMT_value"].values

In [None]:
# Build the training dataset from the training split's patient IDs (BraTS21ID)
# and their MGMT_value column.
train_data_retriever = DataRetriever(
    df_train["BraTS21ID"].values, 
    df_train["MGMT_value"].values, 
)

# Same construction for the validation split.
valid_data_retriever = DataRetriever(
    df_valid["BraTS21ID"].values, 
    df_valid["MGMT_value"].values,
)

### A better look at the pytorch DataRetriever object;
> more information: https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel

## [Quick Context] for next annotation code block;

In [None]:
print(train_data_retriever)# no custom __repr__ is visible on DataRetriever, so this presumably prints the default object repr (class name and memory address)

In [None]:
print(train_data_retriever[100])# index 100 calls __getitem__, which returns a dictionary with the keys "X" (image tensor) and "y" (label) for that patient

In [None]:
print(train_data_retriever[100]["X"])# the "X" entry of that dictionary: the image tensor built from the per-modality mean images

In [None]:
train_data_retriever[100]["X"].numpy()[0]# convert the "X" tensor to a numpy array and take index 0 along its first axis, i.e. the first channel appended in __getitem__

## Unannotated

In [None]:
# Show the three channels of training sample 100 side by side.
plt.figure(figsize=(16, 6))
# Fetch the sample once: each train_data_retriever[100] access re-runs
# __getitem__, so indexing inside the loop would repeat the same loading
# work for every subplot.
sample_channels = train_data_retriever[100]["X"].numpy()
for i in range(3):
    plt.subplot(1, 3, i + 1)
    plt.imshow(sample_channels[i], cmap="gray")

## Annotated

In [None]:
#visualizing the data we have stored in the DataRetriever pytorch object
plt.figure(figsize=(16, 6))
#use matplotlib to create a figure that is 16 wide by 6 high (figsize is given in inches, src: https://www.pythonpool.com/matplotlib-figsize/)
sample_channels = train_data_retriever[100]["X"].numpy()
#fetch sample 100 a single time and convert its "X" tensor to a numpy array; every train_data_retriever[100]
#access re-runs __getitem__, so doing this inside the loop would repeat the same loading work for each subplot
for i in range(3):
    plt.subplot(1, 3, i + 1)#select a subplot in a grid of 1 row and 3 columns; the third argument is the position in that grid
    #subplot positions are 1-based, so i + 1 maps the loop values 0, 1, 2 onto the positions 1, 2, 3
    plt.imshow(sample_channels[i], cmap="gray")
    #draw channel i of the sample as a grayscale image

## Unannotated

In [None]:
class Model(nn.Module):
    """EfficientNet-B0 whose final fully-connected layer is swapped for a single-output linear layer."""

    def __init__(self):
        super().__init__()
        backbone = efficientnet_pytorch.EfficientNet.from_name("efficientnet-b0")
        # Fill the freshly built network with the weights stored in the local file.
        backbone.load_state_dict(
            torch.load("../input/efficientnet-pytorch/efficientnet-b0-08094119.pth")
        )
        # Keep the input width of the existing head, but emit one value per sample.
        backbone._fc = nn.Linear(
            in_features=backbone._fc.in_features, out_features=1, bias=True
        )
        self.net = backbone

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        return self.net(x)

## Annotated

In [None]:
#"base class for all neural network modules"-> source: https://pytorch.org/docs/stable/generated/torch.nn.Module.html
# same source: "all models used should also be a subclass to this class"
class Model(nn.Module):
    def __init__(self):
        #initialization 
        super().__init__()
        #run nn.Module's own __init__ first so that the sub-modules assigned below are registered on this object
        self.net = efficientnet_pytorch.EfficientNet.from_name("efficientnet-b0")
        #build the 'efficientnet-b0' architecture by name; b0 is the smallest member of the EfficientNet family and, per
        #this blog (https://ai.googleblog.com/2019/05/efficientnet-improving-accuracy-and.html), less accurate than efficientnet-b7
        checkpoint = torch.load("../input/efficientnet-pytorch/efficientnet-b0-08094119.pth")
        #torch.load unpickles the file at this path; presumably it holds the pretrained b0 state_dict, since it is handed to load_state_dict next
        self.net.load_state_dict(checkpoint)
        #copy the loaded parameter tensors into the freshly built network (a state_dict holds parameters and buffers, not the architecture)
        n_features = self.net._fc.in_features
        #read the input width (in_features) of the network's existing final fully-connected layer, _fc
        #more insight by user:hktxt @ https://discuss.pytorch.org/t/how-to-modify-the-final-fc-layer-based-on-the-torch-model/766/23
        #also see the docs for fc.in_features here: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py
        self.net._fc = nn.Linear(in_features=n_features, out_features=1, bias=True)
        #replace the final layer with a new linear layer of the same input width but a single output value per sample
        #nn.Linear computes y = x @ W.T + b; bias=True keeps the additive term b
        
    def forward(self, x):
        out = self.net(x) # forward pass: feed the batch x through the EfficientNet (with its replaced final layer) and return the result
        return out

### More info on the comments from the previous code block can be found here:

> self.net.load_state_dict(): [one](https://pytorch.org/tutorials/beginner/saving_loading_models.html) and [two](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict)

#### From the source: "a state_dict is a Python dictionary object that maps each layer to its parameter tensor (only layers with learnable parameters, i.e. conv layers, linear layers, etc., and registered buffers such as batchnorm's running_mean have entries). Optimizer objects (torch.optim) also have a state_dict, which contains information about the optimizer's state and the hyperparameters used."


## Unannotated

In [None]:
class LossMeter:
    """Running mean of the loss values passed to ``update``."""

    def __init__(self):
        self.avg = 0
        self.n = 0

    def update(self, val):
        """Fold one more loss value into the running mean."""
        count = self.n + 1
        # Weight of the previous mean once the new value is included.
        keep = (count - 1) / count
        self.avg = val / count + keep * self.avg
        self.n = count

        
class AccMeter:
    """Running accuracy of raw model outputs against integer targets."""

    def __init__(self):
        self.avg = 0
        self.n = 0

    def update(self, y_true, y_pred):
        """Fold one batch into the running accuracy (an output >= 0 counts as a positive prediction)."""
        labels = y_true.cpu().numpy().astype(int)
        hits = np.sum(labels == (y_pred.cpu().numpy() >= 0))
        seen_before = self.n
        self.n = seen_before + len(labels)
        # Correct predictions of this batch plus the re-weighted previous average.
        self.avg = hits / self.n + seen_before / self.n * self.avg

## Annotated

In [None]:
#create a class responsible for accounting for the loss while training
class LossMeter:
    """Keep a running mean of the loss values seen so far."""
    def __init__(self):
        #initialize values
        self.avg = 0 #running mean of every value passed to update() so far
        self.n = 0 #number of values folded into the mean so far

    def update(self, val):
        """Fold one more loss value ``val`` into the running mean."""
        self.n += 1
        #author comment: [ incremental update ]
        #Why this formula: the mean of n values is (v_1 + ... + v_n) / n. The sum of the first n - 1 values
        #equals (n - 1) * old_avg, so
        #    new_avg = (val + (n - 1) * old_avg) / n = val / n + (n - 1) / n * old_avg
        #This updates the mean from the previous mean alone, without storing every loss value.
        self.avg = val / self.n + (self.n - 1) / self.n * self.avg

#create a class responsible for updating the change in accuracy of the model
class AccMeter:
    """Keep a running accuracy of the model outputs against the true labels."""
    def __init__(self):
        #initialize values
        self.avg = 0 #running accuracy over every sample seen so far
        self.n = 0 #number of samples seen so far
        
        
    def update(self, y_true, y_pred):
        """Fold one batch into the running accuracy.

        y_true: tensor of labels; y_pred: tensor of raw model outputs for the same samples.
        An output >= 0 counts as a positive prediction.
        """
        y_true = y_true.cpu().numpy().astype(int).reshape(-1)#.cpu() moves the tensor to host memory so it can become a numpy array of ints
        y_pred = (y_pred.cpu().numpy() >= 0).reshape(-1)#True where the output is >= 0, False otherwise
        #both arrays are flattened to 1-D: comparing a (N,) array with a (N, 1) array would silently broadcast to an
        #N x N matrix and count far more "matches" than there are samples
        if len(y_true) == 0:
            #an empty batch carries no information (and would divide by zero when self.n is still 0)
            return
        last_n = self.n #number of samples counted before this batch
        self.n += len(y_true)
        #the count now includes this batch
        true_count = np.sum(y_true == y_pred)
        #y_true == y_pred is True where prediction and label agree (1 == True, 0 == False); summing counts the agreements
        # incremental update
        self.avg = true_count / self.n + last_n / self.n * self.avg
        #same running-mean idea as in a loss meter, but per batch: last_n * old_avg is the number of correct predictions
        #so far, so new_avg = (true_count + last_n * old_avg) / n = true_count / n + last_n / n * old_avg

## [Quick Demonstration] Understanding the true_count variable from the AccMeter Class:


In [None]:
# Two arrays that agree in the first three positions and differ in the last one.
array1 = np.array([1, 2, 3, 4])
array2 = np.array([1, 2, 3, 3])
# The comparison yields [True, True, True, False]; summing counts the True entries.
yo = (array1 == array2).sum()
print(yo)

## Unannotated

In [None]:
class Trainer:
    """Run the train/validate loop for a model and save a checkpoint after every epoch."""
    def __init__(
        self, 
        model, 
        device, 
        optimizer, 
        criterion, 
        loss_meter, 
        score_meter
    ):
        # loss_meter and score_meter are called with no arguments at the start
        # of every epoch (see train_epoch / valid_epoch), so they are meter
        # factories (e.g. classes), not meter instances.
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.loss_meter = loss_meter
        self.score_meter = score_meter
        
        # Start below every possible score; counter of epochs without a checkpoint.
        self.best_valid_score = -np.inf
        self.n_patience = 0
        
        # str.format templates filled in by info_message.
        self.messages = {
            "epoch": "[Epoch {}: {}] loss: {:.5f}, score: {:.5f}, time: {} s",
            "checkpoint": "The score improved from {:.5f} to {:.5f}. Save model to '{}'",
            "patience": "\nValid score didn't improve last {} epochs."
        }
    
    def fit(self, epochs, train_loader, valid_loader, save_path, patience):        
        """Train for up to ``epochs`` epochs, validating and saving to ``save_path`` after each one.

        Returns None.
        """
        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}", n_epoch)
            
            train_loss, train_score, train_time = self.train_epoch(train_loader)
            valid_loss, valid_score, valid_time = self.valid_epoch(valid_loader)
            
            self.info_message(
                self.messages["epoch"], "Train", n_epoch, train_loss, train_score, train_time
            )
            
            self.info_message(
                self.messages["epoch"], "Valid", n_epoch, valid_loss, valid_score, valid_time
            )

            # The score comparison is commented out, so the checkpoint is
            # written after every epoch; the else branch and the patience
            # break below can never run while the condition is `True`.
            if True:
#             if self.best_valid_score < valid_score:
                self.info_message(
                    self.messages["checkpoint"], self.best_valid_score, valid_score, save_path
                )
                self.best_valid_score = valid_score
                self.save_model(n_epoch, save_path)
                self.n_patience = 0
            else:
                self.n_patience += 1
            
            if self.n_patience >= patience:
                self.info_message(self.messages["patience"], patience)
                break
            
    def train_epoch(self, train_loader):
        """Run one optimisation pass over ``train_loader``.

        Returns (mean loss, mean score, elapsed whole seconds).
        """
        self.model.train()
        t = time.time()
        # Fresh meters for this epoch.
        train_loss = self.loss_meter()
        train_score = self.score_meter()
        
        for step, batch in enumerate(train_loader, 1):
            X = batch["X"].to(self.device)
            targets = batch["y"].to(self.device)
            self.optimizer.zero_grad()
            # squeeze(1) drops dimension 1 of the model output so it lines up with targets.
            outputs = self.model(X).squeeze(1)
            
            loss = self.criterion(outputs, targets)
            loss.backward()

            # Meters receive detached values so no autograd graph is kept alive.
            train_loss.update(loss.detach().item())
            train_score.update(targets, outputs.detach())

            self.optimizer.step()
            
            _loss, _score = train_loss.avg, train_score.avg
            message = 'Train Step {}/{}, train_loss: {:.5f}, train_score: {:.5f}'
            # end="\r" rewrites the same console line on every step.
            self.info_message(message, step, len(train_loader), _loss, _score, end="\r")
        
        return train_loss.avg, train_score.avg, int(time.time() - t)
    
    def valid_epoch(self, valid_loader):
        """Evaluate the model on ``valid_loader`` without updating any weights.

        Returns (mean loss, mean score, elapsed whole seconds).
        """
        self.model.eval()
        t = time.time()
        valid_loss = self.loss_meter()
        valid_score = self.score_meter()

        for step, batch in enumerate(valid_loader, 1):
            # No gradients are recorded inside this context.
            with torch.no_grad():
                X = batch["X"].to(self.device)
                targets = batch["y"].to(self.device)

                outputs = self.model(X).squeeze(1)
                loss = self.criterion(outputs, targets)

                valid_loss.update(loss.detach().item())
                valid_score.update(targets, outputs)
                
            _loss, _score = valid_loss.avg, valid_score.avg
            message = 'Valid Step {}/{}, valid_loss: {:.5f}, valid_score: {:.5f}'
            self.info_message(message, step, len(valid_loader), _loss, _score, end="\r")
        
        return valid_loss.avg, valid_score.avg, int(time.time() - t)
    
    def save_model(self, n_epoch, save_path):
        """Write model and optimizer state plus the current score and epoch number to ``save_path``."""
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "best_valid_score": self.best_valid_score,
                "n_epoch": n_epoch,
            },
            save_path,
        )
    
    @staticmethod
    def info_message(message, *args, end="\n"):
        """Print ``message`` after filling its ``{}`` placeholders with ``args``."""
        print(message.format(*args), end=end)

## Annotated

In [None]:
#creating a trainer class for modularity
class Trainer:
    """Train and validate a model epoch by epoch, saving a checkpoint after every epoch.

    Args:
        model: the network to train.
        device: torch device every batch is moved to.
        optimizer: optimizer that owns the model parameters.
        criterion: callable ``criterion(outputs, targets)`` returning the loss tensor.
        loss_meter: zero-argument factory (e.g. a class) creating a fresh loss meter each epoch.
        score_meter: zero-argument factory (e.g. a class) creating a fresh score meter each epoch.
    """
    def __init__(
        #initializes variables 
        self, 
        model, #model of choice
        device, #cpu vs gpu
        optimizer, #optimizer, i.e. Adam
        criterion, #loss function that compares the model outputs with the targets
        #later defined as;
        #  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        #  criterion = torch_functional.binary_cross_entropy_with_logits
        #
        #source for understanding how to use this module:https://sebastianraschka.com/faq/docs/pytorch-crossentropy.html
        #what is a logit? src: https://www.sciencedirect.com/topics/mathematics/logit-link-function
        #(these notes must be comments: a string literal inside a parameter list is a SyntaxError)
        loss_meter, #loss tracker (a factory, called once per epoch)
        score_meter  #accuracy grading (a factory, called once per epoch)
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.loss_meter = loss_meter
        self.score_meter = score_meter
        
        self.best_valid_score = -np.inf #negative infinity, so any real score compares as higher
        self.n_patience = 0 #number of consecutive epochs without a saved checkpoint; fit() stops once it reaches `patience`
        
        #str.format templates (not f-strings) for the messages printed while training; info_message fills in the {} placeholders
        self.messages = {
            "epoch": "[Epoch {}: {}] loss: {:.5f}, score: {:.5f}, time: {} s", #per-epoch summary
            "checkpoint": "The score improved from {:.5f} to {:.5f}. Save model to '{}'", #checkpoint notice
            "patience": "\nValid score didn't improve last {} epochs." #early-stopping notice
        }
    
    #train for a number of epochs, validating and saving a checkpoint after each one
    def fit(self, epochs, train_loader, valid_loader, save_path, patience):        
        """Run up to ``epochs`` train+validation epochs and save a checkpoint to ``save_path``.

        Returns None.
        """
        for n_epoch in range(1, epochs + 1):
            #epochs are numbered 1..epochs; print a header line for the current one
            self.info_message("EPOCH: {}", n_epoch)
            
            train_loss, train_score, train_time = self.train_epoch(train_loader)
            #another source that does a similar train_epoch process: https://github.com/Kulbear/pytorch-project-template/blob/master/trainers/mnist_trainer.py
            valid_loss, valid_score, valid_time = self.valid_epoch(valid_loader)
            
            #info_message is a print helper, so it is called twice: once for the training summary...
            self.info_message(
                self.messages["epoch"], "Train", n_epoch, train_loss, train_score, train_time
            )
            #...and once for the validation summary
            self.info_message(
                self.messages["epoch"], "Valid", n_epoch, valid_loss, valid_score, valid_time
            )

            if True: #always true: the checkpoint is overwritten after EVERY epoch, even when the score got worse
#            author's comment: [ if self.best_valid_score < valid_score: ]
                #with the author's original condition restored, only an improved validation score would be saved
                self.info_message(
                    self.messages["checkpoint"], self.best_valid_score, valid_score, save_path
                )
                self.best_valid_score = valid_score
                self.save_model(n_epoch, save_path)
                self.n_patience = 0
            else: #unreachable while the condition above is the literal True; it would count an epoch without improvement
                self.n_patience += 1
            
            if self.n_patience >= patience:#early stopping: after `patience` epochs in a row without a checkpoint, log it and leave the loop
                self.info_message(self.messages["patience"], patience)
                break
            
    #one pass over the training data, updating the model weights
    def train_epoch(self, train_loader):
        """Run one optimisation pass; return (mean loss, mean score, elapsed whole seconds)."""
        self.model.train()#switch to training mode: layers such as dropout and BatchNorm use their training behaviour
        t = time.time()#timestamp at the start of the epoch, used to compute the elapsed time at the end
        train_loss = self.loss_meter() #fresh loss meter for this epoch
        train_score = self.score_meter() #fresh score meter for this epoch
        
        for step, batch in enumerate(train_loader, 1): #iterate over the batches, numbering the steps from 1
            X = batch["X"].to(self.device) #move the input tensor of the batch to the chosen device
            targets = batch["y"].to(self.device) #move the label tensor of the batch to the chosen device
            self.optimizer.zero_grad()# "Sets the gradients of all optimized torch.Tensor s to zero" source: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html
            outputs = self.model(X).squeeze(1) #forward pass; squeeze(1) drops dimension 1 of the output so it lines up with targets
            
            loss = self.criterion(outputs, targets) #loss between the outputs and the true targets
            loss.backward() #backpropagation: computes the gradient of the loss for every parameter

            train_loss.update(loss.detach().item())#record the loss as a plain python number in the loss meter (this does not touch the weights)
            train_score.update(targets, outputs.detach())#record the accuracy; detach() hands over the values without the autograd graph

            self.optimizer.step() #update the model weights using the gradients computed by backward()
            #great discussion of this here: https://discuss.pytorch.org/t/how-are-optimizer-step-and-loss-backward-related/7350
            
            _loss, _score = train_loss.avg, train_score.avg #running averages so far in this epoch
            message = 'Train Step {}/{}, train_loss: {:.5f}, train_score: {:.5f}' #progress template
            self.info_message(message, step, len(train_loader), _loss, _score, end="\r") #print progress; end="\r" rewrites the same console line
        
        return train_loss.avg, train_score.avg, int(time.time() - t)#mean loss, mean score, and whole seconds this epoch took
    
    #validation pass: the model being trained is evaluated on held-out data; no gradients are computed and no weights change
    def valid_epoch(self, valid_loader):
        """Evaluate on ``valid_loader``; return (mean loss, mean score, elapsed whole seconds)."""
        self.model.eval()#evaluation mode: dropout and BatchNorm layers use their inference behaviour
        t = time.time()
        valid_loss = self.loss_meter()
        valid_score = self.score_meter()

        for step, batch in enumerate(valid_loader, 1):
            with torch.no_grad():#no gradient tracking inside this block
                X = batch["X"].to(self.device)
                targets = batch["y"].to(self.device)

                outputs = self.model(X).squeeze(1)
                loss = self.criterion(outputs, targets)

                valid_loss.update(loss.detach().item())
                valid_score.update(targets, outputs)
                
            _loss, _score = valid_loss.avg, valid_score.avg
            message = 'Valid Step {}/{}, valid_loss: {:.5f}, valid_score: {:.5f}'
            self.info_message(message, step, len(valid_loader), _loss, _score, end="\r")
        
        return valid_loss.avg, valid_score.avg, int(time.time() - t)
    
    #save a checkpoint dictionary with torch.save
    def save_model(self, n_epoch, save_path):
        """Write model/optimizer state, the current score and the epoch number to ``save_path``."""
        torch.save(
            { #object values; see docs: https://pytorch.org/docs/stable/generated/torch.save.html
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                #stored as a plain python float: a score meter may hand back a NumPy scalar, which
                #torch.load(..., weights_only=True) does not accept by default
                "best_valid_score": float(self.best_valid_score),
                "n_epoch": n_epoch,
            },
            save_path,#file name / location to save to
        )
    
    #staticmethod: a plain helper that needs neither the instance nor the class
    @staticmethod
    def info_message(message, *args, end="\n"):#fills the {} placeholders of `message` with args via str.format
        """Print ``message.format(*args)``, terminated by ``end``."""
        print(message.format(*args), end=end)#writes the formatted message to the notebook output

## [Quick Demonstration]
#### What happens when we detach() a tensor? Let's see:
> source: user: ptrblck @ https://discuss.pytorch.org/t/how-to-detach-specific-components-in-the-loss/13983/7

In [None]:
# Three independent linear layers; .weight.grad is None until a backward pass reaches the layer.
modelA = nn.Linear(10, 10)
modelB = nn.Linear(10, 10)
modelC = nn.Linear(10, 10)

x = torch.randn(1, 10)
a = modelA(x)
# a.detach() carries the same values as `a` but is cut off from the autograd
# graph, so nothing computed from it can send gradients back into modelA.
b = modelB(a.detach())
b.mean().backward()
print(modelA.weight.grad)  # expected None: the detach blocked the gradient
print(modelB.weight.grad)  # expected a tensor: modelB took part in computing b
print(modelC.weight.grad)  # expected None: modelC has not been used yet

# This time `a` is passed on without detach, so the graph reaches back into modelA.
c = modelC(a)
c.mean().backward()
print(modelA.weight.grad)  # expected a tensor now: the gradient flowed through `a`
print(modelB.weight.grad)  # expected unchanged: modelB is not part of c's graph
print(modelC.weight.grad)  # expected a tensor: modelC took part in computing c

## Unannotated

In [None]:
import efficientnet_pytorch

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Datasets for the two splits: patient IDs plus their MGMT labels.
train_data_retriever = DataRetriever(df_train["BraTS21ID"].values, df_train["MGMT_value"].values)
valid_data_retriever = DataRetriever(df_valid["BraTS21ID"].values, df_valid["MGMT_value"].values)

# Batches of 8 from 8 worker processes; only the training data is shuffled.
train_loader = torch_data.DataLoader(train_data_retriever, batch_size=8, shuffle=True, num_workers=8)
valid_loader = torch_data.DataLoader(valid_data_retriever, batch_size=8, shuffle=False, num_workers=8)

model = Model()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch_functional.binary_cross_entropy_with_logits

# The meter classes themselves are passed; Trainer instantiates them once per epoch.
trainer = Trainer(model, device, optimizer, criterion, LossMeter, AccMeter)

# fit() has no return statement, so `history` ends up as None.
history = trainer.fit(
    epochs=10,
    train_loader=train_loader,
    valid_loader=valid_loader,
    save_path=f"best-model-0.pth",
    patience=100,
)

## Annotated

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#set accelerator to gpu if possible

#Retrieve training data
train_data_retriever = DataRetriever(
    df_train["BraTS21ID"].values, 
    df_train["MGMT_value"].values, 
)

#Retrieve validation data
valid_data_retriever = DataRetriever(
    df_valid["BraTS21ID"].values, 
    df_valid["MGMT_value"].values,
)

#load train dataset with desired batch size, data order(shuffle), and workers
train_loader = torch_data.DataLoader(
    train_data_retriever,
    batch_size=8,
    shuffle=True,
    num_workers=8,
)

#load validation dataset with desired batch size, data order(shuffle), and workers
valid_loader = torch_data.DataLoader(
    valid_data_retriever, 
    batch_size=8,
    shuffle=False,
    num_workers=8,
)

#define model
model = Model()
#move the model's parameters to the chosen device
model.to(device)

#Adam optimizer over all model parameters with a learning rate of 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#loss function: binary cross entropy computed directly on the raw model outputs (logits)
criterion = torch_functional.binary_cross_entropy_with_logits

#Create the Trainer; the LossMeter and AccMeter classes themselves are passed in and instantiated once per epoch
trainer = Trainer(
    model, 
    device, 
    optimizer, 
    criterion, 
    LossMeter, 
    AccMeter
)

#run the training loop; fit() has no return statement, so `history` ends up as None
history = trainer.fit(
    2, #epochs: number of passes over the training data (the original notebook used 10)
    train_loader, 
    valid_loader, 
    f"best-model-0.pth", #save_path: file the checkpoint is written to after every epoch
    100, #patience: epochs without a saved checkpoint before training stops early
)

## Unannotated

In [None]:
# Rebuild the network for each saved checkpoint (a single one here) and
# collect the models, switched to evaluation mode, for inference.
models = []
for i in range(1):
    model = Model()
    model.to(device)

    checkpoint = torch.load(f"best-model-{i}.pth")
    saved_weights = checkpoint["model_state_dict"]
    model.load_state_dict(saved_weights)

    # eval() returns the module itself, so it can be appended directly.
    models.append(model.eval())

## Annotated

In [None]:
models = []
#start with an empty list that will hold the loaded model(s)
for i in range(1):
    #a single iteration: only one checkpoint (best-model-0.pth) is loaded
    model = Model()
    #build a fresh instance of the Model class
    model.to(device)
    #move its parameters to our chosen device
    
    checkpoint = torch.load(f"best-model-{i}.pth", map_location=device)
    #deserialize the checkpoint dictionary from disk; map_location remaps the stored tensors onto `device`, so a
    #checkpoint written on a GPU can still be loaded on a CPU-only machine
    model.load_state_dict(checkpoint["model_state_dict"])
    #copy the saved parameter tensors into the model; the other checkpoint entries are not used here
    model.eval()
    #eval() switches the model to evaluation mode: nothing is removed, but layers such as dropout and BatchNorm
    #use their inference behaviour (we pair this with no_grad() later on, as the source also suggests)
    #source: https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch
    models.append(model)
    #add our freshly loaded model to the models list

## Un/Annotated

In [None]:
models = []
#start with an empty list that will hold the loaded model(s)
for i in range(1):
    #a single iteration: only one checkpoint (best-model-0.pth) is loaded
    model = Model()
    #build a fresh instance of the Model class
    model.to(device)
    #move its parameters to our chosen device
    
    checkpoint = torch.load(f"best-model-{i}.pth", map_location=device)
    #deserialize the checkpoint dictionary from disk; map_location remaps the stored tensors onto `device`, so a
    #checkpoint written on a GPU can still be loaded on a CPU-only machine
    model.load_state_dict(checkpoint["model_state_dict"])
    #copy the saved parameter tensors into the model; the other checkpoint entries are not used here
    model.eval()
    #eval() switches the model to evaluation mode: nothing is removed, but layers such as dropout and BatchNorm
    #use their inference behaviour (we pair this with no_grad() later on, as the source also suggests)
    #source: https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch
    models.append(model)
    #add our freshly loaded model to the models list

#### As far as I understand it, the next code blocks redefine `DataRetriever` for the test set (no labels) and then read `sample_submission.csv` as a reference for our own submission file generation, so that our submission file has the same rows (patient IDs)
> open to correction if I am misunderstanding main author's intentions here

## Unannotated

In [None]:
#same DataRetriever logic as before except we exclude the self.targets from the initialization phase as we aim to use this dataretriever for predictions
class DataRetriever(torch_data.Dataset):
    """Test-time dataset: one 3-channel image per patient ID, without labels.

    Each channel is the mean of a subset of resized slices from one scan type
    (FLAIR, T1w, T1wCE). Defining this class replaces any earlier class of the
    same name in the notebook session.
    """

    def __init__(self, paths):
        # `paths` holds the patient IDs; each is zero-padded to 5 digits to form the folder name.
        self.paths = paths
          
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, index):
        """Return {"X": float tensor with one 256x256 channel per scan type, "id": patient ID}."""
        _id = self.paths[index]
        patient_path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{str(_id).zfill(5)}/"
        channels = []
        for t in ("FLAIR", "T1w", "T1wCE"): # "T2w"
            # Order the slice files by the number after the last "-" in the file name.
            t_paths = sorted(
                glob.glob(os.path.join(patient_path, t, "*")), 
                key=lambda x: int(x[:-4].split("-")[-1]),
            )
            x = len(t_paths)
            if x == 0:
                # No slices for this scan type: use an all-zero channel so the sample
                # keeps its shape. Averaging an empty list would give a NaN scalar and
                # break the tensor construction below.
                channels.append(np.zeros((256, 256)))
                continue
            if x < 10:
                # Few slices: use all of them.
                r = range(x)
            else:
                # Otherwise take every d-th slice, skipping the first and last d slices.
                d = x // 10
                r = range(d, x - d, d)

            channel = [
                cv2.resize(load_dicom(t_paths[i]), (256, 256)) / 255
                for i in r
            ]
            # Pixel-wise mean over the selected slices gives one 2D image per scan type.
            channels.append(np.mean(channel, axis=0))
        
        # Stack into a single ndarray first: building a tensor from a list of
        # separate ndarrays is much slower.
        return {"X": torch.tensor(np.stack(channels)).float(), "id": _id}

## Unannotated

In [None]:
# The sample submission lists the patient IDs that predictions are needed for.
submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

test_ids = submission["BraTS21ID"].values
test_data_retriever = DataRetriever(test_ids)

# Batches of 4 in file order, loaded by 8 worker processes.
test_loader = torch_data.DataLoader(test_data_retriever, batch_size=4, shuffle=False, num_workers=8)

## Annotated

In [None]:
#reads sample_submission.csv into a DataFrame; only its BraTS21ID column is used below
submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

#builds the test dataset from the BraTS21ID values listed in the sample submission
test_data_retriever = DataRetriever(
    submission["BraTS21ID"].values, 
)

#wraps the test dataset in a DataLoader: batches of 4, original order kept (shuffle=False), 8 worker processes
test_loader = torch_data.DataLoader(
    test_data_retriever,
    batch_size=4,
    shuffle=False,
    num_workers=8,
)

## Unannotated

In [None]:
y_pred = []  #one averaged probability per test case
ids = []  #case ids, in the same order as y_pred

for e, batch in enumerate(test_loader):
    print(f"{e}/{len(test_loader)}", end="\r")
    with torch.no_grad():
        #move the batch to the device once instead of once per model
        batch_x = batch["X"].to(device)
        tmp_pred = np.zeros((batch["X"].shape[0], ))
        for model in models:
            tmp_res = torch.sigmoid(model(batch_x)).cpu().numpy().squeeze()
            tmp_pred += tmp_res
        #average over the ensemble so the value stays in [0, 1];
        #max(..., 1) keeps the all-zero result if `models` is empty
        tmp_pred /= max(len(models), 1)
        y_pred.extend(tmp_pred)
        ids.extend(batch["id"].numpy().tolist())

## Annotated

In [None]:
y_pred = [] #python list that will collect one predicted value per test case
ids = [] #python list that will collect the matching case ids, in the same order

#enumerate gives the batch counter (e) together with the batch itself
#each batch is a dict with the stacked inputs under "X" and the case ids under "id"
for e, batch in enumerate(test_loader):
    #progress counter: end="\r" is a carriage return, so the cursor jumps back to the
    #start of the line and the next print overwrites this one instead of adding a new line
    print(f"{e}/{len(test_loader)}", end="\r")
    #no_grad() turns off gradient tracking (autograd) inside the block
    #we never call backward() during inference, so this saves memory and time
    #docs: https://pytorch.org/docs/stable/generated/torch.no_grad.html
    with torch.no_grad():
        #move the inputs to the device once, rather than once per model
        batch_x = batch["X"].to(device)
        #1d array of zeros with one slot per sample in this batch (an accumulator)
        tmp_pred = np.zeros((batch["X"].shape[0], ))
        for model in models:
            #forward pass -> sigmoid squashes the raw outputs into (0, 1) -> back to cpu -> numpy
            #squeeze() drops the size-1 dimensions (presumably the model returns one value per sample)
            tmp_res = torch.sigmoid(model(batch_x)).cpu().numpy().squeeze()
            #element-wise addition: accumulates this model's predictions (it does not append)
            tmp_pred += tmp_res
        #divide the accumulated sum by the number of models to get the ensemble average
        #without this the values would exceed 1 as soon as more than one model is used
        #max(..., 1) avoids dividing by zero if the models list is empty
        tmp_pred /= max(len(models), 1)
        #adds each element of tmp_pred to the end of y_pred
        y_pred.extend(tmp_pred)
        #batch["id"] is a tensor; .numpy().tolist() turns it into plain python values
        #which are then added to the end of the ids list
        ids.extend(batch["id"].numpy().tolist())

## Unannotated

In [None]:
#pair each case id with its prediction and save the table for upload
submission = pd.DataFrame(data={"BraTS21ID": ids, "MGMT_value": y_pred})
submission.to_csv(path_or_buf="submission.csv", index=False)

## Annotated

In [None]:
#builds a two-column dataframe: the case ids and the prediction made for each of them
#both lists were filled in the same order, so row i of each column belongs to the same case
submission = pd.DataFrame(data={"BraTS21ID": ids, "MGMT_value": y_pred})
#writes the dataframe to submission.csv
#index=False leaves out the dataframe's row index, so the file contains only the two columns
submission.to_csv(path_or_buf="submission.csv", index=False)

## Unannotated

In [None]:
#histogram of the predicted values on a 5x5 inch figure
#the trailing semicolon stops the notebook from printing hist()'s return value
plt.figure(figsize=(5, 5))
plt.hist(x=submission["MGMT_value"]);

## Annotated

In [None]:
#a bare expression on the last line of a notebook cell is displayed as the cell's output
#here it shows the submission dataframe so the ids and predictions can be inspected
submission