In [3]:
import argparse
import json
import os
import random
import time
from types import SimpleNamespace

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.distributed import DistributedSampler
from torchvision import transforms

#For Distrubuted (also we can use torch.nn.parallel.DistributedDataParallel instead)
from torch.nn.parallel import DistributedDataParallel

In [None]:
################################
#Mixed precision with Apex
from apex import amp
from apex.optimizers import FusedAdam

In [None]:
# Select the compute device once: CUDA when a GPU is usable, CPU otherwise.
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda")
device

In [None]:
# Create the checkpoint directory. exist_ok=True makes the cell safe to re-run
# (the previous handler named a non-existent exception, `FilesExistsError`,
# so an already existing directory surfaced as a NameError instead of being ignored).
os.makedirs("./saved", exist_ok=True)

In [None]:
parser = argparse.ArgumentParser()
# For distributed runs: the launcher supplies the local rank of this process.
# torch.distributed.launch passes --local_rank (newer releases spell it
# --local-rank), while torchrun only exports the LOCAL_RANK environment
# variable, so both spellings are accepted and the env var is the default.
parser.add_argument("--local_rank", "--local-rank",
                    dest="local_rank",
                    default=int(os.environ.get("LOCAL_RANK", 0)),
                    type=int)
# parse_known_args ignores arguments that are not ours (e.g. the
# `-f kernel.json` that Jupyter passes to the kernel) instead of exiting.
args, _unknown_args = parser.parse_known_args()

In [None]:
# Central experiment configuration.
# A SimpleNamespace is used because every other cell reads settings as
# attributes (config.SEED, config.distributed, config.channels_last, ...).
config = SimpleNamespace(
    # --- data ---
    TRAIN_CSV="../data/train.csv",
    TEST_CSV="../data/test.csv",
    IMAGE_PATH="../data/images",
    VOCAB="labels.json",
    # --- checkpoints ---
    pretrained_path="./pretrained/resnet50-0676ba61.pth",  # no trailing space
    saved_path="./saved/resnet50.pt",                      # name read by train_model
    # --- optimisation ---
    lr=0.001,
    EPOCHS=10,
    BATCH_SIZE=32,
    IMAGE_SIZE=224,
    TRAIN_VALID_SPLIT=0.2,
    ##################################################
    # For performance tuning
    device=device,
    SEED=42,
    pin_memory=True,
    num_workers=8,
    distributed=True,      # requires launching through torch.distributed.launch
    world_size=4,
    USE_AMP=False,
    channels_last=False,
)

In [None]:
if config.distributed:
    # For distributed: bind this process to the GPU matching its local rank,
    # so plain "cuda" below refers to that GPU.
    torch.cuda.set_device(args.local_rank)
    # For distributed: initialise the backend. torch.distributed.launch
    # provides the rendezvous environment variables and therefore requires
    # init_method='env://'.
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://')

In [None]:
# Seed every random number generator in use so runs are repeatable.
# Python's own RNG (used by custom operators / plain-Python augmentation).
random.seed(config.SEED)
# Global NumPy RNG, for any library that relies on NumPy for randomness.
np.random.seed(config.SEED)
# PyTorch CPU generator.
torch.manual_seed(config.SEED)
# PyTorch CUDA generators on all visible GPUs
# (`torch.cuda_manual` does not exist).
torch.cuda.manual_seed_all(config.SEED)

Data manipulation

In [None]:
# Load the train/test tables and the label vocabulary (class name -> class id).
train_df = pd.read_csv(config.TRAIN_CSV)
test_df = pd.read_csv(config.TEST_CSV)
# Context manager so the vocabulary file handle is closed right after reading.
with open(config.VOCAB) as f:
    vocab = json.load(f)

# All image ids from both tables. pd.concat replaces Series.append, which was
# removed in pandas 2.0; the test column is "image_id", same as the train one.
df_names = pd.concat([train_df["image_id"], test_df["image_id"]],
                     ignore_index=True).tolist()
def create_fname(path,extension):
    """Return a function mapping a file stem to ``join(path, stem) + extension``."""
    def add_extension(fname):
        # Join first, then append the suffix to the joined path.
        joined = os.path.join(path, fname)
        return joined + extension
    return add_extension
# Turn bare image ids into full JPEG paths. The extension was the literal
# string "PATH", which produced file names such as "<id>PATH" that cannot exist.
# NOTE(review): ".jpg" is assumed from the helper's name - confirm it matches
# the files under config.IMAGE_PATH.
jpeg_extension_creator = create_fname(config.IMAGE_PATH, ".jpg")
train_df["image_id"] = train_df["image_id"].apply(jpeg_extension_creator)
test_df["image_id"] = test_df["image_id"].apply(jpeg_extension_creator)
# Collapse the one-hot label columns into one integer "label" column using the
# class ids stored in vocab.
for label in vocab:
    train_df.loc[train_df[label]==1, "label"]= vocab[label]
# Rows that matched no class stay NaN and make this cast fail loudly.
train_df["label"] = train_df["label"].astype(int)

In [None]:
# Hold out config.TRAIN_VALID_SPLIT of the training rows for validation.
# random_state pins the split explicitly: every distributed process runs this
# cell and must end up with the same partition, independent of how much
# global NumPy randomness was consumed beforehand.
train_df_X, valid_df_X, train_df_y, valid_df_y = train_test_split(train_df["image_id"],
                                                                 train_df["label"],
                                                                 test_size = config.TRAIN_VALID_SPLIT,
                                                                 random_state = config.SEED)

In [None]:
# Persist both partitions so the split can be inspected or reused later.
train_df_split= pd.DataFrame(data = {"image_id": train_df_X, "label": train_df_y})
train_df_split.to_csv("../data/train_split.csv", sep= ',', index = False)

# The validation rows get their own file; previously they were written to
# train_split.csv and silently overwrote the training partition.
valid_df_split= pd.DataFrame(data = {"image_id": valid_df_X, "label": valid_df_y})
valid_df_split.to_csv("../data/valid_split.csv", sep= ',', index = False)

In [None]:
# Channel statistics of ImageNet, matching the pretrained ResNet-50 weights.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Pre-processing pipelines, keyed by phase.
data_transforms = {
    # Training: random crop + horizontal flip as augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop((config.IMAGE_SIZE, config.IMAGE_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD)
    ]),
    # Validation: deterministic resize, so the metric is comparable between
    # epochs (a random crop here made the validation score noisy), and the
    # same normalisation as training (the blue mean was mistyped as 0.486).
    'val':transforms.Compose([
        transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD)
    ]),
}

In [None]:
class PlantPathologyDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs read from disk.

    Args:
        x: pandas Series of image file paths (indexed positionally).
        y: pandas Series of labels, aligned with ``x``.
        vocab: label vocabulary; stored on the instance, not used for loading.
        transforms: optional callable applied to the PIL image; when falsy the
            raw PIL image is returned.
    """

    def __init__(self, x,y, vocab, transforms):
        self.x = x #file path in CSV
        self.y = y #label in CSV
        # Use the argument itself: the parameter used to be misspelled `coab`,
        # so this line silently picked up a global named `vocab` instead.
        self.vocab = vocab #Dictionary
        self.transforms = transforms

    def __len__(self):
        """Number of samples, i.e. the number of file paths."""
        return len(self.x)

    def __getitem__(self,idx): #file name --> preprocessed image
        """Load sample ``idx`` and return ``(image, label)``."""
        fname = self.x.iloc[idx]
        label = self.y.iloc[idx]
        # Close the file handle right away and force 3 channels so grayscale
        # or RGBA files do not break Normalize, which is given 3 means/stds.
        with Image.open(fname) as img:
            image = img.convert("RGB")

        if self.transforms:
            image = self.transforms(image)

        return image, label

In [None]:
train_ds = PlantPathologyDataset(train_df_X,
                                train_df_y,
                                vocab,
                                data_transforms['train'])
valid_ds = PlantPathologyDataset(valid_df_X,
                                valid_df_y,
                                vocab,
                                data_transforms['val'])


#######################################################
# For distributed: each process trains on its own shard of the data.
if config.distributed:
    train_sampler = DistributedSampler(
        train_ds,
        num_replicas = config.world_size,
        rank = args.local_rank)
else:
    train_sampler = None
###############################################
# `sampler` and `shuffle=True` are mutually exclusive in DataLoader: with a
# DistributedSampler the sampler does the shuffling, otherwise the loader does.

train_dl = DataLoader(train_ds,
                     batch_size = config.BATCH_SIZE,
                     shuffle = (train_sampler is None),
                     sampler = train_sampler,
                     num_workers = config.num_workers,
                     pin_memory = config.pin_memory)
# Validation is not sharded: every process evaluates the full validation set.
valid_dl = DataLoader(valid_ds,
                     batch_size = config.BATCH_SIZE,
                     shuffle = False,
                     num_workers = config.num_workers,
                     pin_memory = config.pin_memory)

In [None]:
# Alternative: models.resnet50(pretrained=True) downloads the weights itself.
# For an offline setup, download the checkpoint from
# "https://download.pytorch.org/models/resnet50-0676ba61.pth"
# and place it at config.pretrained_path.

model = models.resnet50()
# Load the ImageNet weights BEFORE replacing the classifier: the checkpoint
# contains `fc.weight`/`fc.bias` for the original head, which no longer match
# once `fc` becomes an nn.Sequential. map_location="cpu" keeps every
# distributed process from deserialising onto the same GPU.
state_dict = torch.load(config.pretrained_path, map_location="cpu")
model.load_state_dict(state_dict)

# New classification head: 4 output classes.
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_ftrs,512),
                         nn.ReLU(),
                         nn.Dropout(p=0.3),
                         nn.Linear(512,4))
model = model.to(device)
# train_model() switches the network back to train mode before each epoch.
model.eval()

In [None]:
# Place the model on the target device. With channels_last enabled the
# weights are additionally stored in NHWC memory order; tensor shapes are
# unchanged, only the strides differ.
if not config.channels_last:
    model = model.to(config.device)
else:
    model = model.to(config.device, memory_format=torch.channels_last)
    

In [None]:
# Optimizer (and, optionally, Apex mixed precision).
if config.USE_AMP:
    optimizer = FusedAdam(model.parameters(), config.lr)
    # amp.initialize patches model and optimizer for mixed precision.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O2")  # O1/O2/O3
else:
    optimizer = optim.Adam(model.parameters(), config.lr)


if config.distributed:
    # For distributed: wrap the model only AFTER amp.initialize.
    # Each process drives exactly one GPU: the one matching its local rank.
    model = torch.nn.parallel.DistributedDataParallel(model,
                                                     device_ids = [args.local_rank],
                                                     output_device = args.local_rank)
# Loss function
criterion = nn.CrossEntropyLoss()

In [None]:
def _move_batch(x, y):
    """Move one (images, labels) batch to config.device, honouring channels_last."""
    if config.channels_last:
        x = x.to(config.device, memory_format=torch.channels_last)
    else:
        x = x.to(config.device)
    return x, y.to(config.device)


def _is_main_process():
    """True when not running distributed, or when this is rank 0."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return torch.distributed.get_rank() == 0
    return True


def train_model(model,criterion, optimizer, num_epochs=10):
    """Train ``model`` and report validation loss/accuracy after every epoch.

    Reads the module-level ``train_dl``, ``valid_dl`` and ``config``. After the
    last epoch the weights are written to ``config.saved_path``.

    Args:
        model: network to optimise (plain module or DistributedDataParallel).
        criterion: loss taking ``(logits, targets)``.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: number of passes over ``train_dl``.
    """
    # Synchronise so the timer does not include earlier asynchronous GPU work.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    since = time.time()

    for epoch in range(num_epochs):
        print('Epoch{}/{}'.format(epoch, num_epochs-1))
        print('-'*10)

        # ---- training ----
        model.train()
        # A DistributedSampler only reshuffles when told which epoch it is.
        if isinstance(train_dl.sampler, DistributedSampler):
            train_dl.sampler.set_epoch(epoch)

        for x,y in train_dl:
            x, y = _move_batch(x, y)

            optimizer.zero_grad(set_to_none= True)

            train_logits = model(x)
            train_loss = criterion(train_logits,y)

            if config.USE_AMP:
                # Apex: run the backward pass on the scaled loss.
                with amp.scale_loss(train_loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                train_loss.backward()  # computes the weight gradients

            # Apply the gradients; without this the weights never change.
            optimizer.step()

        # ---- validation ----
        model.eval()
        running_loss = 0.0
        running_corrects = 0
        total = 0

        with torch.no_grad():
            for x,y in valid_dl:
                x, y = _move_batch(x, y)
                valid_logits = model(x)
                valid_loss = criterion(valid_logits, y)
                valid_preds = valid_logits.argmax(dim=1)  # predicted class per sample
                # criterion returns the batch mean; weight it by batch size.
                running_loss += valid_loss.item() * x.size(0)
                running_corrects += (valid_preds == y).sum().item()
                total += y.size(0)

        # max(total, 1) keeps an empty validation loader from dividing by zero.
        epoch_loss = running_loss / max(total, 1)
        epoch_acc = running_corrects / max(total, 1)
        print("valid loss is {}".format(epoch_loss))
        print("valid accuracy is {}".format(epoch_acc))

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # Only one process writes the checkpoint, and the DDP wrapper is removed
    # so the saved keys carry no "module." prefix.
    if _is_main_process():
        to_save = model.module if isinstance(model, DistributedDataParallel) else model
        torch.save(to_save.state_dict(), config.saved_path)
    
    
# Kick off training for the configured number of epochs.
train_model(model=model,
            criterion=criterion,
            optimizer=optimizer,
            num_epochs=config.EPOCHS)

In [None]:
#run with this command
# !python -m torch.distributed.launch --nproc_per_node=4 Train_ddp.py