In [None]:
!pip3 install git+https://github.com/shijianjian/EfficientNet-PyTorch-3D
!pip3 install torchsummary

In [None]:
from efficientnet_pytorch_3d import EfficientNet3D
from torchsummary import summary

In [None]:
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import time
import glob
import os
import torch
from torch import nn
import random
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F
import pandas
np.random.seed(1)
torch.backends.cudnn.benchmark = True
from torch.utils.tensorboard import SummaryWriter

In [None]:
from tqdm import tqdm
import math

In [None]:
def set_seed(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU), exports the seed as
    ``PYTHONHASHSEED`` and, when CUDA is available, also seeds all CUDA
    devices and switches cuDNN to deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(12)

In [None]:
class sum_writer:
    """Small wrapper around a TensorBoard ``SummaryWriter`` that starts from a clean folder.

    Args:
        folder: directory the event files are written to. Files already
            present directly inside it are deleted when the wrapper is built.
    """

    def __init__(self,folder):
        if os.path.isdir(folder):
            print('Removing old events..')
            for fil in os.listdir(folder):
                path = os.path.join(folder, fil)
                # os.remove() raises on directories, so only regular files
                # are cleared; nested folders are left alone.
                if os.path.isfile(path):
                    os.remove(path)

        self.writer = SummaryWriter(folder)

    def close(self):
        """Flush pending events and close the underlying writer."""
        self.writer.flush()
        self.writer.close()

    def add_info_new(self,msg,value1,value2):
        """Log one scalar.

        Args:
            msg: tag of the scalar (e.g. ``"Loss/train"``).
            value1: value to record.
            value2: global step the value belongs to.
        """
        self.writer.add_scalar(msg, value1, value2)
        
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def save_models(e,model,out_path,name):
    """Write a checkpoint for the given epoch.

    The checkpoint is a dict with the model's ``state_dict`` under ``'model'``
    and the 1-based epoch number under ``'epoch'``; it is stored as
    ``<name>_e<NN>.pth`` inside ``out_path``.

    Args:
        e: zero-based epoch index.
        model: module whose ``state_dict()`` is saved.
        out_path: destination directory.
        name: file name prefix.
    """
    epoch_number = e + 1
    checkpoint = dict(model=model.state_dict(), epoch=epoch_number)
    target = os.path.join(out_path, '%s_e%02d.pth' % (name, epoch_number))
    torch.save(checkpoint, target)

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy@k for the specified values of k.

    Args:
        output: (batch, classes) tensor of scores.
        target: (batch,) tensor with the true class index of every sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 1-element tensor per k, holding the percentage of
        samples whose true class is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the transposed layout of `pred`, so a slice with
        # more than one row is not contiguous and .view(-1) would raise;
        # reshape copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [None]:
def get_all_slices(df,base_dir):
    """Count the FLAIR files of every patient listed in ``df``.

    Args:
        df: table with a ``'folder_id'`` column of patient folder names.
        base_dir: directory containing the patient folders.

    Returns:
        A list with, for every row of ``df`` in order, the number of entries
        matching ``<base_dir>/<folder_id>/flair/*`` (0 if there are none).
    """
    return [
        len(glob.glob(os.path.join(base_dir, folder_id) + '/flair/*'))
        for folder_id in list(df['folder_id'])
    ]

def split_train_test(slices_list,folders_list,label_list,split_ratio=0.1):
    """Split three aligned sequences into train and test parts by position.

    The first ``int(len(slices_list) * split_ratio)`` items of every sequence
    form the test part; the remainder forms the train part. No shuffling is
    done.

    Returns:
        ``(train_slices, train_folders, train_labels,
        test_slices, test_folders, test_labels)``.
    """
    cut = int(len(slices_list) * split_ratio)
    aligned = (slices_list, folders_list, label_list)
    train_part = tuple(seq[cut:] for seq in aligned)
    test_part = tuple(seq[:cut] for seq in aligned)
    return train_part + test_part



In [None]:
# Patient table; dtype='str' makes pandas read every column as strings.
df = pd.read_csv('../input/rsnasubmissionresult/result.csv',dtype='str')
# Root folder that get_all_slices() joins with each 'folder_id'.
base_dir = '../input/classify-tumor-best/DATATUMORONLY_TRAIN/train'

In [None]:
# Positional hold-out split: the first TRAIN_ROWS rows are used for training
# and every remaining row for validation. Both slices share the same boundary
# so that no row is left out of both frames.
TRAIN_ROWS = 525
train_df = df.iloc[:TRAIN_ROWS,:]
test_df = df.iloc[TRAIN_ROWS:,:]

In [None]:
# Per-patient slice counts, folder names and labels for both splits.
train_slices_list = np.array(get_all_slices(train_df,base_dir))
train_folders_list = np.array(list(train_df['folder_id']))
train_label_list = np.array(list(train_df['MGMT_value']))
test_slices_list = np.array(get_all_slices(test_df,base_dir))
test_folders_list = np.array(list(test_df['folder_id']))
test_label_list = np.array(list(test_df['MGMT_value']))

# Keep only the patients that have between 1 and 50 slices; the same
# positions are selected from all three aligned arrays.
indexes = np.where((train_slices_list > 0 )&(train_slices_list < 51))
train_slices_list, train_folders_list, train_label_list = (
    column[indexes]
    for column in (train_slices_list, train_folders_list, train_label_list)
)

indexes = np.where((test_slices_list > 0 )&(test_slices_list < 51))
test_slices_list, test_folders_list, test_label_list = (
    column[indexes]
    for column in (test_slices_list, test_folders_list, test_label_list)
)

In [None]:
class DataGenerator(torch_data.IterableDataset):
    """Iterable dataset that yields whole batches of FLAIR volumes of similar depth.

    Patients are bucketed by slice count (see ``get_groups``) and every bucket
    is cut into batches. Each batch is zero-padded along the depth axis to the
    largest slice count it contains. Iterating yields ``(batch_x, labels)``
    where ``batch_x`` is a float tensor laid out as (B, CH, D, H, W) and
    ``labels`` is a long tensor.

    Args:
        slices_list: array with the slice count of every patient.
        folders_list: array of patient folder names, aligned with ``slices_list``.
        label_list: array of labels convertible with ``astype(int)``, aligned
            with ``slices_list``.
        width: width every slice is resized to.
        height: height every slice is resized to.
        batch_size: maximum number of volumes per yielded batch.
        shuffle: if true, every group is permuted at the start of an iteration.
    """

    def __init__(self,slices_list,folders_list,label_list,width=256,height=256,batch_size=16,shuffle=True):
        self.batch_size = batch_size
        self.base_dir = '../input/classify-tumor-best/DATATUMORONLY_TRAIN/train'
        self.width = width
        self.crop_length = 224
        self.height = height
        self.tolerance = 5  # half-width of a slice-count group
        self.shuffle = shuffle
        self.intial_slices_list = slices_list
        self.intial_folders_list = folders_list
        self.intial_label_list = label_list
        self.class_map = {'MGMT':1,'NONMGMT':0}
        self.min_slice = 64
        self.normdata = 1  # truthy -> pixel values are divided by 255
        # Extra length reported by __len__ until the first __iter__ call.
        # NOTE(review): presumably headroom so the reported length is not
        # exceeded once groups are sharded across workers - confirm.
        self.offset = 50 #for __len__
        self.index_map = {v:k for k,v in self.class_map.items()}
        self.get_groups()
        self._get_size()

        print('Loader==>',len(self.intial_slices_list),self.class_map,self.index_map)
        print('Grps:',[(K,len(V)) for K,V in self.group_indexes.items()])
        print('Size:',self.size-self.offset)

    def get_groups(self):
        """Bucket patient positions by slice count into ``self.group_indexes``.

        Walking the patients in order, every slice count ``num`` opens the
        window ``[max(num - tolerance, 0), num + tolerance]`` and claims all
        patients not yet assigned whose slice count lies inside it. Keys are
        ``'<low>-<high>'`` strings, values are ascending arrays of positions
        into the input lists.
        """
        # Working copy in which already-assigned patients are marked with -1.
        tmp = np.copy(self.intial_slices_list)
        self.group_indexes = {}

        for num in self.intial_slices_list:
            ul = num+self.tolerance
            ll = max(num-self.tolerance,0)
            key = f'{ll}-{ul}'
            indexes = np.where((tmp >= ll) & (tmp <= ul) & (tmp != -1))[0]
            if len(indexes) == 0:
                continue
            self.group_indexes[key] = indexes
            tmp[indexes] = -1

    def __len__(self):
        """Return ``self.size`` (includes ``offset`` until ``__iter__`` recomputes it)."""
        return self.size

    def __iter__(self):
        """Rebuild the list of batches (shuffling inside groups if enabled) and restart."""
        self.i = 0
        self.train_indexes = []
        for key in self.group_indexes.keys():
            bindexes = self.group_indexes[key]
            if self.shuffle:
                bindexes = np.random.choice(bindexes,len(bindexes),replace=False)
            self.train_indexes.extend([bindexes[i:i+self.batch_size] for i in range(0,len(bindexes),self.batch_size)])

        self.size = len(self.train_indexes)
        return self

    def __next__(self):
        """Load and return the next batch, raising ``StopIteration`` when exhausted."""
        if self.i >= self.size:
            raise StopIteration
        self.i += 1
        return self.prepare_data(self.train_indexes[self.i-1])

    def prepare_data(self,random_indexes):
        """Build one batch from the patients at ``random_indexes``.

        Returns:
            ``(batch_x, labels)``: a float tensor (B, CH, D, H, W) and a long
            tensor with one label per volume.
        """
        random_folder = np.take(self.intial_folders_list,random_indexes)
        random_slices = np.take(self.intial_slices_list,random_indexes)
        random_labels = np.take(self.intial_label_list,random_indexes)
        # Every volume of the batch is padded to the deepest one.
        self.max_depth = random_slices.max()
        batch_x = self.__data_gen_batch(random_folder)
        if self.normdata:
            batch_x = batch_x/255.
        batch_x = torch.tensor(batch_x.transpose(0,4,3,1,2)).float() #B,CH,D,H,W

        labels = torch.tensor(random_labels.astype(int),dtype=torch.long)
        return batch_x,labels

    def __data_gen_image(self,folder_name):
        """Load one patient's FLAIR slices into a zero-padded (H, W, D, 3) volume.

        Slices are ordered by the number between the last ``-`` and the file
        extension of their path. Depth positions beyond the patient's own
        slice count stay zero.
        """
        flair_path = glob.glob(os.path.join(self.base_dir,folder_name,'flair/*'))
        flair_path = sorted(flair_path,key=lambda x:x.split('-')[-1].split('.')[-2].zfill(3))
        all_images = np.zeros(shape=(self.max_depth,self.height,self.width,3),dtype=np.float32)
        # Never write past the padded depth, even if files were added on disk
        # after the slice counts were collected.
        for i,img_path in enumerate(flair_path[:self.max_depth]):
            img = cv2.imread(img_path,1)
            if img is None:
                raise IOError(f'Could not read image: {img_path}')
            # Store the resized slice; without this the volume stays all zeros.
            all_images[i] = cv2.resize(img,(self.width,self.height))
        return np.transpose(all_images,(1,2,0,3))

    def __data_gen_batch(self,folder_names):
        """Stack the volumes of ``folder_names`` into a (B, H, W, D, 3) array."""
        batch_data = np.empty(shape=(len(folder_names),self.height,self.width,self.max_depth,3))
        for i,patient_id in enumerate(folder_names):
            batch_data[i,] = self.__data_gen_image(patient_id)
        return batch_data

    def _get_size(self):
        """Set ``self.size`` to ``offset`` plus the number of batches over all groups."""
        self.size = 0
        self.size += self.offset
        for k in self.group_indexes:
            div, mod = divmod(len(self.group_indexes[k]), self.batch_size)
            if mod!=0:
                div +=1
            self.size += div
        
        

In [None]:
 def worker_init_fn(_):
     """Give every DataLoader worker its own contiguous share of each group.

     Each worker process holds a copy of the dataset; this trims the copy's
     ``group_indexes`` to ``ceil(len(group) / num_workers)`` consecutive
     entries selected by the worker id, so workers do not yield the same
     patients. Does nothing when called outside a worker process.

     Args:
         _: worker id passed by the DataLoader (unused; read from the worker info).
     """
     worker_info = torch.utils.data.get_worker_info()
     if worker_info is None:
         # Single-process loading (num_workers=0): there is nothing to shard.
         return
     dataset = worker_info.dataset
     worker_id = worker_info.id
     for K in dataset.group_indexes:
         split_size = math.ceil(len(dataset.group_indexes[K]) / worker_info.num_workers)
         dataset.group_indexes[K] = dataset.group_indexes[K][worker_id*split_size:(worker_id +1)*split_size]
    

In [None]:
# batch_size is the number of volumes per batch produced by the generator
# itself; slices are resized to 224x224.
train_datagen = DataGenerator(train_slices_list,train_folders_list,train_label_list,batch_size=3,height=224,width=224,shuffle=True)
# NOTE(review): the validation generator is also built with shuffle=True.
test_datagen = DataGenerator(test_slices_list,test_folders_list,test_label_list,batch_size=2,height=224,width=224,shuffle=True)

In [None]:
# batch_size=None disables the DataLoader's automatic batching: the datasets
# already yield complete (batch_x, labels) pairs. worker_init_fn trims each
# worker's copy of the dataset to its own share of every group.
train_loader = torch.utils.data.DataLoader(train_datagen, batch_size=None,pin_memory=False,num_workers=4,worker_init_fn=worker_init_fn)
val_loader = torch.utils.data.DataLoader(test_datagen, batch_size=None,pin_memory=False,num_workers=2,worker_init_fn=worker_init_fn)

In [None]:
# Sanity check: print the reported loader length and the tensor shapes of the
# first 11 batches (indices 0..10).
print(len(train_loader))
for i,sample in enumerate(train_loader):
    if i>10:
        break
    print(i,sample[0].shape,sample[1].shape)

In [None]:
class Model(nn.Module):
    """Two-class video classifier backed by torchvision's ``mc3_18``.

    The backbone is created without pretrained weights and with a 2-way
    output layer.
    """

    def __init__(self):
        super().__init__()
        # Alternatives tried earlier: EfficientNet3D "efficientnet-b0" and
        # torchvision's r2plus1d_18.
        backbone_kwargs = dict(pretrained=False, progress=True, num_classes=2)
        self.net = torchvision.models.video.mc3_18(**backbone_kwargs)

    def forward(self, x):
        """Run the backbone on ``x`` and return its output unchanged."""
        return self.net(x)
    
# Rebind the name before building a fresh model.
# NOTE(review): presumably so a model from an earlier run of this cell can be
# released first - confirm.
model = None
model = Model()
# Moves the parameters to the GPU; requires a CUDA device.
model.cuda()
print('Done')

In [None]:
# Print a layer summary for one input of size (3, 100, 224, 224); the batch
# dimension is not part of input_size.
summary(model, input_size=(3,100, 224,224))

In [None]:
# Cross-entropy over the two output logits.
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The learning rate is multiplied by gamma=0.1 after 50, 90 and 95 calls to
# scheduler.step().
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50,90,95], gamma=0.1, last_epoch=-1, verbose=False)

In [None]:
# Grab the first training batch; `i` keeps referring to it after the loop.
for i in train_loader:
    print(i[0].shape,i[1].shape)
    break
#inputs = torch.randn((5, 1, 224, 224, 64)).cuda()
# Forward pass on random input to check the output shape.
inputs = torch.randn((5, 3, 1,224, 224)).cuda()
labels = torch.tensor([0,1,0,0,1],dtype=torch.long).cuda()
out = model(inputs)
print(out.shape)
# Forward pass and loss on the real batch fetched above.
img = i[0].cuda()
print(img.shape,img.dtype)
out = model(img)
print(criterion(out,i[1].cuda()))

In [None]:
def eval_model(model,valloader,epoch,writer,val_global_iter):
    """Run one validation pass and log running metrics.

    Uses the module-level ``criterion`` and ``accuracy``. The model is put in
    eval mode and evaluated with gradient tracking disabled. Whenever the
    global validation iteration is a multiple of 10, the mean loss and
    accuracy of the batches seen since the previous log (within this call)
    are written to ``writer``.

    Args:
        model: network to evaluate; inputs are moved to the GPU with ``.cuda()``.
        valloader: iterable of ``(batch, label)`` pairs that supports ``len()``.
        epoch: epoch number shown in the progress bar.
        writer: object exposing ``add_info_new(msg, value1, value2)``.
        val_global_iter: number of validation batches processed so far.

    Returns:
        The updated global validation iteration counter.
    """
    model.eval()
    dset = tqdm(enumerate(valloader),total=len(valloader))
    avg_acc = 0
    avg_loss = 0
    num_batches = 0

    val_writer_freq = 10
    val_total_loss = 0
    val_total_acc = 0
    window_batches = 0  # batches accumulated since the last log

    # No gradients are needed for validation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for i,sample in dset:
            val_global_iter+=1
            num_batches += 1
            window_batches += 1
            batch,label = sample
            batch = batch.cuda()
            label = label.cuda()
            out = model(batch)
            loss = criterion(out, label)
            accs = accuracy(out.data, label, topk=(1,))

            loss_value = loss.item()
            acc_value = accs[0].item()
            val_total_loss += loss_value
            val_total_acc += acc_value
            avg_loss += loss_value
            avg_acc += acc_value
            if val_global_iter%val_writer_freq == 0:
                # The accumulators restart on every call, so a window can hold
                # fewer than val_writer_freq batches; divide by the real count.
                writer.add_info_new("Loss/val", value1=val_total_loss/window_batches, value2=val_global_iter)
                writer.add_info_new("ACC_Ori/val",value1=val_total_acc/window_batches, value2=val_global_iter)
                val_total_loss = 0
                val_total_acc = 0
                window_batches = 0

            dset.set_description(f'ValEpoch:{epoch} | Loss:{loss_value:.4f} | Acc:{acc_value:.1f}')

    if num_batches:
        print(f"Avg Loss:{avg_loss/num_batches:.4f} | Avg Acc:{avg_acc/num_batches:.1f}")
    else:
        print('No validation batches were produced.')
    return val_global_iter

In [None]:
# Run configuration: checkpoints go to ./saved_model, TensorBoard events to
# ./logs/<name> (old event files there are removed by sum_writer).
name = 'Eff_3d'
out_path = './saved_model'
os.makedirs(out_path,exist_ok=True)
folder = f'./logs/{name}'
os.makedirs(folder,exist_ok=True)
writer = sum_writer(folder)

# Counters and running sums that persist across epochs; the sums are reset
# every `train_writer_freq` iterations when they are logged.
train_global_iter = 0
train_writer_freq = 10
train_total_loss = 0
val_global_iter = 0
train_total_acc = 0

try:
    for epoch in range(100):
        model.train()
        dset = tqdm(enumerate(train_loader),total=len(train_loader))
        avg_acc = 0
        avg_loss = 0
        for i,sample in dset:
            train_global_iter+=1
            optimizer.zero_grad()
            batch,label = sample
            batch = batch.cuda()
            label = label.cuda()
            out = model(batch)
            loss = criterion(out, label)
            loss.backward()
            accs = accuracy(out.data, label, topk=(1,))

            train_total_loss += loss.item()
            train_total_acc += accs[0].item()
            avg_loss += loss.item()
            avg_acc += accs[0].item()

            optimizer.step()
            optlr = get_lr(optimizer)

            if train_global_iter%train_writer_freq == 0:
                writer.add_info_new("Loss/train", value1=train_total_loss/train_writer_freq, value2=train_global_iter)
                writer.add_info_new("LR/train",value1=optlr, value2=train_global_iter)
                writer.add_info_new("ACC_Ori/train",value1=train_total_acc/train_writer_freq, value2=train_global_iter)
                train_total_loss = 0
                train_total_acc = 0

            dset.set_description(f'Epoch:{epoch} | LR:{optlr:.5f} | Loss:{loss.item():.4f} | Acc:{accs[0].item():.1f}')
        print(f"Avg Loss:{avg_loss/(i+1):.4f} | Avg Acc:{avg_acc/(i+1):.1f}")
        # A checkpoint is written on every even epoch index.
        if epoch%2 == 0:
            save_models(epoch,model,out_path,name)
        val_global_iter = eval_model(model,val_loader,epoch,writer,val_global_iter)
        # The LR schedule advances once per epoch.
        scheduler.step()
        # Mark the epoch boundary on the same step axis as the other training
        # scalars (the global iteration count).
        writer.add_info_new("EPOCH/train",value1=epoch+1, value2=train_global_iter)
finally:
    # Flush and close the event file even if training is interrupted.
    writer.close()
        