In [None]:
# Importing all the modules 
import torch 
import torch.nn as nn 
import torch.optim as optim

import torchvision 
from torch.utils.data import DataLoader,Dataset,TensorDataset
import torch.utils.data as utils
from torchvision import transforms 
import pandas as pd




In [None]:
import matplotlib.image as mpimg
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset

In [None]:
from sklearn import model_selection
from sklearn.metrics import roc_auc_score

In [None]:
# Dataset locations, all expressed relative to the competition root folder
data_dir = "aerial-cactus-identification/"
train_dir = f"{data_dir}train/"
test_dir = f"{data_dir}test"

In [None]:
# Load the training labels. The path is built from `data_dir` so that the CSV
# and the image folders always point at the same dataset root.
# Columns used later in the notebook: `id` (image file name) and `has_cactus`.
labels = pd.read_csv(data_dir + "train.csv")
labels.head()

In [None]:
# K-Fold: tag every training row with the fold in which it is held out.

df = labels.sample(frac=1).reset_index(drop=True)    # shuffled copy with a clean 0..n-1 index
df['kfold'] = -1                                      # -1 means "not assigned yet"

# The targets must come from the *shuffled* frame: taking them from `labels`
# would pair each row of `df` with another row's label and break stratification.
y = df.has_cactus.values
kf = model_selection.StratifiedKFold(n_splits=5, shuffle=True)
idx = kf.get_n_splits(X=df, y=y)                      # just the number of folds, no splitting happens here
print(idx)

# `split` yields positional indices; they coincide with the labels of `df`
# because the index was reset above, so `.loc` is safe.
for fold, (_, valid_idx) in enumerate(kf.split(X=df, y=y)):
    df.loc[valid_idx, 'kfold'] = fold

# Every row has to end up in exactly one validation fold.
assert (df['kfold'] >= 0).all()


In [None]:
import os 

class ImageData(Dataset):
    """Map-style dataset that reads images from disk by file name.

    Parameters
    ----------
    df : sequence of str
        Image file names, indexable by integer position (e.g. a list).
    df_target : sequence, optional
        Labels aligned one-to-one with ``df``. May be omitted for unlabeled
        data; ``__getitem__`` then returns ``-1`` as a placeholder label.
    data_dir : str
        Directory that contains the image files.
    transform : callable, optional
        Applied to the array returned by ``mpimg.imread``. When ``None`` the
        raw array is returned unchanged.

    Raises
    ------
    ValueError
        If ``df_target`` is given and its length differs from ``df``.
    """

    def __init__(self, df, df_target=None, data_dir="", transform=None):
        super().__init__()

        if df_target is not None and len(df_target) != len(df):
            raise ValueError(
                "df and df_target must have the same length "
                "({} != {})".format(len(df), len(df_target))
            )

        self.df = df
        self.transform = transform
        self.data_dir = data_dir
        self.df_target = df_target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.df)

    def __getitem__(self, index):
        """Load one image and return ``(image, label)``."""
        img_name = self.df[index]
        # Placeholder label keeps the (image, label) contract for unlabeled data.
        label = self.df_target[index] if self.df_target is not None else -1

        img_path = os.path.join(self.data_dir, img_name)
        image = mpimg.imread(img_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, label
        
    

In [None]:
# Training-time preprocessing applied to each image loaded by ImageData:
# first to a PIL image, then to a tensor.
data_trans = transforms.Compose(
    [transforms.ToPILImage(), transforms.ToTensor()]
)

#train_data = ImageData(df = labels,data_dir=train_dir,transform=data_trans)



In [None]:
#!pip install efficientnet_pytorch


In [None]:
# EfficientNet-B1 backbone from the efficientnet_pytorch package.
# NOTE(review): `from_name` presumably builds the architecture without loading
# pretrained weights (as opposed to `from_pretrained`) — confirm this is intended.
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_name('efficientnet-b1')


In [None]:
# Mark every weight of the network as trainable (nothing is frozen)
for weight in model.parameters():
    weight.requires_grad_(True)

In [None]:
# Replace the classification head with a single-output layer (binary task)
num_ftrs = model._fc.in_features
model._fc = nn.Linear(in_features=num_ftrs, out_features=1)

# BCELoss works on probabilities, so the training code applies a sigmoid first
loss_func = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters())

In [None]:
def train(fold, epochs=5, batch_t=2, batch_v=2):
    """Train the global ``model`` on every fold except ``fold``; validate on ``fold``.

    Relies on module-level state: ``df`` (columns ``id``, ``has_cactus``,
    ``kfold``), ``train_dir``, ``data_trans``, ``ImageData``, ``model``,
    ``optimizer`` and ``loss_func``.

    NOTE(review): ``model`` and ``optimizer`` are shared globals, so calling
    this function for several folds keeps training the same weights. Images
    held out in one call were training images in the previous calls. Re-create
    the model and optimizer per fold if independent fold scores are needed.

    Parameters
    ----------
    fold : int
        Value of ``df.kfold`` to hold out for validation.
    epochs : int, optional
        Number of passes over the training split (default 5).
    batch_t : int, optional
        Training batch size (default 2).
    batch_v : int, optional
        Validation batch size (default 2).

    Returns
    -------
    tuple of (list of float, list of float)
        ``(loss_log, valid_loss)``: training and validation losses sampled
        on every 1000th batch.
    """
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    train_data = ImageData(df=train_df.id.values.tolist(),
                           df_target=train_df.has_cactus.values,
                           data_dir=train_dir, transform=data_trans)
    valid_data = ImageData(df=valid_df.id.values.tolist(),
                           df_target=valid_df.has_cactus.values,
                           data_dir=train_dir, transform=data_trans)

    # Reshuffle the training samples every epoch; validation order is irrelevant.
    trainloader = DataLoader(dataset=train_data, batch_size=batch_t, shuffle=True)
    # The validation loader must wrap the held-out fold, not the training split.
    validloader = DataLoader(dataset=valid_data, batch_size=batch_v)

    loss_log = []
    valid_loss = []

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_total, train_batches = 0.0, 0
        for ii, (data, target) in enumerate(trainloader):
            target = target.float()

            optimizer.zero_grad()
            # Flatten the model output so its shape matches `target`;
            # BCELoss requires input and target of identical shape.
            output = model(data).reshape(-1)

            loss = loss_func(torch.sigmoid(output), target)
            loss.backward()
            optimizer.step()

            train_total += loss.item()
            train_batches = ii + 1

            if train_batches % 100 == 0:
                print("EPOCH {}  Loss {}  batch  {}".format(epoch, loss.item(), train_batches))
            if ii % 1000 == 0:
                loss_log.append(loss.item())

        # ---- validation pass (no gradients needed) ----
        model.eval()
        valid_total, valid_batches = 0.0, 0
        with torch.no_grad():
            for ii, (data, target) in enumerate(validloader):
                target = target.float()
                output = model(data).reshape(-1)
                v_loss = loss_func(torch.sigmoid(output), target)

                valid_total += v_loss.item()
                valid_batches = ii + 1

                if ii % 1000 == 0:
                    valid_loss.append(v_loss.item())
                if valid_batches % 100 == 0:
                    print("EPOCH {}  Loss {}  batch  {}".format(epoch, v_loss.item(), valid_batches))

        # Report epoch means; max(..., 1) guards against an empty split.
        print('Epoch: {} - Loss: {:.6f} v_Loss: {:.6f}'.format(
            epoch + 1,
            train_total / max(train_batches, 1),
            valid_total / max(valid_batches, 1)))

    return loss_log, valid_loss
    

In [None]:
# Run one training call per fold, holding out folds 0..4 in turn
for fold in range(5):
    train(fold)

In [None]:
# TTA - Test Time Augmentation

# NOTE(review): this path differs from `data_dir` used for train.csv — confirm
# where sample_submission.csv actually lives.
submit = pd.read_csv('../input/sample_submission.csv')

# Same preprocessing as training (`data_trans`) plus a random horizontal flip,
# which is what makes repeated passes over the test set differ.
# Resize/Normalize are deliberately not applied: training used neither, so
# applying them here would feed the model inputs it has never seen.
test_transform = transforms.Compose([
    transforms.ToPILImage(),            # ImageData passes the mpimg.imread array; convert before flipping
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# ImageData indexes file names by integer position, so pass a plain list
# rather than the whole DataFrame.
# Assumes sample_submission.csv has an `id` column of file names, like
# train.csv — TODO confirm.
test_data = ImageData(
    df=submit.id.values.tolist(),
    df_target=np.zeros(len(submit)),    # dummy labels; the test set is unlabeled
    data_dir=test_dir,
    transform=test_transform,
)
test_loader = DataLoader(dataset=test_data, shuffle=False)

In [None]:
def _predict_probs(loader):
    """Run one pass over ``loader`` and return one sigmoid probability per sample.

    Uses the module-level ``model`` in eval mode with gradients disabled.
    """
    model.eval()
    probs = []
    with torch.no_grad():
        for data, _ in loader:
            output = model(data)
            probs.extend(torch.sigmoid(output).reshape(-1).tolist())
    return np.asarray(probs)


# Four independent passes over the test loader.
predict1, predict2, predict3, predict4 = (
    _predict_probs(test_loader) for _ in range(4)
)

# Element-wise mean over the passes. The previous list `+` concatenated the
# four result lists, and dividing a list by 4.0 raises TypeError.
# Probabilities are averaged rather than thresholded 0/1 votes, so no
# information is discarded before averaging.
# NOTE(review): presumably the submission is scored with AUC (roc_auc_score is
# imported above) — confirm.
predict = np.mean([predict1, predict2, predict3, predict4], axis=0)

submit['has_cactus'] = predict
submit.to_csv('submission.csv', index=False)