In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition's training label table; later cells read its 'id' and
# 'has_cactus' columns.
TrainingLabels= pd.read_csv('/kaggle/input/aerial-cactus-identification/train.csv')
# Bare expression so the notebook renders the table as the cell output.
TrainingLabels

In [None]:
#Extract the Zip Files Containing The Training Images and Test Images
import zipfile

# Names of the competition archives (without the ".zip" suffix).  Each archive
# is unpacked into a same-named folder under the current working directory.
DataSetArray = ["test", "train"]

for SplitName in DataSetArray:
    ArchivePath = "/kaggle/input/aerial-cactus-identification/" + SplitName + ".zip"
    TargetDir = "./" + SplitName + "/"
    # The context manager closes the archive as soon as extraction finishes.
    with zipfile.ZipFile(ArchivePath, "r") as Archive:
        Archive.extractall(TargetDir)

In [None]:
import cv2
import matplotlib.pyplot as plt
import glob


# Collect the path of every extracted training image.  glob returns matches in
# arbitrary order, so sort them to keep index-based lookups reproducible
# between runs.
ListOfImagesInTrainFolder = sorted(glob.glob("./train/train/*.jpg"))

# Show a count and a small sample instead of dumping every path into the output.
print("Found " + str(len(ListOfImagesInTrainFolder)) + " training images")
ListOfImagesInTrainFolder[:5]

In [None]:
def DisplayImageAndLabel(i):
    """Show the i-th training image together with its row from the label table.

    Args:
        i: Index into ``ListOfImagesInTrainFolder``.

    Raises:
        IndexError: If ``i`` is outside ``ListOfImagesInTrainFolder``.
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    ImagePath = ListOfImagesInTrainFolder[i]
    print(ImagePath)

    im = cv2.imread(ImagePath)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError("Could not read image: " + ImagePath)

    # OpenCV decodes to BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    # The order of the globbed paths is unrelated to the row order of the CSV,
    # so find the label by file name instead of by position.
    Name = os.path.basename(ImagePath)
    Match = TrainingLabels.loc[TrainingLabels['id'] == Name]
    print(Match.iloc[0] if len(Match) else "No label found for " + Name)
    

In [None]:
def GetImageNameFromPath(Path):
    """Return the bare file name (e.g. ``"abc.jpg"``) of an image path.

    Works for any directory.  Stripping a hard-coded ``"./train/train/"``
    prefix is not safe: when the prefix is absent (for example
    ``"./test/test/abc.jpg"``) ``str.find`` returns -1 and the slice cuts the
    string at the wrong offset, dropping characters from the file name.

    Args:
        Path: Path to an image file.

    Returns:
        The final component of ``Path``.
    """
    return os.path.basename(Path)
    

In [None]:
# Quick check of the helper on the first globbed training image path.
GetImageNameFromPath(ListOfImagesInTrainFolder[0])

In [None]:
import os
import numpy as np
import torch

import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

In [None]:
# Remember whether a CUDA device is usable; later cells use this flag to decide
# whether the model and the batches are moved to the GPU.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is not available.  Training on CPU ...')

In [None]:
# VGG-16 takes 224x224 inputs.  RandomResizedCrop produces a 224x224 image
# from a randomly chosen crop (not a plain resize); ToTensor converts the
# result to a tensor.
# NOTE(review): this same random transform is also handed to the test dataset
# below — confirm that is intended for inference.
transform_steps = [
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
]
data_transform = transforms.Compose(transform_steps)

In [None]:
# Wrap the extracted folders as ImageFolder datasets that share one transform.
train_data = datasets.ImageFolder(root="./train/", transform=data_transform)
test_data = datasets.ImageFolder(root="./test/", transform=data_transform)

# Report how many images each dataset picked up.
for caption, dataset in (('Num training images: ', train_data),
                         ('Num test images: ', test_data)):
    print(caption, len(dataset))

In [None]:
# Replace the folder-derived class index stored by ImageFolder with the real
# has_cactus value from the CSV.  The lookup table is built right here from
# TrainingLabels so this cell works on a fresh kernel (Restart & Run All)
# without relying on any list computed further down the notebook.
LabelByImageName = dict(zip(TrainingLabels['id'], TrainingLabels['has_cactus']))

for i, (ImagePath, _) in enumerate(train_data.imgs):
    # The CSV's 'id' column is matched against the bare file name.
    ImageName = os.path.basename(ImagePath)
    train_data.imgs[i] = (ImagePath, int(LabelByImageName[ImageName]))

# Print only a handful of entries as a sanity check instead of every sample.
for img in train_data.imgs[:5]:
    print(img)
    


In [None]:
# Loader settings shared by the training and test loaders.
batch_size = 20
num_workers = 0
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)

# Build both loaders.  `shuffle` is not passed, so its default applies and
# batches follow the order of the underlying dataset.
train_loader = torch.utils.data.DataLoader(train_data, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_data, **loader_kwargs)

In [None]:
# has_cactus value for each entry of train_data.imgs, in the same order.
Labels=[]

def GetLabelOfThatImage():
    """Fill the global ``Labels`` list with the CSV label of every training image.

    The label table is turned into a dictionary once, so each image costs a
    single hash lookup instead of a scan over the whole DataFrame.  ``Labels``
    is emptied first, which makes repeated calls leave exactly one entry per
    image.

    Raises:
        KeyError: If an image's file name has no row in ``TrainingLabels``.
    """
    # drop_duplicates keeps the first row per id, i.e. the same row that a
    # boolean-mask lookup followed by .iloc[0] would select.
    LabelByImageName = (
        TrainingLabels.drop_duplicates('id').set_index('id')['has_cactus'].to_dict()
    )
    Labels.clear()
    for imgPath, _ in train_data.imgs:
        Labels.append(int(LabelByImageName[GetImageNameFromPath(imgPath)]))

GetLabelOfThatImage()

In [None]:
# Report how many labels were collected.
print("Length of Labels is "+str(len(Labels)))

In [None]:
# Display the first collected label as the cell output.
Labels[0]

In [None]:
# Visualize some sample data

# obtain one batch of training images
dataiter = iter(train_loader)
# Use the builtin next(): the iterator's own .next() method is not available
# in current PyTorch releases.
images, labels = next(dataiter)
images = images.numpy() # convert images to numpy for display

# plot the images in the batch, along with the corresponding labels
n_images = len(images)
n_cols = (n_images + 1) // 2  # two rows; must be an int, add_subplot rejects floats
fig = plt.figure(figsize=(25, 4))
for idx in range(n_images):
    ax = fig.add_subplot(2, n_cols, idx+1, xticks=[], yticks=[])
    # tensors are channel-first; matplotlib wants channel-last
    ax.imshow(np.transpose(images[idx], (1, 2, 0)))
    # Title each image with the label the loader returned for it, so the
    # caption stays correct regardless of the order the loader uses.
    ax.set_title(int(labels[idx]))

In [None]:
# Load torchvision's VGG-16, requesting pretrained weights (pretrained=True).
vgg16 = models.vgg16(pretrained=True)

# Print the layer structure; later cells index into vgg16.features and
# vgg16.classifier.
print(vgg16)

In [None]:
# Freeze every parameter of the "features" sub-module so that backpropagation
# does not compute gradients for it.
for frozen_param in vgg16.features.parameters():
    frozen_param.requires_grad_(False)

In [None]:
import torch.nn as nn

# Swap the layer at classifier index 6 for a 2-output linear layer
# (cactus / not a cactus) that accepts the same number of input features.
n_inputs = vgg16.classifier[6].in_features
last_layer = nn.Linear(in_features=n_inputs, out_features=2)
vgg16.classifier[6] = last_layer

# Move the whole model to the GPU when one is available.
if train_on_gpu:
    vgg16.cuda()

# Sanity check: print the number of outputs of the replaced layer.
print(vgg16.classifier[6].out_features)

In [None]:
import torch.optim as optim

# Loss: cross-entropy over the model's output classes.
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent with learning rate 0.001, given only
# the parameters of the classifier sub-module.
optimizer = optim.SGD(params=vgg16.classifier.parameters(), lr=0.001)

In [None]:
n_epochs = 5

for epoch in range(1, n_epochs+1):

    # Loss accumulated since the last progress print.
    train_loss = 0.0

    # The model is left in whatever mode it is already in; nothing here
    # switches it explicitly.
    for batch_number, (data, target) in enumerate(train_loader, start=1):
        # Keep the batch on the same device as the model.
        if train_on_gpu:
            data = data.cuda()
            target = target.cuda()

        # Standard step: reset gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        output = vgg16(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Every 20 mini-batches report the mean loss over those batches and
        # start accumulating afresh.
        if batch_number % 20 == 0:
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_number, train_loss / 20))
            train_loss = 0.0

In [None]:
# Predict has_cactus for every test image and write the submission file.

# Put layers that behave differently during training into inference mode.
vgg16.eval()

Predictions = []
with torch.no_grad():  # inference only, so skip autograd bookkeeping
    for i in range(len(test_data)):
        # test_data.imgs[i] is the (path, class_index) entry behind
        # test_data[i]; indexing both with the same i keeps each id paired
        # with the prediction for its own image.
        Name = os.path.basename(test_data.imgs[i][0])

        Img = test_data[i][0].unsqueeze(0)  # add the batch dimension
        # The model lives on the GPU only when train_on_gpu is set, so the
        # input follows the same rule instead of calling .cuda() unconditionally.
        if train_on_gpu:
            Img = Img.cuda()

        output = vgg16(Img)
        Predictions.append({'id': str(Name), 'has_cactus': int(output.argmax(dim=1))})

# Build the frame once from the collected records (growing a DataFrame row by
# row is quadratic, and DataFrame.append no longer exists in pandas 2.x).
# The column order is pinned explicitly.
MyData = pd.DataFrame(Predictions, columns=['id', 'has_cactus'])

MyData.to_csv('my_submission.csv', index=False)
print("Your submission was successfully saved!")

In [None]:
# Render the prediction table as the cell output.
MyData

In [None]:

# Write a second copy of the predictions, without the index column, into the
# ./test/ folder.
MyData.to_csv('./test/Result.csv', index=False)