### Dependencies 

In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import os 
import matplotlib.pyplot as plt



### Split Train/Val data
###### If a `.ipynb_checkpoints` folder is present, remove it first:
`rm -r jetracer/notebooks/classification_TRAIN/.ipynb_checkpoints`

In [2]:
from torch.utils.data.sampler import SubsetRandomSampler
from meanstd import get_meanstd

# Root folder of the image dataset, passed to load_split_train_test and get_meanstd.
data_dir = 'road_following_orange_line'


def load_split_train_test(datadir, valid_size = .2, batch_size=32, seed=None):
    """Build training and validation DataLoaders from a single image folder.

    The folder is opened twice with ``datasets.ImageFolder`` (once with the
    augmenting training transforms, once with the plain evaluation
    transforms). The sample indices are shuffled and partitioned, so every
    image is served by exactly one of the two loaders.

    Args:
        datadir: Root directory handed to ``datasets.ImageFolder``.
        valid_size: Fraction of the samples, between 0 and 1, held out for
            validation.
        batch_size: Number of images per batch, used for both loaders.
        seed: Optional integer seed for the split. When ``None`` the shuffle
            uses NumPy's global random state, so the split changes from run
            to run unless the caller seeded NumPy beforehand.

    Returns:
        A ``(trainloader, testloader)`` tuple of
        ``torch.utils.data.DataLoader`` objects.

    Raises:
        ValueError: If ``valid_size`` is outside ``[0, 1]``.
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(f"valid_size must be between 0 and 1, got {valid_size}")

    # Training images are augmented (colour jitter, flip, small rotation).
    # No Normalize step is applied: the tensors produced by ToTensor are
    # fed to the network as they are.
    train_transforms = transforms.Compose([
                transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=0.4),
                transforms.RandomRotation(degrees=5),
                transforms.ToTensor()
            ])

    # Validation images are only resized and converted, never augmented.
    test_transforms = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor()
            ])

    # Same files in both datasets; only the transform pipeline differs.
    train_data = datasets.ImageFolder(datadir,
                    transform=train_transforms)
    test_data = datasets.ImageFolder(datadir,
                    transform=test_transforms)

    print(train_data.classes)

    num_train = len(train_data)
    # List of sample indices, from 0 to the number of samples - 1.
    indices = list(range(num_train))
    # Number of validation samples, e.g. 20% of the data for valid_size = 0.2.
    split = int(np.floor(valid_size * num_train))

    # Shuffle the indices so the split is random; a dedicated generator is
    # used when a seed is given so the split can be reproduced exactly.
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    # The first `split` shuffled indices go to validation, the rest to training.
    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    trainloader = torch.utils.data.DataLoader(
                    train_data,
                    sampler=train_sampler,
                    batch_size=batch_size
                )
    testloader = torch.utils.data.DataLoader(
                    test_data,
                    sampler=test_sampler,
                    batch_size=batch_size
                )
    return trainloader, testloader

# Build the two loaders with 20% of the images held out for validation,
# then show the mean and std values returned by get_meanstd.
trainloader, testloader = load_split_train_test(data_dir, valid_size=.2)
mean, std = get_meanstd(data_dir)
print(mean, std, sep='\n')


['apex']
tensor([0.5335, 0.4134, 0.4262])
tensor([0.1162, 0.0894, 0.0978])


### Check GPU availability

In [3]:
# Use the CUDA device when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Count the pictures available for each class

In [4]:
# Count the images stored in each class folder of the dataset.
# Only sub-directories are visited: a stray file directly under data_dir
# would otherwise make os.listdir() raise NotADirectoryError.
somme = 0
for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    nb_images = len(os.listdir(class_dir))
    somme += nb_images
    print('number pictures in', class_name, nb_images)

print('nb total image', somme)

number pictures in apex 495
nb total image 495


### Choose model

In [5]:
# Class names as discovered by ImageFolder for the training data. Using the
# dataset's own class list (rather than os.listdir) keeps the size of the
# output layer in agreement with the labels the loader actually produces.
liste_cat = trainloader.dataset.classes
output_dim = len(liste_cat)

# ResNet-18 with randomly initialised weights; the final fully connected
# layer is replaced so that it emits one score per class.
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, output_dim)

# Move the model to the target device before the optimizer is created.
model = model.to(device)

# CrossEntropyLoss expects raw scores (logits), so the head has no softmax.
loss_function = nn.CrossEntropyLoss()

# The backbone is not pretrained, so every parameter has to be optimised;
# updating only model.fc would leave the feature extractor at its random
# initial weights.
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [6]:
import time
from utils import preprocess
# Training configuration and bookkeeping.
start_time = time.time()
epochs = 50
steps = 0                # number of training batches processed so far
running_loss = 0         # sum of the training losses since the last report
print_every_batch = 5    # run a validation pass every N training batches
train_losses, test_losses = [], []

# Make sure the model is in training mode even if a previous, interrupted
# run of this cell left it in eval mode.
model.train()

for epoch in range(epochs):
    for inputs, labels in trainloader:
        steps += 1

        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # The network returns raw class scores (logits); the loss function
        # is applied directly to them.
        logits = model(inputs)

        # Back-propagate to accumulate gradients, then update the parameters.
        loss = loss_function(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if steps % print_every_batch == 0:
            test_loss = 0
            correct = 0
            seen = 0
            model.eval()
            with torch.no_grad():
                # Distinct names so the training batch is not shadowed.
                for val_inputs, val_labels in testloader:
                    val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
                    val_logits = model(val_inputs)
                    test_loss += loss_function(val_logits, val_labels).item()

                    # The predicted class is the index of the highest score.
                    predictions = val_logits.argmax(dim=1)
                    correct += (predictions == val_labels).sum().item()
                    seen += val_labels.size(0)

            # Accuracy over all validation samples, so a shorter last batch
            # does not get the same weight as a full one.
            accuracy = correct / seen

            # running_loss covers the last `print_every_batch` batches, so
            # that is the divisor for both the stored and the printed value.
            train_losses.append(running_loss/print_every_batch)
            test_losses.append(test_loss/len(testloader))
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Avancement_epoch {(((steps/len(trainloader))*100)-(100*epoch)):.1f}%.."
                  f"Train_loss: {running_loss/print_every_batch:.3f}.. "
                  f"Test_loss: {test_loss/len(testloader):.3f}.. "
                  f"Test_accuracy: {accuracy:.3f}")

            running_loss = 0
            model.train()

print('temps total d\'exectution : ', time.time()-start_time)

ImportError: cannot import name 'funct_meanstd'

In [None]:
# Loss curves: one point per validation pass, spread evenly over the epochs.
plt.plot(np.linspace(0, epochs, len(train_losses)), train_losses, label='Training loss')
plt.plot(np.linspace(0, epochs, len(test_losses)), test_losses, label='Validation loss')
plt.xticks(range(0, epochs + 1, 10))
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(frameon=False)
plt.show()

# Average of the recorded losses over the whole run. np.mean is used rather
# than `from statistics import mean`, which would rebind the notebook-level
# name `mean` (the value returned by get_meanstd) to a function.
print(np.mean(train_losses))
print(np.mean(test_losses))

In [None]:
# Write the model's state_dict (its parameters and buffers) to disk.
torch.save(
    obj=model.state_dict(),
    f='classification_nonormalize_mix_5cat_50epochs_TennisRlineHandStaplerMug.pth',
)