# PROJET DEEP LEARNING

## IMPORTATION DES MODULES

In [18]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import torchvision
import numpy as np

## PRE-TRAITEMENT DES DONNEES

In [2]:
print('total training damage images: ',len(os.listdir("./train_another/damage")))
print('total validation damage images: ',len(os.listdir("./validation_another/damage")))
print('total test damage images: ',len(os.listdir("./test_another/damage")))
print('total training no damage images: ',len(os.listdir("./train_another/no_damage")))
print('total validation no damage images: ',len(os.listdir("./validation_another/no_damage")))
print('total test no damage images: ',len(os.listdir("./test_another/no_damage")))

total training damage images:  5000
total validation damage images:  1000
total test damage images:  8000
total training no damage images:  5000
total validation no damage images:  1000
total test no damage images:  1000


### Importation des images en séparant en jeu train / test / validation

In [3]:
transform = transforms.Compose([transforms.Resize((150,150)),
                                 transforms.ToTensor()])
batch_size = 20

df_train = datasets.ImageFolder("./train_another",transform=transform)
train_dataloader = DataLoader(dataset=df_train, batch_size=batch_size, shuffle=True, num_workers = 4 , pin_memory = True)

df_test = datasets.ImageFolder("./test_another",transform=transform)
test_dataloader = DataLoader(dataset=df_test, batch_size=batch_size, shuffle=True,num_workers = 4 , pin_memory = True)

df_validation = datasets.ImageFolder("./validation_another",transform=transform)
validation_dataloader = DataLoader(dataset=df_validation, batch_size=batch_size, shuffle=True,num_workers = 4 , pin_memory = True)

Essayons de visualiser les classes de nos images 

In [4]:
df_validation.classes

['damage', 'no_damage']

## MODELISATION

### MODELE 1 : Reproduction du modèle de CAO & CHOE

#### Création de la classe qui va nous permettre de faire notre classification d'images

In [5]:
class ImageClassificationBase(nn.Module):
    
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))

#### Création de la classe qui va permettre de créer notre premier modèle

In [6]:
class HurricaneClassification(ImageClassificationBase):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            
            nn.Conv2d(3,32, kernel_size = (3,3)),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(32,64, kernel_size = (3,3)),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
        
            nn.Conv2d(64, 128, kernel_size = (3,3)),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(128,128, kernel_size = (3,3)),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(6272, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
            nn.ReLU(),
        )
    
    def forward(self, xb):
        return self.network(xb)

#### Création des fonctions qui vont permettre d'évaluer notre modèle

In [7]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

  
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    nb_etape = 0
    outputs = []
    for batch in val_loader:
        nb_etape = nb_etape + 1
        outputs.append(model.validation_step(batch))
        if nb_etape == 50:
            break
            
    return model.validation_epoch_end(outputs)

  
def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.SGD):
    
    history = []
    optimizer = opt_func(model.parameters(),lr)
    for epoch in range(epochs):
        
        model.train()
        train_losses = []
        nb_etape = 0
        
        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            nb_etape = nb_etape+1
            if nb_etape == 10:
                break
                
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    
    return history, train_losses

In [17]:
def val_CNN(net, valLoader):
    correct, total = 0, 0
    predictions = []
    net.eval()
    for i, data in enumerate(valLoader, 0):
        inputs, labels = data
        outputs = net(inputs)    
        _, predicted = torch.max(outputs.data, 1)
        predictions.append(outputs)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))

In [22]:
model = HurricaneClassification()
num_epochs = 5
opt_func = torch.optim.Adam
lr = 1e-4
#fitting the model on training data and record the result after each epoch
history, loss = fit(num_epochs, lr, model, train_dataloader, validation_dataloader, opt_func)

Epoch [0], train_loss: 0.6948, val_loss: 0.6929, val_acc: 0.5090
Epoch [1], train_loss: 0.6928, val_loss: 0.6926, val_acc: 0.5020
Epoch [2], train_loss: 0.6928, val_loss: 0.6919, val_acc: 0.4880
Epoch [3], train_loss: 0.6928, val_loss: 0.6929, val_acc: 0.5030
Epoch [4], train_loss: 0.6923, val_loss: 0.6901, val_acc: 0.5040


In [23]:
summary(model,(3, 150, 150))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 148, 148]             896
              ReLU-2         [-1, 32, 148, 148]               0
         MaxPool2d-3           [-1, 32, 74, 74]               0
            Conv2d-4           [-1, 64, 72, 72]          18,496
              ReLU-5           [-1, 64, 72, 72]               0
         MaxPool2d-6           [-1, 64, 36, 36]               0
            Conv2d-7          [-1, 128, 34, 34]          73,856
              ReLU-8          [-1, 128, 34, 34]               0
         MaxPool2d-9          [-1, 128, 17, 17]               0
           Conv2d-10          [-1, 128, 15, 15]         147,584
             ReLU-11          [-1, 128, 15, 15]               0
        MaxPool2d-12            [-1, 128, 7, 7]               0
          Flatten-13                 [-1, 6272]               0
          Dropout-14                 [-

#### Sauvegarde du modèle

In [None]:
torch.save(model.state_dict(),"model1")

En commentaire car ça bug

In [24]:
loss_tensor = torch.stack(loss)
loss_np=torch.detach(loss_tensor).numpy()

#plt.plot(loss)
#plt.xlabel('Itération')
#plt.ylabel('Perte')
#plt.show()

: 

: 

In [None]:
val_model1 = val_CNN(model,validation_dataloader)

### MODELE 2 : Fine tuning VGG-19

In [11]:
model2 = torchvision.models.vgg19(pretrained=True)
print(model2)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to C:\Users\spica/.cache\torch\hub\checkpoints\vgg19-dcbb9e9d.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [13]:
num_classes = 1000

# Verrouillage des couches du modèle (sauf la dernière) pour éviter leur mise à jour lors de l'entraînement
for param in model2.parameters():
    param.requires_grad = False

# Modification du nombre de classes en sortie (par défaut 1000) si nécessaire
model2.classifier[6] = torch.nn.Linear(in_features=4096, out_features=num_classes)

# Déverrouillage de la dernière couche pour permettre son entraînement
for param in model2.classifier[6].parameters():
    param.requires_grad = True

# Conversion en mode "entraînement"
model2.train()

# Préparation des transformateurs pour les données d'entraînement
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

batch_size = 20
num_epochs = 5

trainset = torchvision.datasets.ImageFolder(root='./train_another', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
loss_values = []

# Boucle d'entraînement
for epoch in range(num_epochs):
    for i, data in enumerate(trainloader, 0):
        # Chargement des entrées et des étiquettes
        inputs, labels = data
        
        # Calcul des sorties à l'aide de la méthode forward
        outputs = model2(inputs)
        
        # Calcul de la perte
        criterion = torch.nn.CrossEntropyLoss()
        loss = criterion(outputs, labels)
        
        # Ajout de la perte à la liste
        loss_values.append(loss.item())
        # Calcul des gradients pour la rétropropagation
        optimizer = torch.optim.SGD(model2.classifier[6].parameters(), lr=0.001, momentum=0.9)
        optimizer.zero_grad()
        loss.backward()
        
        # Mise à jour des paramètres du modèle
        optimizer.step()
        
        # Affichage de la progression de l'entraînement toutes les 100 itérations
        if i ==10:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss.item()))
            break

[1,    11] loss: 5.797
[2,    11] loss: 4.272
[3,    11] loss: 2.825
[4,    11] loss: 1.972
[5,    11] loss: 1.696


On sauvegarde le modèle

In [14]:
torch.save(model2.state_dict(),"model2")

#### Courbe de gain

In [None]:
#plt.plot(loss_values)
#plt.xlabel('Itération')
#plt.ylabel('Perte')
#plt.show()

In [16]:
val_model2 = val_CNN(model2,validation_dataloader)

The testing set accuracy of the network is: 62 %


### MODELES DE MACHINE LEARNING

#### Extraction des features du modèles

In [19]:
def extract_features(dataloader, model, n_sample, n_features):
    features = np.zeros(shape=(n_sample,n_features))
    batchSize = dataloader.batch_size
    labels = np.zeros(shape = (n_sample))
    i = 0
    for inputs_batch, labels_batch in dataloader:
        model.eval()
        features_batch = model(inputs_batch).detach().numpy()
        features[i * batchSize: (i + 1) * batchSize] = features_batch
        labels[i * batchSize: (i + 1) * batchSize] = labels_batch
        i += 1
        if i * batchSize >= n_sample:
            break
    return features, labels

In [21]:
x_train, y_train = extract_features(train_dataloader,model2,10000,1000)
x_test, y_test = extract_features(validation_dataloader, model2, 2000, 1000)


#### Modèle 1 : Arbres de décisions

In [25]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(x_train,y_train)

DecisionTreeClassifier()

In [39]:
from sklearn.metrics import accuracy_score, confusion_matrix 
y_pred = clf.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print("Précision : " + str(accuracy_score(y_test,y_pred)))

[[746 254]
 [240 760]]
Précision : 0.753


#### Modèle 2 : Fôrets aléatoires

In [28]:
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier()
clf2.fit(x_train,y_train)

RandomForestClassifier()

In [29]:
y_pred = clf2.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print("Précision : " + str(accuracy_score(y_test,y_pred)))

0.8365

#### Modèles 3 : AdaBoost

In [30]:
from sklearn.ensemble import AdaBoostClassifier
clf3 = AdaBoostClassifier()
clf3.fit(x_train,y_train)

AdaBoostClassifier()

In [31]:
y_pred = clf3.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print("Précision : " + str(accuracy_score(y_test,y_pred)))

0.8235

#### Modèle 4 : Lightgbm

In [43]:
import lightgbm as lgb
clf4 = lgb.LGBMClassifier()
clf4.fit(x_train,y_train)

LGBMClassifier()

In [44]:
y_pred = clf4.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print("Précision : " + str(accuracy_score(y_test,y_pred)))

[[844 156]
 [117 883]]
Précision : 0.8635


#### Modèle 5 : Bagging

In [None]:
from sklearn.ensemble import BaggingClassifier
clf5 = BaggingClassifier()
clf5.fit(x_train,y_train)

In [None]:
y_pred = clf5.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print("Précision : " + str(accuracy_score(y_test,y_pred)))

#### Conclusion

Notre meilleur modèle est :

## Modèle 3 : Création de notre propre CNN