<a href="https://colab.research.google.com/github/nguyenduyhieu201/machineLearning/blob/main/MachineLearningMidTerm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
import time
 
input_path = "../content/DATA_CHAMBER_2021/"
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")


Using CUDA


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Mục mới

In [5]:
!unzip -uq "/content/drive/My Drive/DATA_CHAMBER_2021.zip" -d "./"

In [6]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

In [25]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
#         transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
#         transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    'validation':
    transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        normalize
    ]),
}

image_datasets = {
    'train': 
    ImageFolderWithPaths(input_path + 'train', data_transforms['train']),
    'validation': 
    ImageFolderWithPaths(input_path + 'test', data_transforms['validation'])
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','validation']}

dataloaders = {
    'train':
    torch.utils.data.DataLoader(image_datasets['train'],
                                batch_size=8,
                                shuffle=True,
                                num_workers=2),  # for Kaggle
    'validation':
    torch.utils.data.DataLoader(image_datasets['validation'],
                                batch_size=8,
                                shuffle=True,
                                num_workers=2)  # for Kaggle
}

In [26]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [9]:
!pip install efficientnet_pytorch 

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=9668f0e9c3fbb7e9124817732e5bacb5bf98e7f926ef71e4d905c9eca3d664c1
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [27]:
from efficientnet_pytorch import EfficientNet

In [28]:
model = models.resnet50(pretrained = True).cuda()
for param in model.parameters():
    param.requires_grad = False   
    
model.fc = nn.Sequential(
               nn.Linear(2048, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 3)).to(device)

In [29]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(),lr=0.001,momentum=0.9)

In [30]:
def train_model(model, criterion, optimizer, num_epochs=3):
    train_batches = len(dataloaders['train'])
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for i,data in enumerate(dataloaders[phase]):
                inputs, labels,_ = data
                print("\rTraining batch {}/{}".format(i+1, train_batches), end='', flush=True)
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(image_datasets[phase])
            epoch_acc = running_corrects.double() / len(image_datasets[phase])

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))
    return model

In [31]:
model_trained = train_model(model, criterion, optimizer, num_epochs=20)

Epoch 1/20
----------
Training batch 840/840train loss: 0.7770, acc: 0.6551
Epoch 2/20
----------
Training batch 840/840train loss: 0.5888, acc: 0.7561
Epoch 3/20
----------
Training batch 840/840train loss: 0.5137, acc: 0.7931
Epoch 4/20
----------
Training batch 840/840train loss: 0.4325, acc: 0.8315
Epoch 5/20
----------
Training batch 840/840train loss: 0.3705, acc: 0.8605
Epoch 6/20
----------
Training batch 840/840train loss: 0.3392, acc: 0.8700
Epoch 7/20
----------
Training batch 840/840train loss: 0.3017, acc: 0.8869
Epoch 8/20
----------
Training batch 840/840train loss: 0.2777, acc: 0.8962
Epoch 9/20
----------
Training batch 840/840train loss: 0.2380, acc: 0.9120
Epoch 10/20
----------
Training batch 840/840train loss: 0.2238, acc: 0.9128
Epoch 11/20
----------
Training batch 840/840train loss: 0.2102, acc: 0.9199
Epoch 12/20
----------
Training batch 840/840train loss: 0.2108, acc: 0.9214
Epoch 13/20
----------
Training batch 840/840train loss: 0.2024, acc: 0.9238
Epoch 14

In [15]:
def test_model(model, criterion, optimizer):
    labels_input=list()
    labels_output=list()
    vid_id = list()
    for phase in ['validation']:
        model.eval()

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels, fname in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels_input= labels_input + labels.tolist()
            for f in fname:
                vid_id.append(f.split('/')[-1].split('.')[0].split('_')[0])
            outputs = model(inputs)
            
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            
            labels_output= labels_output + preds.tolist()
    return labels_input,labels_output,vid_id
            
y_true,y_pred,vid_id = test_model(model, criterion, optimizer)

In [16]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print(classification_report(y_true,y_pred))
accuracy_score(y_true, y_pred)

              precision    recall  f1-score   support

           0       0.69      0.68      0.68       409
           1       0.44      0.83      0.58       367
           2       0.78      0.49      0.60       831

    accuracy                           0.61      1607
   macro avg       0.64      0.66      0.62      1607
weighted avg       0.68      0.61      0.61      1607



0.612321095208463

In [17]:
import pandas as pd

df = pd.DataFrame(list(zip(y_true,y_pred,vid_id)),
               columns =['y_true','y_pred','vid_id'])
df.to_csv('df.csv',encoding='utf-8',index=False)

In [24]:
vid_list = list(set(df['vid_id'].values))

y_true = []
y_pred = []
for vid in vid_list:
    #print(vid)
    tmp_df = df[df['vid_id']==vid]
    #print(len(tmp_df))
    vid_pred = tmp_df['y_pred'].mode().values[0]
    vid_label = tmp_df['y_true'].mode().values[0]
    y_true.append(vid_label)
    y_pred.append(vid_pred)
    #print(vid_label,"\n",vid_pred)
    
    print('vid: {} label: {} pred: {}'.format(vid,vid_label,vid_pred))


vid: 181 label: 0 pred: 0
vid: 177 label: 0 pred: 0
vid: 180 label: 2 pred: 2
vid: 174 label: 1 pred: 1
vid: 192 label: 0 pred: 0
vid: 173 label: 2 pred: 2
vid: 187 label: 2 pred: 2
vid: 178 label: 0 pred: 0
vid: 183 label: 0 pred: 2
vid: 161 label: 1 pred: 1
vid: 189 label: 1 pred: 1
vid: 163 label: 2 pred: 2
vid: 195 label: 2 pred: 2
vid: 194 label: 1 pred: 1
vid: 159 label: 1 pred: 1
vid: 184 label: 2 pred: 2
vid: 165 label: 0 pred: 0
vid: 168 label: 0 pred: 0
vid: 158 label: 0 pred: 0
vid: 176 label: 0 pred: 0
vid: 188 label: 2 pred: 2
vid: 182 label: 2 pred: 2
vid: 193 label: 2 pred: 1
vid: 179 label: 1 pred: 1
vid: 175 label: 1 pred: 1
vid: 191 label: 0 pred: 0
vid: 172 label: 2 pred: 2
vid: 160 label: 2 pred: 1
vid: 157 label: 1 pred: 1
vid: 186 label: 1 pred: 1
vid: 167 label: 2 pred: 0
vid: 190 label: 1 pred: 1
vid: 164 label: 2 pred: 2
vid: 171 label: 0 pred: 2
vid: 166 label: 1 pred: 1
vid: 185 label: 1 pred: 1
vid: 170 label: 2 pred: 1
vid: 162 label: 1 pred: 1
vid: 169 lab

In [19]:
accuracy_score(y_true,y_pred)

0.8461538461538461