## Setup Code
First, run this cell load the autoreload extension. This allows us to edit .py source files, and re-import them into the notebook for a seamless editing and debugging experience.

In [1]:
%load_ext autoreload
%autoreload 2

##Google Colab Setup
Run the following cell to mount your Google Drive. Follow the link, sign in to your Google account (the same account you used to store this notebook!) and copy the authorization code into the text box that appears below.

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Now recall the path in your Google Drive where you uploaded this notebook, fill it in below. If everything is working correctly then running the folowing cell should print the filenames from the project:
```
['face.py', 'Project Proposal.gdoc', 'Data', 'Face Detection.ipynb', 'Project Flow Chart.gjam', 'Train_net.ipynb']
```


In [3]:
import os

# TODO: Fill in the Google Drive path where you uploaded the assignment
# Example: If you create a 2020FA folder and put all the files under A5 folder, then '2020FA/A5'
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = 'EECS504'
GOOGLE_DRIVE_PATH = os.path.join('drive', 'My Drive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
print(os.listdir(GOOGLE_DRIVE_PATH))

import sys
sys.path.append(GOOGLE_DRIVE_PATH)

['Project Proposal.gdoc', 'Data', '.ipynb_checkpoints', 'Project Flow Chart.gjam', 'CelebA', 'Models', 'mask.py', 'Masked or Not_train_net.ipynb', 'Demo.mov', 'video_masked_or_not.py', 'Experimenting', 'face_masked.py', 'Project_video', 'Augmented_train_net.ipynb']


###Get some models and datasets




Set up the end of the model to be able to classify the three classes: proper mask, improper mask, no mask

In [4]:
import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler
import torch.nn.functional as F

from torchvision import datasets, transforms, models

import copy

# Get a pretrained resnet 50 model from 
net = models.resnet50(pretrained=True)


# Modify the last layer of that net for three classes
in_features = net.fc.in_features
hidden_size = 500 # 256
num_classes = 3 # mask, improper mask, no mask

classifier = nn.Sequential(
    nn.Linear(in_features, hidden_size),
    nn.ReLU(),
    nn.Dropout(.2),
    nn.Linear(hidden_size, num_classes)
)

# Update the last layer to be several classifications layers
net.fc = classifier
#print(net)
'''
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(net.parameters(), lr=.001, weight_decay=0)

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

'''

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




'\ncriterion = nn.CrossEntropyLoss()\n\noptimizer = optim.Adam(net.parameters(), lr=.001, weight_decay=0)\n\nscheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)\n\n'

In [5]:
for name, param in net.named_parameters():
    if("bn" not in name):
        param.requires_grad = False

net.fc.requires_grad_()

Sequential(
  (0): Linear(in_features=2048, out_features=500, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=500, out_features=3, bias=True)
)

In [6]:
criterion = nn.CrossEntropyLoss()

learning_rate = 1e-5#3e-4 #5e-5

optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=0)

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [7]:
if torch.cuda.is_available:
  print('Good to go!')
  device = 'cuda'
else:
  print('Please set GPU via Edit -> Notebook Settings.')
  device = 'cpu'
net = net.to(device=device)

Good to go!


In [8]:
def get_accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [9]:
pre_process_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        #transforms.ColorJitter(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
        #transforms.RandomAffine(degrees=(-20, 20),translate=(.5,.5),scale=(.33,3) ),
        #transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize((256,256)),
])

def pre_process(X):
  '''
  normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
  X_norm = normalize(X)
  '''
  X_norm = pre_process_transforms(X)
  return X_norm

In [11]:

import os
import cv2
import torch
import random

GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = 'EECS504'
GOOGLE_DRIVE_PATH = os.path.join('drive', 'My Drive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE, 'Data')

NO_MASK_PATH = os.path.join(GOOGLE_DRIVE_PATH, 'No Mask Images')
PROPER_MASK_PATH = os.path.join(GOOGLE_DRIVE_PATH, 'Proper Mask Images')
IMPROPER_MASK_PATH = os.path.join(GOOGLE_DRIVE_PATH, 'Improper Mask Images')

def find_files(root):
  all_files = []
  for name in os.listdir(root):
    path = os.path.join(root, name)
    if os.path.isfile(path):
      all_files.append(path)
    else:
      all_files.extend(find_files(path))
  return all_files

NO_MASK_FILES = find_files(NO_MASK_PATH)
PROPER_MASK_FILES = find_files(PROPER_MASK_PATH)
IMPROPER_MASK_FILES = find_files(IMPROPER_MASK_PATH)

def load_and_format_image(path, resolution):
  img = cv2.imread(path)

  # Just added this
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  width = resolution
  height = resolution
  dim = (width, height)
 
  # resize image
  resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
  return resized

def load_images(all_files, count, resolution):
  selected_files = []
  while len(selected_files) < count:
    next = random.choice(all_files)
    if next in selected_files: continue
    selected_files.append(next)
    
  #print(f'selected {len(selected_files)} files')
  
  loaded = [load_and_format_image(f, resolution) for f in selected_files]

  #print(f'loaded {len(selected_files)} files')

  return loaded

def load_batch(batch_size, resolution):
  no_mask_images = load_images(NO_MASK_FILES, batch_size / 3, resolution)
  proper_mask_images = load_images(PROPER_MASK_FILES, batch_size / 3 + batch_size % 3, resolution)
  improper_mask_images = load_images(IMPROPER_MASK_FILES, batch_size / 3, resolution)

  images = [
    *no_mask_images,
    *proper_mask_images,
    *improper_mask_images,
  ]
  labels = [
    *([2] * len(no_mask_images)),
    *([0] * len(proper_mask_images)),
    *([1] * len(improper_mask_images)),
  ]

  images = torch.tensor(images).permute(0, 3, 1, 2).to(dtype=torch.float32) / 255.0

  labels = torch.tensor(labels)

  return (images, labels)



In [None]:
num_epochs = 1000
batch_size = 15 # 60 was 3.7 gb RAM #  30 # 150 too big (not enough ram)
total_num_images = 30000 #70000 + 70000 + 10000 #2000 # update this
show_every = 5 # 25 # was 50
batches_per_epoch = total_num_images // batch_size
image_size = 256
count = 0

best_model_path = 'drive/My Drive/EECS504/Models/'


#X_val, y_val = load_data(total_num_images, image_size) # Can change this for cleaner data management
#X_val, y_val = load_data(batch_size*2, image_size) # Can change this for cleaner data management
#transform_val = data_transforms['val']
#X_val = transform_val(X_val).to(device=device)
#X_val = pre_process(X_val).to(device=device)
#y_val = y_val.to(device=device)

best_acc = 0
best_model_wts = None

loss_history = []
acc_history = []

for epoch in range(num_epochs):
  print("============= Start of Epoch: " , epoch+1, "/", num_epochs,  "=============")
  # Training
  net.train()

  epoch_loss_history = []
  epoch_acc_history = []
  
  for batch in range(batches_per_epoch):
    # Load in the data
    X,y = load_batch(batch_size, image_size)

    #print("loaded")

    # Transform the data
    #transform = data_transforms['train']
    #X = transform(X).to(device=device)
    #X = torch.tensor(X).permute(0,3,1,2).to(device=device, dtype=torch.float32) / 255.0
    X = X.to(device=device)
    X = pre_process(X)
    #y = torch.tensor(y).to(device=device)

    y = y.to(device=device)

    # Make predictions and determine loss
    out = net(X)
    loss = criterion(out,y)

    # Check the accuracy of the output
    acc = get_accuracy(out,y)

    # Update the weights, and the optimizer scheduler
    loss.backward()
    optimizer.step()
    scheduler.step()

    # Append to epoch list
    epoch_loss_history.append(loss)
    epoch_acc_history.append(acc)

    # See if it is time to display results of batch
    count += 1
    if show_every is not None and count % show_every == 0:
      print("Epoch: ", epoch+1, "/", num_epochs, ". Batch: ", batch+1, "/", batches_per_epoch)
      print("Batch Acc: ", acc.item(), "Batch Loss: ", loss.item())
      #torch.save(net.state_dict(), best_model_path + 'in_progress_proper_or_improper_model_' + str(acc.item()) +  '.pt')

  # Val at end of each epoch
  net.eval()
  with torch.no_grad():
    #out = net(X_val)
    #loss = criterion(out, y_val)
    #acc = get_accuracy(out, y_val)
    
    loss = torch.tensor(epoch_loss_history).mean()
    acc =  torch.tensor(epoch_acc_history).mean()

    loss_history.append(loss)
    acc_history.append(acc)

    print("=============  End of Epoch: ", epoch+1, "/", num_epochs, "  =============")
    print("Train Acc: ", acc.item(), " Val Loss: ", loss.item())
    if True: # acc > best_acc:
      print("New best model!")
      best_model_wts = copy.deepcopy(net.state_dict())
      best_acc = acc
      torch.save(net.state_dict(), best_model_path + 'augmented_model_proper_or_improper' + str(acc.item()) +  '.pt')
    print()

Epoch:  1 / 1000 . Batch:  5 / 2000
Batch Acc:  1.0 Batch Loss:  0.12079548090696335
Epoch:  1 / 1000 . Batch:  10 / 2000
Batch Acc:  1.0 Batch Loss:  0.14790280163288116
Epoch:  1 / 1000 . Batch:  15 / 2000
Batch Acc:  0.6666666865348816 Batch Loss:  0.6850444078445435
Epoch:  1 / 1000 . Batch:  20 / 2000
Batch Acc:  0.7333333492279053 Batch Loss:  0.5904043912887573
Epoch:  1 / 1000 . Batch:  25 / 2000
Batch Acc:  1.0 Batch Loss:  0.11196085065603256
Epoch:  1 / 1000 . Batch:  30 / 2000
Batch Acc:  1.0 Batch Loss:  0.11278940737247467
Epoch:  1 / 1000 . Batch:  35 / 2000
Batch Acc:  0.8666666746139526 Batch Loss:  0.32658424973487854
Epoch:  1 / 1000 . Batch:  40 / 2000
Batch Acc:  1.0 Batch Loss:  0.13143453001976013
Epoch:  1 / 1000 . Batch:  45 / 2000
Batch Acc:  0.8666666746139526 Batch Loss:  0.41589662432670593
Epoch:  1 / 1000 . Batch:  50 / 2000
Batch Acc:  1.0 Batch Loss:  0.16680391132831573
Epoch:  1 / 1000 . Batch:  55 / 2000
Batch Acc:  1.0 Batch Loss:  0.122656755149364

In [None]:
best_model_path = 'drive/My Drive/EECS504/Models/'

print(acc)
torch.save(net.state_dict(), best_model_path + 'augmented_incomplete_model_' + str(acc.item()) +  '.pt')

In [14]:
for name, param in net.named_parameters():
      param.requires_grad = True

net.fc.requires_grad_()

Sequential(
  (0): Linear(in_features=2048, out_features=500, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=500, out_features=3, bias=True)
)

In [13]:
model_path = 'drive/My Drive/EECS504/Models/' + 'augmented_model_proper_or_improper0.9016667008399963.pt' #'augmented_model_proper_or_improper0.801166832447052.pt' # 'in_progress_m_or_n_model_0.855555534362793.pt'


def load_model():
    net.load_state_dict(torch.load(model_path, map_location=device))
    return net

net = load_model()