# Finding Logos in Screenshots

Our method is as follows:

1. train a neural net to recognize company logos
2. use selective search for region proposal on screenshots
3. feed all proposed regions into neural net; if highest probability beats some threshold, count as positive ID









In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os

In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive; the model
# checkpoint, logo images, screenshots and results file used below are all
# read from / written to paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading a Neural Net

We train neural nets in the notebook TransferLearningLogoDetection and save the models in

> /content/drive/MyDrive/LogoDetection/Models

The models are saved with PyTorch. Only the state dicts are saved, so we must first define a model with the matching architecture and then load the learnable parameters from the file.

Here is the Siamese NN class.

In [None]:
class Siamese(nn.Module):
    """Siamese network that maps a pair of inputs to one raw score per pair.

    Both inputs go through the same ``conv`` trunk and the same
    linear + sigmoid embedding layer. The element-wise absolute difference of
    the two embeddings is then projected to a single value by ``out`` (no
    activation is applied to that value here).
    """

    def __init__(self, conv, lin_in, lin_out):
        """Build the shared trunk, the embedding layer and the scoring head.

        Args:
            conv: module applied to the raw input batch; its output is
                flattened to ``lin_in`` features per sample.
            lin_in: number of flattened features produced by ``conv``.
            lin_out: size of the embedding produced for each input.
        """
        super().__init__()
        self.conv = conv
        embedding_layer = nn.Linear(lin_in, lin_out)
        self.linear = nn.Sequential(embedding_layer, nn.Sigmoid())
        self.out = nn.Linear(lin_out, 1)

    def forward_one(self, x):
        """Return the ``(batch, lin_out)`` embedding of a single input batch."""
        features = self.conv(x)
        # Collapse everything except the batch dimension.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)

    def forward(self, x1, x2):
        """Return a ``(batch, 1)`` score for each pair ``(x1[i], x2[i])``."""
        emb1, emb2 = self.forward_one(x1), self.forward_one(x2)
        return self.out((emb1 - emb2).abs())

In [3]:
class Siamese(nn.Module):
    """Siamese network whose output is the distance between two embeddings.

    Each input is passed through the shared ``conv`` trunk and two
    linear + sigmoid layers, giving a 32-dimensional embedding. ``forward``
    returns the Euclidean (L2) distance between the embeddings of a pair.
    """

    def __init__(self, conv, lin_in, lin_out):
        """Build the shared trunk and the two embedding layers.

        Args:
            conv: module applied to the raw input batch; its output is
                flattened to ``lin_in`` features per sample.
            lin_in: number of flattened features produced by ``conv``.
            lin_out: width of the hidden layer feeding the 32-d embedding.
        """
        super().__init__()
        self.conv = conv
        hidden = nn.Linear(lin_in, lin_out)
        self.linear1 = nn.Sequential(hidden, nn.Sigmoid())
        projection = nn.Linear(lin_out, 32)
        self.linear2 = nn.Sequential(projection, nn.Sigmoid())

    def forward_one(self, x):
        """Return the ``(batch, 32)`` embedding of a single input batch."""
        features = self.conv(x)
        # Collapse everything except the batch dimension.
        flattened = features.view(features.size(0), -1)
        return self.linear2(self.linear1(flattened))

    def forward(self, x1, x2):
        """Return the ``(batch, 1)`` L2 distance between paired embeddings."""
        delta = self.forward_one(x1) - self.forward_one(x2)
        # An L1 distance (sum of absolute differences) is a possible
        # alternative here; the L2 norm is what this model uses.
        return torch.linalg.norm(delta, ord=2, dim=1, keepdim=True)

In [5]:
# Build the Siamese model for the chosen backbone, load its trained weights
# from Google Drive, move it to the GPU and switch it to inference mode.
# Defines the module-level names `conv`, `lin_in`, `lin_out`, `img_size`,
# `model` and `state_dict`.
conv = None
lin_in = None
lin_out = None
img_size = None

# change CNN_type to choose from list below
CNN_type = 1

# example with the convolutional neural net from the one-shot Omniglot paper
#   Koch, Gregory, Richard Zemel, and Ruslan Salakhutdinov.
#   "Siamese neural networks for one-shot image recognition."
#   ICML deep learning workshop. Vol. 2. 2015.
# (modified to take 3-channel RGB input)
if CNN_type == 0:
  # Shape comments give channels@height*width for a 105x105 input.
  conv = nn.Sequential(
          nn.Conv2d(3, 64, 10),     # 64@96*96
          nn.ReLU(inplace=True),
          nn.MaxPool2d(2),          # 64@48*48
          nn.Conv2d(64, 128, 7),    # 128@42*42
          nn.ReLU(),
          nn.MaxPool2d(2),          # 128@21*21
          nn.Conv2d(128, 128, 4),   # 128@18*18
          nn.ReLU(),
          nn.MaxPool2d(2),          # 128@9*9
          nn.Conv2d(128, 256, 4),   # 256@6*6
          nn.ReLU(),
          )
  lin_in = 9216  # 256 * 6 * 6 flattened features
  lin_out = 4096
  img_size = (105, 105)

elif CNN_type == 1:
  # Frozen ResNet-50 used as a fixed feature extractor; its 1000-way output
  # is what feeds the Siamese linear layers.
  conv = models.resnet50(pretrained=True)
  for param in conv.parameters():
    param.requires_grad = False

  lin_in = 1000
  lin_out = 500
  # NOTE(review): 244 may be a typo for the usual ResNet input size of 224.
  # It is left unchanged because it presumably matches the size used when the
  # checkpoint was trained -- confirm against the training notebook.
  img_size = (244, 244)

else:
  raise ValueError("CNN_type = " + str(CNN_type) + " not valid.")

model = Siamese(conv, lin_in, lin_out)

# Deserialize the checkpoint onto the CPU and only then move the model to the
# GPU. Without map_location the tensors are restored to the device they were
# saved from, and because `state_dict` stays alive as a global that would keep
# a second copy of all weights in GPU memory.
state_dict = torch.load('/content/drive/MyDrive/LogoDetection/Models/siameseNet05.pt',
                        map_location='cpu')
model.load_state_dict(state_dict)
model.cuda()
# eval() puts batch-norm layers into inference mode (running statistics), so
# the embedding of an image does not depend on what else is in its batch.
model.eval()
print('Model loaded.')

Model loaded.


## Region Proposal and Evaluation

For each image, we apply region proposal followed by feeding each region into the neural network.
For region proposal, we use the selective search algorithm as implemented in OpenCV, run in its single-strategy mode (`switchToSingleStrategy`).
To use the neural network, we must compare the proposed region to each true logo.

The screenshots of websites and the true logos are stored on the Google drive.

In [6]:
# Project folder on the mounted Google Drive; both datasets live beneath it.
_logo_project_dir = '/content/drive/MyDrive/LogoDetection'

# Website screenshots, one sub-folder per class.
ss_path = _logo_project_dir + '/phishIRIS_DL_Dataset-logos_only/train'
# Reference ("true") logo images.
logos_path = _logo_project_dir + '/Logos'

In [7]:
# first, run the net on each true logo
# logo_vecs maps each logo file name to its (1, embedding_dim) feature vector.
logo_vecs = {}

# Resize every logo to the input size the model expects, then convert it to a
# float tensor.
transform = transforms.Compose([transforms.Resize(img_size),
                                transforms.ToTensor()])

# Inference only: without no_grad every stored vector would keep its autograd
# graph alive (the Siamese linear layers are trainable), wasting GPU memory
# for as long as logo_vecs exists.
with torch.no_grad():
  for img_file in os.listdir(logos_path):
    filepath = os.path.join(logos_path, img_file)
    img = Image.open(filepath).convert('RGB')
    img_t = transform(img).cuda()
    # unsqueeze(0) adds the batch dimension expected by the model.
    logo_vecs[img_file] = model.forward_one(img_t.unsqueeze(0))

Here's the main loop! We save results in /content/drive/MyDrive/LogoDetection/Results/siameseNet05_results.csv.

In [9]:
# Main detection loop.
#
# For every screenshot under ss_path/<class>/: propose regions with selective
# search, embed each region with the Siamese model, and record the reference
# logo whose embedding is closest (smallest L2 distance) to any region.
display_progress = 1
test_mode = 0

# here is the file we'll be writing results to (not written in test mode)
# NOTE: the 'pred_prob' column holds the dissimilarity score (lower = closer).
f = None
if test_mode == 0:
  f = open('/content/drive/MyDrive/LogoDetection/Results/siameseNet05_results.csv', 'w')
  f.write('img_name,pred_class,pred_prob,true_class\n')

# here's OpenCV's selective search object
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

# will need to map numpy images to tensors
to_tensor = transforms.ToTensor()

# here is the required resize transform for our model
#   NOTE: size is defined with the model
transform = transforms.Resize(img_size)

# because of memory issues, regions are embedded K rectangles at a time
K = 10

# running count of processed images (only used for progress messages)
i = 0

# try/finally guarantees the results file is closed (and flushed) even if an
# image fails part-way through the run.
try:
  # loop through all folders in the dataset (note: the folder defines the
  # classification)
  for dir_name in os.listdir(ss_path):
    print('')
    print('Working in directory ' + dir_name)

    # loop through images (at most 10 if in test mode; slicing is safe for
    # folders that hold fewer than 10 images)
    img_names = os.listdir(os.path.join(ss_path, dir_name))
    if test_mode:
      img_names = img_names[:10]

    for img_name in img_names:
      # load image
      img = plt.imread(os.path.join(ss_path, dir_name, img_name))
      img_cv = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
      img_t = to_tensor(img_cv).cuda()
      if display_progress:
        i += 1
        print(f'Loaded image {i}: {img_name}.', end='... ')

      # run region proposal
      a = time.time()

      ss.setBaseImage(img_cv)
      ss.switchToSingleStrategy()
      rects = ss.process()

      b = time.time()
      if display_progress:
        print('Completed region proposal. %.2f sec.' % (b - a), end='... ')

      # Best match so far for this screenshot. best_class stays '' and
      # best_dsim stays inf if selective search proposes no regions.
      best_dsim = float('inf')
      best_class = ''

      # Inference only: no_grad avoids building autograd graphs, which is the
      # main source of GPU memory pressure here.
      with torch.no_grad():
        # Step through the proposals in chunks of K. The final chunk simply
        # holds the remainder, so images with fewer than K proposals are
        # still evaluated. Chunk boundaries do not affect the embeddings
        # because the model was put in eval mode when it was loaded.
        for start in range(0, len(rects), K):
          rects_temp = rects[start:start + K]

          # set up tensor to hold the resized crops, directly on the GPU
          imgs = torch.zeros(len(rects_temp), 3, img_size[0], img_size[1],
                             device=img_t.device)

          # crop each rectangle out of the screenshot and resize it
          for n, (x, y, w, h) in enumerate(rects_temp):
            imgs[n] = transform(img_t[:, y:y+h, x:x+w])

          # get feature vectors for images
          img_vecs = model.forward_one(imgs)

          # compare against each logo, keeping the closest region/logo pair
          for key, value in logo_vecs.items():
            dis = torch.linalg.norm(img_vecs - value, ord=2, dim=1)
            min_dsim = dis.min().item()

            if min_dsim < best_dsim:
              best_dsim = min_dsim
              best_class = key

      c = time.time()
      if display_progress:
        print('Completed prediction. %.2f sec.' % (c - b))

      # Logo files are named '<class>_<n>.<ext>'; the prediction is correct
      # when that class prefix equals the screenshot's folder name.
      result_message = 'Aww :('
      if best_class.split('.')[0].split('_')[0] == dir_name:
        result_message = 'Woo! :D'

      # save prediction and dissimilarity score
      if display_progress:
        print('Dissimilarity score: ' + str(best_dsim) + ',  Class predicted: ' + best_class + ',   ' + result_message)
      if test_mode == 0:
        f.write(img_name + ',' + best_class + ',' + str(best_dsim) + ',' + dir_name + '\n')
finally:
  if f is not None:
    f.close()


Working in directory apple
Loaded image 1: apple (48).png.... Completed region proposal. 0.84 sec.... Completed prediction. 0.42 sec.
Dissimilarity score: 0.411774218082428,  Class predicted: microsoft_3.png,   Aww :(
Loaded image 2: apple (49).png.... Completed region proposal. 0.77 sec.... Completed prediction. 0.23 sec.
Dissimilarity score: 0.4734462797641754,  Class predicted: alibaba_2.png,   Aww :(
Loaded image 3: apple (1).png.... Completed region proposal. 1.05 sec.... Completed prediction. 0.44 sec.
Dissimilarity score: 0.44330814480781555,  Class predicted: microsoft_1.png,   Aww :(
Loaded image 4: apple (2).png.... Completed region proposal. 2.00 sec.... Completed prediction. 1.49 sec.
Dissimilarity score: 0.37645450234413147,  Class predicted: amazon_1.png,   Aww :(
Loaded image 5: apple (3).png.... Completed region proposal. 1.11 sec.... Completed prediction. 0.56 sec.
Dissimilarity score: 0.6068193912506104,  Class predicted: adobe_2.png,   Aww :(
Loaded image 6: apple (