## Install & import required libraries / repos

In [30]:
import torch
from pathlib import Path
import os
from skimage import io
from PIL import Image
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
!pip install pytorch_pretrained_vit
!pip3 install timm
from pytorch_pretrained_vit import ViT
import timm
from torch.utils import model_zoo



In [2]:
!git clone https://github.com/rgeirhos/texture-vs-shape.git

Cloning into 'texture-vs-shape'...
remote: Enumerating objects: 2359, done.[K
remote: Counting objects: 100% (149/149), done.[K
remote: Compressing objects: 100% (139/139), done.[K
remote: Total 2359 (delta 23), reused 67 (delta 9), pack-reused 2210[K
Receiving objects: 100% (2359/2359), 148.65 MiB | 31.52 MiB/s, done.
Resolving deltas: 100% (249/249), done.


need to move files in `code` directory to `/content/` to be able to import probabilities_to_decision

In [23]:
import probabilities_to_decision

In [3]:
STIMULI = "texture-vs-shape/stimuli/style-transfer-preprocessed-512/"

## Function to calculate shape bias

In [24]:
#get the images collectively
def calculate_shape_bias(dir, preprocess, model):
  root_dir = dir
  images = []
  labels = []

  shape = 0
  texture = 0

  model.eval()

  for label in os.listdir(root_dir): #for every folder in the directory
    data_dir = Path(root_dir) / label #go into that folder
    data_files = data_dir.glob('*.png') #gather the images by .png name
    
    for image in data_files: #for every image in the folder
        images.append(image) #add the image path to the list
        labels.append(label) #add the folder name to the list of labels

        shape_type = label 

        types = str(image).split('/')
        types = types[4]
        typenum = types.split('-')
        typenum = typenum[1]
        texture_type = typenum.split('.')
        texture_type = texture_type[0]
        texture_type = texture_type[:-1]

        input = Image.open(image)
        input_tensor = preprocess(input)
        input_batch = input_tensor.unsqueeze(0)
        input_batch = input_batch.to('cuda')

        with torch.no_grad():
          output_probs = model(input_batch)

          ##############################################
          ## Code from Robert Geirhos: https://github.com/rgeirhos/texture-vs-shape#code ##

          softmax_output = torch.nn.functional.softmax(output_probs[0], dim=0)

          # convert to numpy
          softmax_output_numpy = softmax_output.cpu().numpy() # replace with conversion

          # create mapping
          mapping = probabilities_to_decision.ImageNetProbabilitiesTo16ClassesMapping()
          
          # obtain decision 
          decision_from_16_classes = mapping.probabilities_to_decision(softmax_output_numpy)
          
          ##############################################

          if decision_from_16_classes == shape_type:
            shape += 1
          
          if decision_from_16_classes == texture_type:
            texture += 1

  print("SHAPE CORRECT TOTAL")
  print(shape)

  print("TEXTURE CORRECT TOTAL")
  print(texture)

  print("SHAPE BIAS")
  print(shape / (shape + texture))

# Vision Transformers

## Data-efficient Image Transformer ([DeiT]())

In [26]:
#deit model
model = torch.hub.load('facebookresearch/deit:main', 'deit_base_patch16_224', pretrained=True)
model = model.to('cuda:0')

preprocess = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

calculate_shape_bias(STIMULI, preprocess, model)

Using cache found in /root/.cache/torch/hub/facebookresearch_deit_main


SHAPE CORRECT TOTAL
389
TEXTURE CORRECT TOTAL
530
SHAPE BIAS
0.4232861806311208


## Vision Transformer ([ViT](https://github.com/google-research/vision_transformer))

In [27]:
# pretrained model from https://github.com/lukemelas/PyTorch-Pretrained-ViT
model = ViT('B_16_imagenet1k', pretrained=True)
model = model.to('cuda')

preprocess = transforms.Compose([
      transforms.Resize(386),
      transforms.CenterCrop(386),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

calculate_shape_bias(STIMULI, preprocess, model)

Loaded pretrained weights.
SHAPE CORRECT TOTAL
378
TEXTURE CORRECT TOTAL
533
SHAPE BIAS
0.4149286498353458


# Convolutional Neural Networks

## ResNet50

In [28]:
#ResNet50
preprocess = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

model = models.resnet50(pretrained=True)
model = model.to('cuda')

calculate_shape_bias(STIMULI, preprocess, model)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))


SHAPE CORRECT TOTAL
246
TEXTURE CORRECT TOTAL
694
SHAPE BIAS
0.26170212765957446


## ResNet50 trained on Stylized-ImageNet [Geirhos, et. al., 2019](https://github.com/rgeirhos/texture-vs-shape)

In [31]:
#SIN
preprocess = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

model = torchvision.models.resnet50(pretrained=False)
model = torch.nn.DataParallel(model).cuda()
checkpoint = model_zoo.load_url('https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/6f41d2e86fc60566f78de64ecff35cc61eb6436f/resnet50_train_60_epochs-c8e5653e.pth.tar')
model.load_state_dict(checkpoint["state_dict"])

model = model.to('cuda')

calculate_shape_bias(STIMULI, preprocess, model)

Downloading: "https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/6f41d2e86fc60566f78de64ecff35cc61eb6436f/resnet50_train_60_epochs-c8e5653e.pth.tar" to /root/.cache/torch/hub/checkpoints/resnet50_train_60_epochs-c8e5653e.pth.tar


HBox(children=(FloatProgress(value=0.0, max=204732682.0), HTML(value='')))


SHAPE CORRECT TOTAL
678
TEXTURE CORRECT TOTAL
208
SHAPE BIAS
0.7652370203160271


## ResNet50 trained on Stylized-ImageNet and ImageNet [Geirhos, et. al., 2019](https://github.com/rgeirhos/texture-vs-shape)

In [32]:
#SIN + IN
preprocess = transforms.Compose([
      transforms.Resize(256),
      transforms.CenterCrop(224),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
  ])

model = torchvision.models.resnet50(pretrained=False)
model = torch.nn.DataParallel(model).cuda()
checkpoint = model_zoo.load_url('https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_train_45_epochs_combined_IN_SF-2a0d100e.pth.tar')
model.load_state_dict(checkpoint["state_dict"])

model = model.to('cuda')

calculate_shape_bias(STIMULI, preprocess, model)

Downloading: "https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_train_45_epochs_combined_IN_SF-2a0d100e.pth.tar" to /root/.cache/torch/hub/checkpoints/resnet50_train_45_epochs_combined_IN_SF-2a0d100e.pth.tar


HBox(children=(FloatProgress(value=0.0, max=204733028.0), HTML(value='')))


SHAPE CORRECT TOTAL
363
TEXTURE CORRECT TOTAL
575
SHAPE BIAS
0.38699360341151384
