# Load modules, define model and detector

In [24]:
import torch
import torchvision.transforms as T
torch.set_grad_enabled(False);
from PIL import Image

# Preprocessing applied to every image before it reaches the detector:
# resize, convert to a tensor, then normalize each of the three channels with
# the standard PyTorch mean/std values.
transform = T.Compose([
  T.Resize(800),
  T.ToTensor(),
  T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def detect(im, model, transform, threshold=0.7):
  """Run the detector on one image and return the class scores of confident predictions.

  Args:
    im: input image; it is handed to ``transform`` unchanged.
    model: callable mapping a batched image tensor to a dict that contains
      ``'pred_logits'`` (indexed here as batch x predictions x classes).
    transform: callable turning ``im`` into an image tensor without batch
      dimension.
    threshold: a prediction is kept when its best class probability is above
      this value. Defaults to 0.7, the value that used to be hard-coded.

  Returns:
    Tensor with one row per kept prediction. The columns are the softmax
    probabilities of every class except the last one. It has zero rows when
    no prediction passes the threshold.
  """
  # Run on whatever device the model lives on instead of assuming CUDA; fall
  # back to the CPU for callables that expose no parameters.
  get_params = getattr(model, 'parameters', None)
  first_param = next(iter(get_params()), None) if callable(get_params) else None
  device = first_param.device if first_param is not None else torch.device('cpu')

  # add the batch dimension (batch-size: 1) and move the image to that device
  img = transform(im).unsqueeze(0).to(device)

  # propagate through the model; inference only, so no autograd graph is
  # needed (does not rely on a notebook-wide torch.set_grad_enabled(False))
  with torch.no_grad():
    outputs = model(img)

  # softmax over the class axis of the first (only) batch element, dropping
  # the last class column (DETR's "no object" class)
  probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]

  # keep only predictions whose best class is confident enough
  keep = probas.max(-1).values > threshold

  return probas[keep]

# import the pretrained DETR model from the PyTorch model hub
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
# A freshly constructed nn.Module is in training mode; switch to eval mode so
# dropout is disabled and repeated runs on the same image give the same scores.
model.eval()
model = model.cuda()
print(f'CUDA: {torch.cuda.is_available()} -> {torch.cuda.get_device_name(0)}')

Using cache found in /home/max/.cache/torch/hub/facebookresearch_detr_master


CUDA: True -> GeForce GTX 1050 Ti


In [25]:
import numpy as np
import pandas as pd
import json
import requests
import time
import os
from SPARQLWrapper import SPARQLWrapper, JSON

def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query against an endpoint and return the result as a data frame.

    The columns are the variables listed in the JSON result head, in that
    order. Each row holds the ``value`` of the corresponding binding, or
    ``None`` when a variable is unbound in that row.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    payload = json.load(client.query().response)

    columns = payload['head']['vars']
    rows = [
        [binding.get(col, {}).get('value') for col in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(rows, columns=columns)

# SPARQL query: one row per artwork with its image URL, label, abstract and
# all motif labels joined into a single ';'-separated string.
q = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX : <http://h-da.de/fbi/artontology/>

SELECT ?artwork ?url ?name ?abstract (group_concat(?motif;separator=';') as ?motifs)
WHERE {
  ?artwork rdf:type :artwork;
     rdfs:label ?name;
     :image ?url;
     :abstract ?abstract;
     :motif/rdfs:label ?motif.
}
group by ?artwork ?url ?name ?abstract
"""

# SPARQL endpoint the query is sent to
ds = "http://neuds.de:3030/artontology"
df = get_sparql_dataframe(ds, q)
# the artwork id is the last path segment of the artwork URI
df['id'] = df['artwork'].map(lambda x: x.split('/')[-1])
df.head()

Unnamed: 0,artwork,url,name,abstract,motifs,id
0,http://www.wikidata.org/entity/Q29019486,https://upload.wikimedia.org/wikipedia/commons...,Saint Proculus of Pozzuoli and his mother Sant...,Saints Proculus and Nicea is a 1636-1637 paint...,Nicea;Saint Procolo;Saint Procolo and Nicea,Q29019486
1,http://www.wikidata.org/entity/Q7527543,https://upload.wikimedia.org/wikipedia/commons...,Statue of John A. Macdonald,The Sir John A. Macdonald statue is a bronze s...,John A. Macdonald,Q7527543
2,http://www.wikidata.org/entity/Q3944494,https://upload.wikimedia.org/wikipedia/commons...,The Holy Family with Saint Catherine of Alexan...,Holy Family with Saint Catherine of Alexandria...,Virgin Mary;Catherine of Alexandria;boy;Child ...,Q3944494
3,http://www.wikidata.org/entity/Q368788,https://upload.wikimedia.org/wikipedia/commons...,Pietà,Pietà is a painting by the Italian Renaissance...,Virgin Mary;Jesus Christ;man;woman,Q368788
4,http://www.wikidata.org/entity/Q3630743,https://upload.wikimedia.org/wikipedia/commons...,Self-Portrait at the Age of 63,Self-Portrait at the Age of 63 is a self-portr...,Rembrandt;man,Q3630743


# Inference

Inference function: turn the detections for one artwork image into a bag of class-index counts.

In [34]:
def get_scores(aid, df, image_dir='res', min_score=0.6, empty_key=99):
  """Build a bag of detected class indices for one artwork image.

  The image is read from ``{image_dir}/{aid}.{ext}``, where ``ext`` is the
  lower-cased extension of the artwork's ``url`` in ``df``. It is passed
  through ``detect`` with the notebook-level ``model`` and ``transform``.

  Args:
    aid: artwork id; must occur in ``df['id']``.
    df: data frame with at least the columns ``id`` and ``url``.
    image_dir: directory holding the image files (default ``'res'``).
    min_score: a class is counted for a detection when its score is above
      this value (default 0.6).
    empty_key: key stored with count 1 when the detector keeps no prediction
      (default 99).

  Returns:
    dict mapping class index (int) to the number of detections of that
    class, ``{empty_key: 1}`` when nothing was detected, or ``{}`` when the
    image could not be processed.

  Raises:
    IndexError: if ``aid`` is not present in ``df``.
  """
  import warnings

  ext = df[df['id'] == aid]['url'].values[0].split('.')[-1].lower()
  path = f'{image_dir}/{aid}.{ext}'
  try:
    # The context manager closes the file handle. convert('RGB') makes
    # grayscale/palette/RGBA images compatible with the 3-channel Normalize.
    with Image.open(path) as im:
      scores = detect(im.convert('RGB'), model, transform)
  except Exception as exc:
    # Best effort: continue with an empty bag, but report the reason instead
    # of hiding it. KeyboardInterrupt is no longer swallowed.
    warnings.warn(f'get_scores: could not process {path}: {exc!r}')
    return {}

  if scores.shape[0] == 0:
    return {empty_key: 1}

  bow = {}
  for detection in scores.cpu().detach().numpy():
    # enumerate yields plain Python ints, which keeps the keys JSON-serializable
    for i, score in enumerate(detection):
      if score > min_score:
        bow[i] = bow.get(i, 0) + 1
  return bow

Fill `bocv`: run the detector over every artwork and periodically dump the results to `art_bocv.json`.

In [36]:
from IPython.display import clear_output

# Collect one bag of detected class indices per artwork, keyed by artwork id.
# The partial result is written to disk after every 200 artworks and once
# more when the loop has finished.
bocv = {}
path_bocv = 'art_bocv.json'
aids = df['id'].values
counter = 0
for counter, aid in enumerate(aids, start=1):
  bow = get_scores(aid, df)
  bocv[aid] = bow
  print(f'Done: {aid}')
  if counter % 200:
    continue
  # checkpoint: overwrite the JSON file with everything gathered so far
  with open(path_bocv, 'w') as f:
    json.dump(bocv, f)
  clear_output()
  print(f'dumped bocv at counter: {counter}')

# final dump covering all artworks
with open(path_bocv, 'w') as f:
  json.dump(bocv, f)
clear_output()
print('dumped bocv. end.')

dumped bocv. end.


## Tests

In [16]:
# Smoke test: run the detector on a single known artwork and print the raw scores.
aid = 'Q3944494'
# not named `dir`, so the built-in dir() stays usable in later cells
image_dir = 'res'
ext = df[df['id'] == aid]['url'].values[0].split('.')[-1].lower()
file = f'{image_dir}/{aid}.{ext}'
im = Image.open(file)
scores = detect(im, model, transform)
print(scores)

tensor([[6.0672e-11, 8.8828e-01, 1.7237e-06, 2.3262e-05, 2.5375e-06, 1.5594e-07,
         2.2218e-07, 5.9408e-07, 8.0758e-07, 2.9170e-06, 1.6377e-07, 5.5979e-07,
         8.3768e-11, 9.2648e-08, 1.8620e-07, 3.8622e-05, 9.6981e-06, 8.3246e-06,
         4.4807e-05, 7.0775e-06, 1.7637e-07, 1.2115e-07, 1.5006e-06, 1.7303e-06,
         7.2405e-07, 2.0216e-06, 8.5371e-11, 1.1463e-04, 1.8959e-05, 7.7757e-11,
         9.1370e-11, 4.2805e-05, 4.9098e-06, 1.8728e-06, 4.6223e-07, 3.5926e-06,
         4.2620e-06, 2.9611e-06, 1.2458e-06, 1.1497e-07, 3.6998e-07, 1.9736e-06,
         1.0526e-05, 4.8951e-07, 9.7581e-07, 1.2987e-10, 3.8933e-07, 8.6437e-07,
         3.5001e-07, 8.8538e-08, 1.1718e-07, 4.6208e-07, 3.5008e-07, 1.9233e-07,
         3.0089e-08, 8.6911e-09, 3.2628e-07, 8.3010e-08, 5.2446e-08, 1.4404e-07,
         3.0458e-08, 2.3982e-07, 3.6140e-05, 3.2448e-05, 8.0372e-06, 1.6591e-05,
         8.0968e-11, 1.5859e-06, 8.7593e-11, 8.0315e-11, 1.0391e-06, 6.6255e-11,
         4.1922e-06, 4.7534e

In [30]:
# NOTE(review): this rebinds `bocv` -- which the "fill bocv" cell uses for the
# dict of all artworks -- to the bag of the single artwork `aid`; re-run the
# fill cell before relying on `bocv` as the full mapping again.
bocv = get_scores(aid, df)