# Load modules, define model and detector

In [1]:
import torch
import torchvision.transforms as T
torch.set_grad_enabled(False);
from PIL import Image

# Preprocessing applied to every image before it is fed to the detector:
# resize to 800, convert to a tensor, then apply the standard PyTorch
# per-channel mean/std normalization (three channels).
transform = T.Compose([
  T.Resize(size=800),
  T.ToTensor(),
  T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def detect(im, model, transform, threshold=0.7):
  """Run the detector on one image and return the confident class probabilities.

  Args:
    im: input image. In this notebook a PIL image; any object exposing
      `mode` / `convert` is converted to RGB first when it is not RGB already.
    model: callable returning a dict with a 'pred_logits' entry whose first
      axis is the batch.
    transform: callable turning `im` into an image tensor without batch axis.
    threshold: minimum top-class probability a prediction needs to be kept
      (default 0.7, the value that used to be hard-coded).

  Returns:
    Tensor with one row per kept prediction, holding the softmax
    probabilities of all classes except the last one.
  """
  # The notebook's transform normalizes with 3-channel statistics, so
  # grayscale / RGBA / palette images have to be converted first.
  if getattr(im, 'mode', 'RGB') != 'RGB':
    im = im.convert('RGB')

  # Put the input on the same device as the model instead of assuming CUDA.
  try:
    device = next(model.parameters()).device
  except (AttributeError, StopIteration):
    # model exposes no parameters: fall back to CUDA when present
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # mean-std normalize the input image (batch-size: 1)
  img = transform(im).to(device).unsqueeze(0)

  # propagate through the model
  outputs = model(img)

  # softmax over classes; the last logit (DETR's "no object" class) is dropped
  probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
  # keep only predictions whose best class exceeds the confidence threshold
  keep = probas.max(-1).values > threshold

  return probas[keep]

# import model from pytorch model hub
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
# Move to the GPU and switch to inference mode: modules start out in training
# mode, where dropout is active and repeated runs give different detections.
model = model.cuda().eval()
print(f'CUDA: {torch.cuda.is_available()} -> {torch.cuda.get_device_name(0)}')

Using cache found in /home/max/.cache/torch/hub/facebookresearch_detr_master


CUDA: True -> GeForce GTX 1050 Ti


In [2]:
import numpy as np
import pandas as pd
import json
import requests
import time
import os
from SPARQLWrapper import SPARQLWrapper, JSON

def get_sparql_dataframe(service, query):
    """
    Run a SPARQL query and return the result bindings as a Pandas data frame.

    `service` is the endpoint passed to SPARQLWrapper and `query` the SPARQL
    text. The frame has one column per variable listed in the response head
    and one row per binding; a variable that is missing from a binding
    becomes None.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # parse the raw JSON response
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    records = [
        [binding.get(col, {}).get('value') for col in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)

# SPARQL endpoint of the art ontology
ds = "http://neuds.de:3030/artontology"

# One row per artwork: label, image URL, abstract and all motif labels
# joined with ';'.
q = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX : <http://h-da.de/fbi/artontology/>

SELECT ?artwork ?url ?name ?abstract (group_concat(?motif;separator=';') as ?motifs)
WHERE {
  ?artwork rdf:type :artwork;
     rdfs:label ?name;
     :image ?url;
     :abstract ?abstract;
     :motif/rdfs:label ?motif.
}
group by ?artwork ?url ?name ?abstract
"""

df = get_sparql_dataframe(ds, q)
# the id is the last path segment of the artwork URI
df['id'] = df['artwork'].map(lambda uri: uri.rsplit('/', 1)[-1])
df.head()

Unnamed: 0,artwork,url,name,abstract,motifs,id
0,http://www.wikidata.org/entity/Q29019486,https://upload.wikimedia.org/wikipedia/commons...,Saint Proculus of Pozzuoli and his mother Sant...,Saints Proculus and Nicea is a 1636-1637 paint...,Nicea;Saint Procolo;Saint Procolo and Nicea,Q29019486
1,http://www.wikidata.org/entity/Q7527543,https://upload.wikimedia.org/wikipedia/commons...,Statue of John A. Macdonald,The Sir John A. Macdonald statue is a bronze s...,John A. Macdonald,Q7527543
2,http://www.wikidata.org/entity/Q3944494,https://upload.wikimedia.org/wikipedia/commons...,The Holy Family with Saint Catherine of Alexan...,Holy Family with Saint Catherine of Alexandria...,Virgin Mary;Catherine of Alexandria;boy;Child ...,Q3944494
3,http://www.wikidata.org/entity/Q368788,https://upload.wikimedia.org/wikipedia/commons...,Pietà,Pietà is a painting by the Italian Renaissance...,Virgin Mary;Jesus Christ;man;woman,Q368788
4,http://www.wikidata.org/entity/Q3630743,https://upload.wikimedia.org/wikipedia/commons...,Self-Portrait at the Age of 63,Self-Portrait at the Age of 63 is a self-portr...,Rembrandt;man,Q3630743


# Inference

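Run inference: `get_scores` opens the locally stored image of one artwork, runs the detector on it and returns the detected classes as a dict.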

In [12]:
def get_scores(aid, df):
  """Detect objects in the stored image of artwork `aid` and count them per class.

  The image is read from 'res/<aid>.<ext>', where <ext> is the lower-cased
  text after the last '.' of the artwork's 'url' entry in `df`.

  Args:
    aid: artwork id, matched against the 'id' column of `df`.
    df: data frame with at least the columns 'id' and 'url'.

  Returns:
    dict mapping class index -> number of kept detections whose most probable
    class is that index. {99: 1} when the detector keeps nothing (99 is a
    sentinel; presumably outside the model's class range - TODO confirm).
    {} when the artwork is unknown or the image cannot be read/processed;
    the caller treats an empty dict as "not processed yet".
  """
  img_dir = 'res'
  try:
    url = df.loc[df['id'] == aid, 'url'].values[0]
    ext = url.split('.')[-1].lower()
    path = f'{img_dir}/{aid}.{ext}'
    # context manager so the file handle is released after inference
    with Image.open(path) as im:
      scores = detect(im, model, transform)
  except Exception as err:
    # best effort: report and leave the entry empty instead of hiding the
    # error (KeyboardInterrupt is no longer swallowed)
    print(f'Failed: {aid} ({err!r})')
    return {}

  if scores.shape[0] == 0:
    return {99: 1}

  # Count the top class of every kept detection. The previous version only
  # looked at the first detection, so its "+= 1" branch was never reached.
  # tolist() yields plain Python ints, which keeps the dict JSON-serializable.
  bow = {}
  for cls in scores.argmax(-1).tolist():
    bow[cls] = bow.get(cls, 0) + 1
  return bow

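Fill `bocv`: run `get_scores` for every artwork whose entry in `art_bocv.json` is still empty, and write the results back to that file after every 100 processed artworks and once more at the end.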

In [13]:
from IPython.display import clear_output

# Load the stored results; entries that are still empty get filled below.
bocv = {}
path_bocv = 'art_bocv.json'
with open(path_bocv) as f:
  bocv = json.load(f)

counter = 0  # artworks processed since the last dump
for key in bocv:
  if len(bocv[key]) != 0:
    # already has a result
    continue

  bow = get_scores(key, df)
  bocv[key] = bow
  counter += 1
  print(f'Done: {key}')

  if counter != 100:
    continue

  # checkpoint after every 100 processed artworks
  with open(path_bocv, 'w') as f:
    json.dump(bocv, f)
  counter = 0
  clear_output()
  print('dumped bocv')

# final dump with whatever was processed since the last checkpoint
with open(path_bocv, 'w') as f:
  json.dump(bocv, f)
clear_output()
print('dumped bocv')

dumped bocv
