In [18]:
import configparser
import requests
import json
import collections
import random

In [2]:
config = configparser.ConfigParser()
config.read('secrets/keys.ini')
list(config['clarifai'].keys())

['api_key', 'app_name']

In [24]:
with open('festival-of-giants-2017-hand-tags.json') as infile:
    hand_tagged = json.load(infile)
hand_tagged[0]

{'hand-tags': ['elephant'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2017/festival-of-giants-2017/19247943_1571294846222629_7357675221196322624_n-small.jpg'}

In [25]:
all_tags = set([k for i in hand_tagged for k in i['hand-tags']])
all_tags

{'bees',
 'butterfly',
 'canvas',
 'catepillar (sp?!)',
 'costume',
 'cycle bus',
 'dragon',
 'drummer',
 'elephant',
 'event space',
 'fly',
 'generic',
 'goose',
 'hummingbirds',
 'installation',
 'mechanism',
 'puppet',
 'triceratops'}

In [17]:
collections.Counter([k for i in hand_tagged for k in i['hand-tags']]).most_common(11)

[('generic', 82),
 ('costume', 29),
 ('puppet', 20),
 ('elephant', 18),
 ('goose', 16),
 ('hummingbirds', 13),
 ('drummer', 7),
 ('installation', 7),
 ('fly', 6),
 ('mechanism', 6),
 ('triceratops', 5)]

In [26]:
train_set = random.sample(hand_tagged, 128)
train_set_urls = [i['url'] for i in train_set]
test_set = [i for i in hand_tagged if i['url'] not in train_set_urls]
len(hand_tagged), len(train_set), len(test_set), len(train_set) + len(test_set)

(215, 128, 87, 215)

In [27]:
train_set[1]

{'hand-tags': ['puppet'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2017/festival-of-giants-2017/IMG_9573-small.jpg'}

In [28]:
# with open('festival-of-giants-2017-training.json', 'w') as outfile:
#     json.dump(train_set, outfile, indent=2)
# with open('festival-of-giants-2017-testing.json', 'w') as outfile:
#     json.dump(test_set, outfile, indent=2)    

In [29]:
with open('festival-of-giants-2017-training.json') as infile:
    train_set = json.load(infile)
with open('festival-of-giants-2017-testing.json') as infile:
    test_set = json.load(infile)    

In [12]:
collections.Counter([k for i in hand_tagged for k in i['hand-tags']]).most_common(11)

[('generic', 83),
 ('costume', 27),
 ('puppet', 20),
 ('harminder', 18),
 ('goose', 16),
 ('hummingbirds', 13),
 ('drummer', 7),
 ('fly', 6),
 ('installation', 6),
 ('mechanism', 6),
 ('triceratops', 5)]

In [31]:
all_concepts = [c[0] for c in 
                collections.Counter([k for i in hand_tagged for k in i['hand-tags']]).most_common(11)[1:]]
all_concepts, len(all_concepts)

(['costume',
  'puppet',
  'elephant',
  'goose',
  'hummingbirds',
  'drummer',
  'installation',
  'fly',
  'mechanism',
  'triceratops'],
 10)

In [32]:
def concepts_present(tags, concepts):
    return [c for c in concepts if c in tags], [c for c in concepts if c not in tags]

In [33]:
for i in train_set[:5]:
    print(i['hand-tags'], concepts_present(i['hand-tags'], all_concepts))

['generic'] ([], ['costume', 'puppet', 'elephant', 'goose', 'hummingbirds', 'drummer', 'installation', 'fly', 'mechanism', 'triceratops'])
['puppet'] (['puppet'], ['costume', 'elephant', 'goose', 'hummingbirds', 'drummer', 'installation', 'fly', 'mechanism', 'triceratops'])
['puppet'] (['puppet'], ['costume', 'elephant', 'goose', 'hummingbirds', 'drummer', 'installation', 'fly', 'mechanism', 'triceratops'])
['generic'] ([], ['costume', 'puppet', 'elephant', 'goose', 'hummingbirds', 'drummer', 'installation', 'fly', 'mechanism', 'triceratops'])
['generic'] ([], ['costume', 'puppet', 'elephant', 'goose', 'hummingbirds', 'drummer', 'installation', 'fly', 'mechanism', 'triceratops'])


In [36]:
from clarifai import rest
from clarifai.rest import ClarifaiApp
from clarifai.rest import Image as ClImage

app = ClarifaiApp(api_key=config['clarifai']['api_key'])

In [None]:
# img1 = ClImage(url="https://samples.clarifai.com/puppy.jpeg", concepts=['boscoe'], not_concepts=['our_wedding'])
# img2 = ClImage(url="https://samples.clarifai.com/wedding.jpg", concepts=['our_wedding'], not_concepts=['cat','boscoe'])

# app.inputs.bulk_create_images([img1, img2])

In [37]:
train_image_info = []
for i in test_set:
    url = i['url']
    concs, not_concs = concepts_present(i['hand-tags'], all_concepts)
    train_image_info.append(ClImage(url=url, concepts=concs, not_concepts=not_concs))
train_image_info[:2]

[<clarifai.rest.client.Image at 0x7feace84b128>,
 <clarifai.rest.client.Image at 0x7feace84b160>]

In [38]:
app.inputs.bulk_create_images(train_image_info)

[<clarifai.rest.client.Image at 0x7feace8a2320>,
 <clarifai.rest.client.Image at 0x7feae176ec88>,
 <clarifai.rest.client.Image at 0x7feace8325c0>,
 <clarifai.rest.client.Image at 0x7feace832908>,
 <clarifai.rest.client.Image at 0x7feace83bc88>,
 <clarifai.rest.client.Image at 0x7feace7bf470>,
 <clarifai.rest.client.Image at 0x7feace7bf080>,
 <clarifai.rest.client.Image at 0x7feace7bf0f0>,
 <clarifai.rest.client.Image at 0x7feace7bf1d0>,
 <clarifai.rest.client.Image at 0x7feace7bf0b8>,
 <clarifai.rest.client.Image at 0x7feace7bf320>,
 <clarifai.rest.client.Image at 0x7feace7bf400>,
 <clarifai.rest.client.Image at 0x7feace7bf278>,
 <clarifai.rest.client.Image at 0x7feace7bf630>,
 <clarifai.rest.client.Image at 0x7feace7bf7b8>,
 <clarifai.rest.client.Image at 0x7feace7bf5f8>,
 <clarifai.rest.client.Image at 0x7feace7bf710>,
 <clarifai.rest.client.Image at 0x7feace7bf828>,
 <clarifai.rest.client.Image at 0x7feace7bf6a0>,
 <clarifai.rest.client.Image at 0x7feace7bf978>,
 <clarifai.rest.clie

In [39]:
model = app.models.create('festiveroad', concepts=all_concepts)
model

<clarifai.rest.client.Model at 0x7feace83b828>

In [43]:
model.train()

<clarifai.rest.client.Model at 0x7feace732828>

In [45]:
trained_model = app.models.get('festiveroad')
trained_model.predict_by_url(test_set[0]['url'])

{'outputs': [{'created_at': '2017-11-12T12:27:19.824049395Z',
   'data': {'concepts': [{'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'triceratops',
      'name': 'triceratops',
      'value': 0.94750464},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'puppet',
      'name': 'puppet',
      'value': 0.039329555},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'goose',
      'name': 'goose',
      'value': 0.006065057},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'costume',
      'name': 'costume',
      'value': 0.00553496},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'fly',
      'name': 'fly',
      'value': 0.0043024104},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'elephant',
      'name': 'elephant',
      'value': 0.0033425384},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'hummingbirds',
      'name': 'hummingbirds',
      'value': 6.677292e-05},
     {'a

In [46]:
test_set[0]

{'hand-tags': ['triceratops'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2017/festival-of-giants-2017/19260709_1906157629708906_941293702425961197_n-small.jpg'}

In [47]:
predictions = [trained_model.predict_by_url(i['url']) for i in test_set]
len(predictions)

87

In [60]:
[(c['name'], c['value']) for c in predictions[1]['outputs'][0]['data']['concepts'][:3]]

[('hummingbirds', 0.935379), ('goose', 0.03203959), ('costume', 0.0069925818)]

In [59]:
test_set[1]

{'hand-tags': ['hummingbirds'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2017/festival-of-giants-2017/19399619_1906157669708902_5236259890165828680_n-small.jpg'}

In [63]:
predictions[1]['outputs'][0]['input']['data']['image']['url']

'https://scripts.njae.me.uk/festive-road-pictures/small/2017/festival-of-giants-2017/19399619_1906157669708902_5236259890165828680_n-small.jpg'

In [64]:
def hand_tags_by_url(url, tagged_images):
    return [i['hand-tags'] for i in tagged_images if i['url'] == url][0]

In [65]:
hand_tags_by_url(predictions[1]['outputs'][0]['input']['data']['image']['url'], test_set)

['hummingbirds']

In [71]:
def url_of_prediction(prediction):
    return prediction['outputs'][0]['input']['data']['image']['url']

In [68]:
def tags_of_prediction(prediction):
    return [(c['name'], c['value']) for c in prediction['outputs'][0]['data']['concepts']]

In [75]:
with open('clarifai-model-1-predictions.json', 'w') as outfile:
    json.dump(predictions, outfile, indent=2)

In [73]:
for p in predictions:
    url = url_of_prediction(p)
    found_tags = [t[0] for t in tags_of_prediction(p) if t[1] > 0.8]
    print(hand_tags_by_url(url, test_set), tags_of_prediction(p)[:3])

['triceratops'] [('triceratops', 0.94750464), ('puppet', 0.039329555), ('goose', 0.006065057)]
['hummingbirds'] [('hummingbirds', 0.935379), ('goose', 0.03203959), ('costume', 0.0069925818)]
['bees'] [('costume', 0.10188919), ('puppet', 0.002372923), ('goose', 0.0014373008)]
['elephant'] [('elephant', 0.9815099), ('goose', 0.018219769), ('triceratops', 0.013795066)]
['cycle bus'] [('elephant', 0.016256927), ('costume', 0.012157428), ('triceratops', 0.009599909)]
['elephant'] [('elephant', 0.9861275), ('triceratops', 0.044785663), ('mechanism', 0.0030194276)]
['canvas'] [('goose', 0.0067580296), ('installation', 0.0012529811), ('triceratops', 0.0006780008)]
['generic'] [('puppet', 0.015682885), ('drummer', 0.0033000733), ('triceratops', 0.0005197768)]
['hummingbirds'] [('hummingbirds', 0.9576477), ('costume', 0.016936693), ('fly', 0.0048118504)]
['generic'] [('puppet', 0.0038093843), ('mechanism', 0.0016142704), ('installation', 0.00045316262)]
['generic'] [('costume', 0.27737358), ('fl

In [77]:
for p in predictions:
    url = url_of_prediction(p)
    found_tags = [t[0] for t in tags_of_prediction(p) if t[1] > 0.8]
    given_tags = [t for t in hand_tags_by_url(url, test_set) if t in all_concepts]
    print(given_tags, found_tags)

['triceratops'] ['triceratops']
['hummingbirds'] ['hummingbirds']
[] []
['elephant'] ['elephant']
[] []
['elephant'] ['elephant']
[] []
[] []
['hummingbirds'] ['hummingbirds']
[] []
[] []
['costume'] ['costume']
[] []
['costume'] ['costume']
[] []
[] []
[] []
['goose'] ['goose']
[] []
[] []
['costume'] ['costume']
['costume'] ['costume']
[] []
['elephant', 'costume'] ['elephant', 'costume']
['costume'] ['costume']
['puppet'] ['puppet']
['costume'] ['costume']
[] []
['puppet'] ['puppet']
['drummer'] ['drummer']
['hummingbirds'] ['hummingbirds']
[] []
[] []
['hummingbirds'] ['hummingbirds']
['hummingbirds'] ['hummingbirds']
['puppet'] ['puppet']
['triceratops'] ['triceratops']
['installation'] ['installation']
[] []
['puppet'] ['puppet']
['puppet'] ['puppet']
['elephant', 'mechanism'] ['mechanism', 'elephant']
['hummingbirds'] ['hummingbirds']
[] []
[] []
['goose', 'costume'] ['costume', 'goose']
['goose', 'costume'] ['goose', 'costume']
['costume'] ['costume']
['goose'] ['goose']
[] []


In [78]:
with open('random_images_with_tags.json') as infile:
    random_set = json.load(infile)

In [79]:
random_set[0]

{'name': 'santa_and_the_seriously_magical_xmas_ride_1_(36)-small.jpg',
 'tags': ['child',
  'group',
  'son',
  'people',
  'family',
  'indoors',
  'boy',
  'woman',
  'girl',
  'man',
  'togetherness',
  'fun',
  'portrait',
  'education',
  'offspring',
  'four',
  'sibling',
  'elementary school',
  'friendship',
  'enjoyment'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2008/santa_and_the_magical_christmas_2008/santa_and_the_seriously_magical_xmas_ride_1_(36)-small.jpg'}

In [80]:
for i in random_set:
    print(i['url'])

https://scripts.njae.me.uk/festive-road-pictures/small/2008/santa_and_the_magical_christmas_2008/santa_and_the_seriously_magical_xmas_ride_1_(36)-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2008/santa_and_the_magical_christmas_2008/santa_and_the_seriously_magical_xmas_ride_1_(48)-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2008/santa_and_the_magical_christmas_2008/santa_and_the_seriously_magical_xmas_ride_1_(5)-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2008/seriously_magical_xmas/festive_road__28-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2009/brighton_tea_party_jul_2009/DSC_0102-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2009/brighton_tea_party_jul_2009/DSC_0107-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2009/brighton_tea_party_jul_2009/DSC_0227-small.jpg
https://scripts.njae.me.uk/festive-road-pictures/small/2009/brighton_tea_party_jul_2009/DSC_0252-small

In [81]:
with open('random-images-hand-tags.json') as infile:
    random_set = json.load(infile)

In [82]:
random_set[0]

{'hand-tags': ['generic'],
 'url': 'https://scripts.njae.me.uk/festive-road-pictures/small/2008/santa_and_the_magical_christmas_2008/santa_and_the_seriously_magical_xmas_ride_1_(36)-small.jpg'}

In [91]:
predictions_r = [trained_model.predict_by_url(i['url']) for i in random_set]
len(predictions_r)

99

In [92]:
with open('clarifai-model-1-predictions-random.json', 'w') as outfile:
    json.dump(predictions, outfile, indent=2)

In [95]:
predictions_r[0]

{'outputs': [{'created_at': '2017-11-12T15:32:08.952073Z',
   'data': {'concepts': [{'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'puppet',
      'name': 'puppet',
      'value': 0.00069406204},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'fly',
      'name': 'fly',
      'value': 0.00053718884},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'installation',
      'name': 'installation',
      'value': 0.00038191583},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'hummingbirds',
      'name': 'hummingbirds',
      'value': 0.00017661577},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'mechanism',
      'name': 'mechanism',
      'value': 0.00017456114},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'goose',
      'name': 'goose',
      'value': 0.00010243388},
     {'app_id': 'b8033f6111bb45eba4796dd3c5f0f173',
      'id': 'costume',
      'name': 'costume',
      'value': 0.00010037

In [99]:
for p in predictions_r:
    url = url_of_prediction(p)
    found_tags = [t[0] for t in tags_of_prediction(p) if t[1] > 0.8]
    given_tags = [t for t in hand_tags_by_url(url, random_set) if t in all_concepts]
    print(given_tags, found_tags)

[] []
[] []
[] []
[] []
['costume'] []
['costume'] []
['costume'] []
['costume'] []
[] []
['costume', 'puppet'] []
[] []
['mechanism', 'puppet'] []
['puppet'] []
['mechanism'] []
['puppet'] []
[] []
['puppet'] []
['puppet'] []
['costume'] []
['puppet'] []
['puppet'] []
[] []
[] []
['costume'] ['costume']
[] []
[] []
['costume'] []
[] []
['costume'] []
[] []
['costume'] ['costume']
[] []
[] []
['costume'] []
[] []
[] []
['costume'] []
['costume'] []
['puppet'] []
[] []
['puppet'] []
['costume'] []
[] []
[] []
['costume'] []
[] []
[] []
['costume'] []
['costume'] []
[] []
['costume'] []
['costume'] []
[] []
['puppet'] []
['costume', 'puppet'] []
['puppet'] []
['costume'] ['costume']
[] []
[] []
[] []
[] []
['puppet'] []
['costume'] []
[] []
[] []
['costume'] []
['costume'] ['costume']
[] []
['puppet'] ['costume']
[] []
['mechanism'] []
[] []
['puppet'] []
['goose', 'puppet'] []
['costume', 'puppet'] []
['puppet'] []
['puppet'] ['costume']
['puppet'] []
['costume'] []
['costume', 'puppet'

In [108]:
tag_count = found_tag_count = extra_tag_count = 0
for p in predictions_r:
    url = url_of_prediction(p)
    found_tags = [t[0] for t in tags_of_prediction(p) if t[1] > 0.8]
    given_tags = [t for t in hand_tags_by_url(url, random_set) if t in all_concepts]
    found_tag_count += sum(1 for g in given_tags if g in found_tags)
    extra_tag_count += sum(1 for g in found_tags if g not in given_tags)
    tag_count += len(given_tags)
tag_count, found_tag_count, extra_tag_count, found_tag_count / tag_count

(68, 6, 3, 0.08823529411764706)

In [105]:
tag_count = found_tag_count = extra_tag_count = 0
for p in predictions:
    url = url_of_prediction(p)
    found_tags = [t[0] for t in tags_of_prediction(p) if t[1] > 0.8]
    given_tags = [t for t in hand_tags_by_url(url, test_set) if t in all_concepts]
    found_tag_count += sum(1 for g in given_tags if g in found_tags)
    extra_tag_count += sum(1 for g in found_tags if g not in given_tags)
    tag_count += len(given_tags)
tag_count, found_tag_count, extra_tag_count, found_tag_count / tag_count

(57, 57, 0, 1.0)

In [109]:
train_image_info = []
for i in random_set:
    url = i['url']
    concs, not_concs = concepts_present(i['hand-tags'], all_concepts)
    train_image_info.append(ClImage(url=url, concepts=concs, not_concepts=not_concs))
train_image_info[:2]

[<clarifai.rest.client.Image at 0x7feace682518>,
 <clarifai.rest.client.Image at 0x7feace682550>]

In [110]:
app.inputs.bulk_create_images(train_image_info)

[<clarifai.rest.client.Image at 0x7feace677470>,
 <clarifai.rest.client.Image at 0x7feace6b0e80>,
 <clarifai.rest.client.Image at 0x7feace6b0320>,
 <clarifai.rest.client.Image at 0x7feace74a748>,
 <clarifai.rest.client.Image at 0x7feace74a7b8>,
 <clarifai.rest.client.Image at 0x7feace74aeb8>,
 <clarifai.rest.client.Image at 0x7feace74ae48>,
 <clarifai.rest.client.Image at 0x7feace74a198>,
 <clarifai.rest.client.Image at 0x7feace74a898>,
 <clarifai.rest.client.Image at 0x7feace74a518>,
 <clarifai.rest.client.Image at 0x7feace74a5f8>,
 <clarifai.rest.client.Image at 0x7feace616da0>,
 <clarifai.rest.client.Image at 0x7feace616c88>,
 <clarifai.rest.client.Image at 0x7feace616c50>,
 <clarifai.rest.client.Image at 0x7feace616518>,
 <clarifai.rest.client.Image at 0x7feace6164a8>,
 <clarifai.rest.client.Image at 0x7feace616fd0>,
 <clarifai.rest.client.Image at 0x7feace616358>,
 <clarifai.rest.client.Image at 0x7feace616f98>,
 <clarifai.rest.client.Image at 0x7feace616dd8>,
 <clarifai.rest.clie

In [112]:
model2 = app.models.create('festiveroad', concepts=all_concepts)
model2

ApiError: POST https://api.clarifai.com/v2/models FAILED(1510503863.806451). status_code: 402, reason: Payment Required, error_code: 11006, error_description: Account limits exceeded, error_details: Request exceeds "custom concept" limit.
 >> Python client 2.0.32 with Python 3.5.2 on linux
 >> POST https://api.clarifai.com/v2/models
 >> REQUEST(1510503863.806451) {
  "model": {
    "output_info": {
      "output_config": {
        "closed_environment": false,
        "concepts_mutually_exclusive": false
      },
      "data": {
        "concepts": [
          {
            "id": "costume"
          },
          {
            "id": "puppet"
          },
          {
            "id": "elephant"
          },
          {
            "id": "goose"
          },
          {
            "id": "hummingbirds"
          },
          {
            "id": "drummer"
          },
          {
            "id": "installation"
          },
          {
            "id": "fly"
          },
          {
            "id": "mechanism"
          },
          {
            "id": "triceratops"
          }
        ]
      }
    },
    "name": "festiveroad",
    "id": "festiveroad"
  }
}
 >> RESPONSE(1510503863.806451) {
  "status": {
    "description": "Account limits exceeded",
    "code": 11006,
    "req_id": "ad00af063b0949a89684f21ec62dfc7c",
    "details": "Request exceeds \"custom concept\" limit."
  }
}