<h1><span style="color:red">Generate Concepts from Images in Image Collection</span></h1>

### This sample notebook reads the survey images and adds the concepts found in them to a new version of the SuAVE survey

This notebook uses the Clarifai API (clarifai.com). To process your own images, generate your own API credentials on the Clarifai website.

## 1. Retrieve survey parameters from the URL

In [1]:
%%javascript
/**
 * Return the value of the query-string parameter `key` taken from the current
 * page URL (window.location.search).
 *
 * The lookup is case-insensitive (RegExp flag "i"). The key is passed through
 * escape() and its ".", "+" and "*" characters are backslash-escaped before it
 * is embedded in the pattern. When the parameter is absent, or present without
 * "=value", capture group 1 is empty and the function returns "".
 * The captured value is decoded with unescape().
 *
 * NOTE(review): escape()/unescape() are legacy functions; they do not decode
 * "+" as a space and do not handle UTF-8 percent-encoding the way
 * decodeURIComponent() does - confirm this is acceptable for the callers.
 */
function getQueryStringValue (key)
{  
    // Replace the whole search string with capture group 1 (the parameter value).
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}
// Copy the SuAVE launch parameters from the notebook URL into Python string
// variables in the kernel.
//
// The values come from the URL and are therefore untrusted. They are sent as a
// JSON string literal (JSON.stringify escapes quotes, backslashes and control
// characters), which is also a valid Python string literal. Splicing the raw
// value between single quotes would let a crafted URL break out of the string
// and run arbitrary code in the kernel.
function setKernelString(name, value) {
    IPython.notebook.kernel.execute(name + "=" + JSON.stringify(String(value)));
}

// [Python variable name, URL query parameter name]
[
    ["survey_url", "surveyurl"],
    ["views", "views"],
    ["view", "view"],
    ["user", "user"],
    ["csv_file", "csv"],
    ["dzc_file", "dzc"],
    ["params", "params"],
    ["active_object", "activeobject"]
].forEach(function (pair) {
    setKernelString(pair[0], getQueryStringValue(pair[1]));
});

// Full URL of the notebook page itself.
setKernelString("full_notebook_url", window.location);

<IPython.core.display.Javascript object>

## 2. Import libraries

In [6]:
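# Uncomment to install the dependencies into the kernel's environment.
# Use %pip (not !pip) so the packages go to the running kernel, and pin versions for reproducibility.
# %pip install panel
# %pip install clarifai_grpc
# %pip install clarifai==9.8.1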

Collecting clarifai
  Using cached clarifai-9.8.1-py3-none-any.whl (2.5 MB)
Collecting tqdm==4.64.1
  Using cached tqdm-4.64.1-py2.py3-none-any.whl (78 kB)
Collecting tritonclient==2.34.0
  Using cached tritonclient-2.34.0-py3-none-manylinux1_x86_64.whl (12.3 MB)
Collecting rich==13.4.2
  Using cached rich-13.4.2-py3-none-any.whl (239 kB)
Installing collected packages: tritonclient, tqdm, rich, clarifai
  Attempting uninstall: tritonclient
    Found existing installation: tritonclient 2.33.0
    Uninstalling tritonclient-2.33.0:
      Successfully uninstalled tritonclient-2.33.0
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.65.0
    Uninstalling tqdm-4.65.0:
      Successfully uninstalled tqdm-4.65.0
Successfully installed clarifai-9.8.1 rich-13.4.2 tqdm-4.64.1 tritonclient-2.34.0


In [7]:
# common imports
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display

import pandas as pd
pd.set_option('display.max_colwidth', 0)
    
import numpy as np
import panel as pn
from bs4 import BeautifulSoup
from urllib.parse import urljoin

pn.extension()
def printmd(string):
    """Render ``string`` as Markdown (inline HTML allowed) in the cell output."""
    rendered = Markdown(string)
    display(rendered)

absolutePath = "/home/jovyan/jupyter-suave/temp_csvs/"

# local imports
import sys
sys.path.insert(1, '../../helpers')
import panel_libs as panellibs
import suave_integration as suaveint

# specific imports
import requests
import re
import glob, os
import csv

from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
import clarifai
from clarifai_grpc.grpc.api import service_pb2_grpc

stub = service_pb2_grpc.V2Stub(ClarifaiChannel.get_grpc_channel())

## 3. Read the survey file and navigate to full-size images

In [8]:
# Load the survey CSV named in the launch URL from the shared temp directory.
survey_csv_path = absolutePath + csv_file
df = panellibs.extract_data(survey_csv_path)
dflen = len(df.columns)  # number of columns in the original survey

# Derive the location of the full-size images from the Deep Zoom collection
# address: swap the public upload prefix for the local mount point, then
# replace the collection file name with the full_images/ folder.
# NOTE(review): when the prefix matches, the result is a filesystem path, yet
# the image-listing cell fetches this value with requests.get - confirm which
# form is expected.
localdzc = dzc_file.replace(
    "https://maxim.ucsd.edu/dzgen/lib-staging-uploads",
    "/lib-nfs/dzgen",
)
full_images_location = localdzc.replace("/content.dzc", "/full_images/")


## 4. Specify Clarifai API key
<h3><span style="color:red">Important: </span>Provide your Clarifai API Key, App ID, Personal Access Token, and User ID below.<br>You can get them at clarifai.com. Keep them in environment variables or a private credentials file; do not paste them into the notebook.</h3>


In [11]:
# Show where the optional credentials file is expected.
# Path is imported here so this cell also works on a fresh kernel
# (Restart & Run All); no earlier cell imports it.
from pathlib import Path

creds_path = Path.home() / 'creds.yml'
creds_path

PosixPath('/home/jovyan/creds.yml')

In [28]:
# Retrieve the Clarifai credentials.
#
# Secrets must not be stored in the notebook: anyone who can read the file (or
# its version history) can use them. Values are taken from environment
# variables first, then from the 'api_creds' section of ~/creds.yml.
# NOTE(review): the key and token that used to be hardcoded in this cell should
# be revoked at clarifai.com and replaced with new ones.
import os
from pathlib import Path


def load_clarifai_credentials(creds_path=None, env=None):
    """Collect the Clarifai credentials without embedding them in the notebook.

    Parameters
    ----------
    creds_path : str or Path, optional
        YAML credentials file; defaults to ``~/creds.yml``. The file is
        optional. When present it is expected to hold an ``api_creds`` mapping
        with the keys ``cfy1`` (API key), ``cfy_appid``, ``cfy_pat`` and
        ``cfy_uid``.
    env : mapping, optional
        Environment to read; defaults to ``os.environ``. Recognised variables:
        ``CLARIFAI_API_KEY``, ``CLARIFAI_APP_ID``, ``CLARIFAI_PAT`` and
        ``CLARIFAI_USER_ID``. They take precedence over the file.

    Returns
    -------
    dict
        Keys ``api_key``, ``app_ID``, ``pat_ID`` and ``user_ID``; a value that
        could not be found is returned as ``''``.
    """
    env = os.environ if env is None else env
    creds_path = Path.home() / 'creds.yml' if creds_path is None else Path(creds_path)

    file_creds = {}
    if creds_path.is_file():
        # Imported lazily so that an environment-variable-only setup does not need PyYAML.
        import yaml
        with open(creds_path, 'r') as ymlfile:
            cfg = yaml.safe_load(ymlfile) or {}
        file_creds = cfg.get('api_creds') or {}

    # result name -> (environment variable, key inside creds.yml)
    sources = {
        'api_key': ('CLARIFAI_API_KEY', 'cfy1'),
        'app_ID': ('CLARIFAI_APP_ID', 'cfy_appid'),
        'pat_ID': ('CLARIFAI_PAT', 'cfy_pat'),
        'user_ID': ('CLARIFAI_USER_ID', 'cfy_uid'),
    }
    return {
        name: str(env.get(env_var) or file_creds.get(file_key) or '')
        for name, (env_var, file_key) in sources.items()
    }


_creds = load_clarifai_credentials()
api_key = _creds['api_key']
app_ID = _creds['app_ID']
pat_ID = _creds['pat_ID']
user_ID = _creds['user_ID']

if api_key == '':
    print('Cannot proceed without an API Key')
else:
    os.environ["CLARIFAI_API_KEY"] = api_key
    os.environ["CLARIFAI_APP_ID"] = app_ID
    # The remaining values are needed to authenticate and address the requests.
    _missing = [name for name, value in _creds.items() if value == '']
    if _missing:
        print('Warning: missing Clarifai credentials: ' + ', '.join(_missing))



In [29]:
from clarifai_grpc.grpc.api import service_pb2, resources_pb2
from clarifai_grpc.grpc.api.status import status_code_pb2

# gRPC call metadata: every request is authorised with the personal access token.
auth_header = ("authorization", f"Key {pat_ID}")
metadata = (auth_header,)


## 5. List the images, either from SuAVE (local to the system) or from a URL

In [30]:
# One can point to a local directory with images or to a list of images at a URL
from urllib.parse import urlparse

URL_or_local = 'url'  # the case of SuAVE. This only affects the format of a CLARIFAI request

# Only links whose path ends with one of these extensions are treated as images.
# A directory listing page also contains navigation links (parent directory,
# sort links) that must not be sent to the classifier.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tif', '.tiff', '.webp')

# Fetch the listing page; fail early on HTTP errors and never hang indefinitely.
response = requests.get(full_images_location, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

image_links = soup.find_all('a', href=True)

set_of_files = []

for link in image_links:
    # urljoin leaves absolute http(s) links unchanged and resolves relative
    # ones against the listing location.
    full_image_url = urljoin(full_images_location, link['href'])

    # Compare the path only, so a query string or fragment does not hide the extension.
    if not urlparse(full_image_url).path.lower().endswith(IMAGE_EXTENSIONS):
        continue

    set_of_files.append(full_image_url)

printmd("<b><span style='color:red'>Count of items to process: </span></b>" + str(len(set_of_files)))


<b><span style='color:red'>Count of items to process: </span></b>2

## 6. Run the classifier

In [31]:
# Model ID of a publicly available General model. You may use any other public or custom model ID.
MODEL_ID = "general-image-recognition"

# Set to a small integer to limit the run while debugging; None processes every image.
MAX_IMAGES = None


def build_image_input(im, source):
    """Wrap one image reference in a Clarifai ``Input``.

    ``im`` is an image URL when ``source == 'url'``; for any other ``source``
    it is a local file path whose bytes are read and sent inline.
    """
    if source == 'url':
        image = resources_pb2.Image(url=im)
    else:
        with open(im, "rb") as f:
            image = resources_pb2.Image(base64=f.read())
    return resources_pb2.Input(data=resources_pb2.Data(image=image))


def classify_image(im, source):
    """Send one image to the model and return the concepts of the first output.

    Raises ``Exception`` when the response status is not SUCCESS; the full
    response is printed first to help with diagnosis.
    """
    request = service_pb2.PostModelOutputsRequest(
        model_id=MODEL_ID,
        user_app_id=resources_pb2.UserAppIDSet(user_id=user_ID, app_id=app_ID),
        inputs=[build_image_input(im, source)],
    )
    response = stub.PostModelOutputs(request, metadata=metadata)
    if response.status.code != status_code_pb2.SUCCESS:
        print(response)
        raise Exception(f"Request failed, status code: {response.status}")
    return response.outputs[0].data.concepts


all_data = []   # here, we accumulate the generated concepts for each image
counter = 0

for im in set_of_files:
    counter += 1
    cons = classify_image(im, URL_or_local)

    # Key each record by the file name without its extension, whatever the
    # extension length (.png, .jpeg, .tiff, ...).
    file_data = {'#img': os.path.splitext(os.path.basename(im))[0]}
    for i, concept in enumerate(cons, start=1):
        file_data['concept_' + str(i)] = concept.name
        file_data['value_' + str(i)] = concept.value
    all_data.append(file_data)
    print(str(counter)," ::  Processed file: ", os.path.basename(im))

    # Optional debugging limit; disabled while MAX_IMAGES is None.
    if MAX_IMAGES is not None and counter >= MAX_IMAGES:
        break

printmd("<b><span style='color:red'>" + str(counter) + " images processed</span></b>")


1  ::  Processed file:  image_not_available.png
2  ::  Processed file:  US.png


<b><span style='color:red'>2 images processed</span></b>

## 7. Add concepts to dataframe

In [32]:
# adding individual concept fields, as well as a single multiple-response column with all concepts, to a dataframe
N_CONCEPTS = 20  # number of concept/value column pairs kept per image


def build_concepts_frame(records, n_concepts=N_CONCEPTS):
    """Turn the per-image concept records into a fixed-layout dataframe.

    Parameters
    ----------
    records : list of dict
        One dict per image with the key ``'#img'`` and any number of
        ``concept_<i>`` / ``value_<i>`` pairs.
    n_concepts : int
        Number of concept/value pairs to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``#img``, ``concept_1``, ``value_1``, ..., ``concept_n``,
        ``value_n`` and ``tags#multi``. Pairs missing for an image (or for all
        images) are filled with ``''`` instead of raising ``KeyError``.
        ``tags#multi`` joins the non-empty concepts with ``'|'``, so images
        with fewer concepts do not get trailing separators.
    """
    concept_cols = ['concept_' + str(i) for i in range(1, n_concepts + 1)]
    value_cols = ['value_' + str(i) for i in range(1, n_concepts + 1)]
    ordered_cols = ['#img']
    for concept_col, value_col in zip(concept_cols, value_cols):
        ordered_cols += [concept_col, value_col]

    # reindex creates any missing column, so short or empty results are safe.
    frame = pd.DataFrame(records).reindex(columns=ordered_cols).fillna('')
    frame['tags#multi'] = [
        '|'.join(str(name) for name in row if name != '')
        for row in frame[concept_cols].itertuples(index=False, name=None)
    ]
    return frame


newdf = build_concepts_frame(all_data)

# NOTE(review): how='outer' also keeps images that match no survey row (they
# become extra rows with empty survey fields) - confirm this is intended.
df_merged = pd.merge(df, newdf, on='#img', how='outer')
printmd("<b><span style='color:red'>Created new dataframe </span></b>")


<b><span style='color:red'>Created new dataframe </span></b>

In [33]:
# view the dataframe
# Show all columns. The 'geometry' column is left out when it exists;
# errors='ignore' avoids a KeyError when there is no column with exactly that
# name (e.g. only a column whose name merely contains "geometry").
with pd.option_context("display.max_columns", None):
    display(df_merged.drop(columns=['geometry'], errors='ignore'))
    
 

Unnamed: 0,Name,OAID#link#multi,Affiliation#sortquan,City#sortquan,Region#sortquan,Country#sortquan,Latitude#hidden,Longitude#hidden,Collaborators#multi#link#sortquan,Scope#multi#sortquan,Keywords#multi#sortquan,OA concepts#multi#sortquan,Publications#hidden,Publication Dates#multi#sortquan,#img,#netvis,concept_1,value_1,concept_2,value_2,concept_3,value_3,concept_4,value_4,concept_5,value_5,concept_6,value_6,concept_7,value_7,concept_8,value_8,concept_9,value_9,concept_10,value_10,concept_11,value_11,concept_12,value_12,concept_13,value_13,concept_14,value_14,concept_15,value_15,concept_16,value_16,concept_17,value_17,concept_18,value_18,concept_19,value_19,concept_20,value_20,tags#multi
0,A Olioso,https://openalex.org/A4227955457,Unknown,,,,,,https://openalex.org/A4227955454|https://openalex.org/A4227955461|https://openalex.org/A4227955455|https://openalex.org/A4227955463|https://openalex.org/A4227955453|https://openalex.org/A4227955464|https://openalex.org/A4227955456|https://openalex.org/A4227955462|https://openalex.org/A4227955460|https://openalex.org/A4227955459|https://openalex.org/A4227955452|https://openalex.org/A4227955458,aquifer|transboundary,,Groundwater|Geology|Geotechnical engineering|Hydrology (agriculture)|Aquifer|Environmental science|Computer science|Water resource management,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A4227955457"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2021,US,02ac504b6e11517e2110d174ea70a1a7ac1cf19899e1a0f23c29558f6225db03,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
1,A Olioso,https://openalex.org/A4226682424,Unknown,,,,,,https://openalex.org/A4226682420|https://openalex.org/A4226682425|https://openalex.org/A4226682421|https://openalex.org/A4226682429|https://openalex.org/A4226682431|https://openalex.org/A4226682426|https://openalex.org/A4226682422|https://openalex.org/A4226682428|https://openalex.org/A4226682427|https://openalex.org/A4226682419|https://openalex.org/A4226682423|https://openalex.org/A4226682430,aquifer|transboundary,,Groundwater|Geology|Geotechnical engineering|Hydrology (agriculture)|Aquifer|Environmental science|Water resource management,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A4226682424"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2021,US,8f95a1d08aacc416f1abe22426fe9c9fd2f8f338bb7365407f284e3985165d23,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
2,A. Alassane,https://openalex.org/A2484425674,Cheikh Anta Diop University,Dakar,,Senegal,14.686944,-17.463333,https://openalex.org/A2434763705|https://openalex.org/A3069707669|https://openalex.org/A2182351332|https://openalex.org/A3051995119,aquifer|transboundary,,Sociology|Population|Water supply|Demography|Groundwater|Water quality|Ecology|Geology|Geotechnical engineering|Groundwater recharge|Hydrology (agriculture)|Environmental engineering|Aquifer|Biology|Environmental science|Water resource management,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A2484425674"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2010,US,f55f8f5c25002f0f2a2e121be602248623f07494d5161a13d399ab12aa746bac,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
3,A. Aureli,https://openalex.org/A2422334401,Unknown,,,,,,https://openalex.org/A2304341794|https://openalex.org/A2182540860,aquifer|transboundary,,Karst|Biology|Tourism|Business|Environmental planning|Archaeology|Environmental science|Groundwater|Geotechnical engineering|Water resources|Water resource management|Environmental resource management|Environmental protection|Law|Ecology|Engineering|Multidisciplinary approach|Aquifer|Geography|Political science,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A2422334401"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2010,US,fee72a7c6e6595abd9a1fe8878cb3c9be76652d50b3986c6b1cb4ac610869e76,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
4,A. Aureli,https://openalex.org/A3086349667,Unknown,,,,,,https://openalex.org/A3085518772|https://openalex.org/A3085940897|https://openalex.org/A3086175637|https://openalex.org/A3086707070|https://openalex.org/A3216340081|https://openalex.org/A3084770820|https://openalex.org/A3085504345,aquifer|transboundary,,Environmental resource management|Groundwater|Environmental planning|Geology|Geotechnical engineering|Hydrology (agriculture)|Aquifer|Environmental science|Water resource management,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A3086349667"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2015,US,bd15eb707485fff043de670a9d011e35d88324f1f4113701f894b92ce4c64d88,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,Å½. PekaÅ¡,https://openalex.org/A2491145178,Unknown,,,,,,https://openalex.org/A2478352227|https://openalex.org/A2641079011|https://openalex.org/A3114708001|https://openalex.org/A2061648226,aquifer|transboundary,,Karst|Business|Archaeology|Environmental planning|Process management|Geography,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A2491145178"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2016,US,46e5ed3ab9553bfac4c049dec30c1d226638344208cecd69b71825f0a43c111b,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
1374,Å½elimir PekaÅ¡,https://openalex.org/A4267790636,Unknown,,,,,,https://openalex.org/A4267790634|https://openalex.org/A4267790637|https://openalex.org/A4267790638|https://openalex.org/A4267790639|https://openalex.org/A4267790635,aquifer|transboundary,,Paleontology|Ideal (ethics)|Civil engineering|Karst|Groundwater|Law|Water resource management|Geology|Engineering|Geotechnical engineering|Hydrology (agriculture)|Aquifer|Environmental science|Geography|Political science,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A4267790636"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2016,US,ae12a61c22472adc53867b99f9e307e4e45871a8cc14c841b752512b2835f1a9,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
1375,Å½eljko KramariÄ,https://openalex.org/A2591057633,Unknown,,,,,,https://openalex.org/A3200145164|https://openalex.org/A1183704316,aquifer|transboundary,,Virology|Karst|Groundwater|Business|Environmental planning|Archaeology|Geology|Geotechnical engineering|Replication (statistics)|Hydrology (agriculture)|Aquifer|Biology|Environmental science|Geography|Water resource management,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A2591057633"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2012,US,fe94c788e0bd99c18f77c0f32e6e3976c77e2c0b1e420fead4293dda6417de75,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread
1376,ÎÎ¼Î±Î½Î¿ÏÎ­Î»Î± ÎÎ¯ÏÎ¹,https://openalex.org/A4200868924,Unknown,,,,,,,aquifer|transboundary,,Marketing|Geography|Groundwater|Business|Machine learning|Geology|Geotechnical engineering|Watershed|SWOT analysis|Hydrology (agriculture)|Aquifer|Computer science,"<a href='#' onClick='javascript:getPublication({oaids:""https://openalex.org/A4200868924"",search:""Keywords,Scope"",OAConcepts:""OA concepts""})'>Show publications</a>",2021,US,,illustration,0.994655,symbol,0.987689,label,0.985368,image,0.983537,vector,0.972716,sign,0.966199,flag,0.959584,business,0.954416,round out,0.936653,graphic,0.933747,stamp,0.928213,banner,0.922278,design,0.921663,print,0.919978,round,0.913166,ink,0.912747,disjunct,0.907437,seal,0.906509,desktop,0.881336,thread,0.867676,illustration|symbol|label|image|vector|sign|flag|business|round out|graphic|stamp|banner|design|print|round|ink|disjunct|seal|desktop|thread


## 8. Save the new version of CSV file, and give a name to new survey

In [None]:
# Hand the merged dataframe to the SuAVE helper together with the temp directory
# and the original CSV name; the returned value is kept in new_file and passed
# to create_survey in the last cell.
# NOTE(review): presumably new_file is the name/path of the CSV that was written - confirm in suave_integration.
new_file = suaveint.save_csv_file(df_merged, absolutePath, csv_file)

In [None]:
#Input survey name

from IPython.display import display

# continuous_update=False: the widget value is sent to the kernel when the user
# presses Enter (or leaves the field) instead of on every keystroke.
input_text = widgets.Text(placeholder='Enter Survey Name...', continuous_update=False)
output_text = widgets.Text()

def bind_input_to_output(sender):
    """Copy the submitted survey name into the read-back box.

    ``sender`` is whatever the widget passes to the callback; it is not used.
    """
    output_text.value = input_text.value

# Text.on_submit is deprecated in ipywidgets; observing the value is the
# supported way to react to a submitted entry.
input_text.observe(bind_input_to_output, names='value')

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Display input text box widget for input
display(input_text)

display(output_text)

In [None]:
#Print survey name
# Fall back to the input box when the name was typed but never submitted with
# Enter, and make an empty name visible instead of silently passing it on.
survey_name = (output_text.value or input_text.value).strip()
if survey_name:
    printmd("<b><span style='color:red'>Survey Name is: </span></b>" + survey_name)
else:
    printmd("<b><span style='color:red'>Survey name is empty. Enter a name in the cell above and re-run this cell.</span></b>")

## 9. Generate the survey and create survey URL

In [None]:
# Create the new survey through the SuAVE helper, passing the launch parameters
# read from the URL (survey_url, dzc_file, user, csv_file, view, views), the
# CSV saved above (new_file) and the name entered by the user (survey_name).
suaveint.create_survey(survey_url,new_file, survey_name, dzc_file, user, csv_file, view, views)