<h1><span style="color:red">Generate Concepts from Images in Image Collection</span></h1>

### This sample notebook reads the survey images and adds the concepts found in them to a new version of the SuAVE survey

This notebook uses the Clarifai API (clarifai.com). To process your images, please generate your own API key on the Clarifai website.

## 1. Retrieve survey parameters from the URL

In [None]:
%%javascript
/**
 * Return the value of one parameter from the current page's query string.
 *
 * The lookup is case-insensitive. When the parameter is absent, or present
 * without a value, the result is an empty string.
 */
function getQueryStringValue (key)
{
    // Encode the key like the query string is encoded, then backslash-escape
    // the characters escape() leaves alone that are regex metacharacters.
    var encodedKey = escape(key).replace(/[\.\+\*]/g, "\\$&");
    // The whole query string is matched; group 1 is the text after "key=".
    var matcher = new RegExp("^(?:.*[&\\?]" + encodedKey + "(?:\\=([^&]*))?)?.*$", "i");
    var rawValue = window.location.search.replace(matcher, "$1");
    return unescape(rawValue);
}
// Hand every launch parameter from the page URL to the Python kernel as a
// global string variable.
//
// The values come from the query string, i.e. from whoever built the link, so
// they must not be pasted between '...' as-is: a quote in the URL would end the
// Python literal and the rest would run as code in the kernel. JSON.stringify
// produces a double-quoted literal with quotes, backslashes and control
// characters escaped, which Python also reads as a plain string literal.
[
    ["survey_url", "surveyurl"],
    ["views", "views"],
    ["view", "view"],
    ["user", "user"],
    ["csv_file", "csv"],
    ["dzc_file", "dzc"],
    ["params", "params"],
    ["active_object", "activeobject"]
].forEach(function (entry) {
    var pythonName = entry[0];
    var queryKey = entry[1];
    IPython.notebook.kernel.execute(
        pythonName + " = " + JSON.stringify(getQueryStringValue(queryKey))
    );
});
// String(...) mirrors the implicit conversion the "+" concatenation performed.
IPython.notebook.kernel.execute("full_notebook_url = " + JSON.stringify(String(window.location)));

## 2. Read the survey file and find images

In [None]:
from __future__ import print_function
from clarifai.rest import ClarifaiApp
import pandas as pd
import sys
import csv
import glob, os
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import Markdown, display
def printmd(string):
    """Show ``string`` in the cell output rendered as Markdown/HTML rather than plain text."""
    rendered = Markdown(string)
    display(rendered)

# Directory holding the survey CSV files. Despite the name this is a path
# relative to the notebook's working directory.
absolutePath = "../../temp_csvs/"

# Read the SuAVE survey file. `csv_file` is set from the "csv" URL parameter by
# the JavaScript cell in section 1. The `with` block closes the file handle as
# soon as pandas has parsed it instead of leaving it open for the whole session.
with open(absolutePath + csv_file, encoding="latin-1") as file:
    df = pd.read_csv(file)
dflen = len(df.columns)  # column count of the survey as loaded

# Translate the public URL of the image collection (`dzc_file`, from the "dzc"
# URL parameter) into the matching path on the local file system, then point at
# the directory with the full-size images next to content.dzc.
localdzc = dzc_file.replace("https://maxim.ucsd.edu/dzgen/lib-staging-uploads","/lib-nfs/dzgen")
full_images_location = localdzc.replace("/content.dzc","/full_images/")

<h3><span style="color:red">Important: Specify your Clarifai API key below.<br>You can get one at clarifai.com</span></h3>


In [None]:
# Paste your own Clarifai API key between the quotes, or leave it empty to use
# the CLARIFAI_API_KEY environment variable if it is already set.
# Do not save or share the notebook with a real key in this cell: anyone who
# can read the file can use the key.
api_key = ""

if not api_key:
    api_key = os.environ.get("CLARIFAI_API_KEY", "")
api_key = api_key.strip()

if api_key == '':
    print('Cannot proceed without an API Key')
else:
    os.environ["CLARIFAI_API_KEY"] = api_key
    # Show only the last four characters so the secret does not end up in the
    # saved cell output.
    masked_key = "*" * max(len(api_key) - 4, 0) + api_key[-4:]
    printmd("<b><span style='color:red'>Your CLARIFAI API Key is: </span></b>" + masked_key)

    # ClarifaiApp() is called without arguments, so it is expected to pick the
    # key up from the environment variable set above.
    app = ClarifaiApp()
    model = app.public_models.general_model


## 3. Extract the images

In [None]:
# Image source used by the classifier cell: 'local' sends files from disk,
# 'url' passes each entry to the model as an image URL.
URL_or_local = 'local'
#URL_or_local = 'url'

# Every PNG in the survey's full-size image directory.
set_of_files = glob.glob("{}*.png".format(full_images_location))

printmd("<b><span style='color:red'>Count of items to process: </span></b>{}".format(len(set_of_files)))


## 4. Run the classifier

In [None]:
all_data = []   # accumulates one dict of generated concepts per image
counter = 0     # number of images processed; stays 0 when there is nothing to do

for counter, image in enumerate(set_of_files, start=1):
    if URL_or_local == 'url':
        response = model.predict_by_url(url=image)
    else:
        response = model.predict_by_filename(filename=image)
    image_name = os.path.basename(image)
    # '#img' is the merge key used in section 5: the file name without its
    # extension. splitext does not depend on the extension being 3 letters long.
    file_data = {'#img': os.path.splitext(image_name)[0]}
    # A response without a 'concepts' entry is treated as "no concepts found"
    # instead of aborting the whole run with a KeyError.
    concepts = response['outputs'][0]['data'].get('concepts', [])
    for rank, concept in enumerate(concepts, start=1):
        file_data['concept_' + str(rank)] = concept['name']
        file_data['value_' + str(rank)] = concept['value']
    all_data.append(file_data)
    print(str(counter), " ::  Processed file: ", image_name)
    # debugging: uncomment to stop after the first 3 images
    # if counter == 3:
    #     break
printmd("<b><span style='color:red'>" + str(counter) + " images processed</span></b>")


## 5. Add concepts to dataframe

In [None]:
# Add the individual concept/value fields, plus a single multiple-response
# column holding all concepts, to the survey dataframe.
n_concepts = 20   # number of concept/value pairs kept per image

concept_cols = ['concept_' + str(k) for k in range(1, n_concepts + 1)]
value_cols = ['value_' + str(k) for k in range(1, n_concepts + 1)]
# '#img' first, then concept_1, value_1, concept_2, value_2, ...
ordered_cols = ['#img'] + [col for pair in zip(concept_cols, value_cols) for col in pair]

# reindex (instead of plain column selection) also creates any column that no
# image produced, e.g. when every image came back with fewer than n_concepts
# concepts; plain selection would raise a KeyError in that case.
newdf = pd.DataFrame(all_data).reindex(columns=ordered_cols).fillna('')

# Multiple-response field: concept names joined by '|'. Blank slots are skipped
# so an image with fewer concepts does not get empty tags ("a|b|||...").
newdf['tags#multi'] = [
    '|'.join(name for name in row if name)
    for row in newdf[concept_cols].itertuples(index=False, name=None)
]

# NOTE: this rebinds `df`. Running the cell a second time merges the concept
# columns again and pandas then suffixes the duplicates (_x/_y); re-run
# section 2 first to start from the original survey.
df = pd.merge(df, newdf, on='#img', how='outer')
printmd("<b><span style='color:red'>Created new dataframe </span></b>")
df.head()

## 6. Save the new version of CSV file, and give a name to new survey

In [None]:
# Output name: the input name minus its last four characters (the ".csv"
# extension) with "_v1.csv" appended, written next to the original file.
new_file = "{}{}_v1.csv".format(absolutePath, csv_file[:-4])
printmd("<b><span style='color:red'>A new temporary file will be created at: </span></b>")
print(new_file)
# index=None keeps the dataframe's row index out of the file.
df.to_csv(new_file, index=None)

In [None]:
#Input survey name

from IPython.display import display

# input_text is where the name is typed; output_text echoes the value that was
# confirmed with Enter.
input_text, output_text = widgets.Text(), widgets.Text()

def bind_input_to_output(sender):
    """Copy the current content of the input box into the output box."""
    output_text.value = input_text.value

# Run bind_input_to_output() whenever the input box is submitted (Enter)
input_text.on_submit(bind_input_to_output)

printmd("<b><span style='color:red'>Input survey name here, press Enter, and then run the next cell:</span></b>")
# Show the input box first, then the echo box below it
display(input_text, output_text)

In [None]:
# Take the survey name from the echo box, i.e. the value confirmed with Enter,
# and show it back to the user.
survey_name = output_text.value
printmd("<b><span style='color:red'>Survey Name is: </span></b>{}".format(survey_name))

## 7. Generate the survey and create survey URL

In [None]:
import re
import requests

# Derive the upload endpoint and the base of the new survey URL from the URL
# the notebook was launched with.
referer = survey_url.split("/main")[0] +"/"
upload_url = referer + "uploadCSV"
new_survey_url_base = survey_url.split(user)[0]

upload_data = {
    # Use the confirmed name shown in the previous cell, so the uploaded survey
    # and the URL built below always refer to the same name.
    'name': survey_name,
    'dzc': dzc_file,
    'user':user
}
headers = {
    'User-Agent': 'suave user agent',
    'referer': referer
}

# The `with` block closes the CSV once the request has finished. The handle is
# deliberately not stored in a variable called `csv`, which would shadow the
# csv module imported at the top of the notebook.
with open(new_file, "rb") as csv_handle:
    r = requests.post(upload_url, files={"file": csv_handle}, data=upload_data, headers=headers)

if r.status_code == 200:
    printmd("<b><span style='color:red'>New survey created successfully</span></b>")
    # The survey file name on the server replaces every character other than
    # letters, digits and "_" with "_".
    regex = re.compile('[^0-9a-zA-Z_]')
    s_url = survey_name
    s_url =  regex.sub('_', s_url)
    url = new_survey_url_base + user + "_" + s_url + ".csv" + "&views=" + views + "&view=" + view
    print(url)
    printmd("<b><span style='color:red'>Click the URL to open the new survey</span></b>")
else:
    printmd("<b><span style='color:red'>Error creating new survey. Check if a survey with this name already exists.</span></b>")
    printmd("<b><span style='color:red'>Reason: </span></b>"+ str(r.status_code) + " " + r.reason)


    
# new_survey_url_base = survey_url.split(user)[0]