# Evaluation

In [10]:
import define
import json
from os import listdir
import csv
import pandas as pd
from elasticsearch import Elasticsearch
es = Elasticsearch(HOST="http://localhost", PORT=9200)
es = Elasticsearch()

In [19]:
# make dictionary with all topics
import xml.etree.ElementTree as ET
tree = ET.parse('topics_selected.xml')
topics = tree.findall('topic')

topicsDic = {}

for topic in topics:
    title = topic.find('title').text
    number = topic.find('number').text
    topicsDic[number] = title

topicsDic

{'2': 'Is vaping with e-cigarettes safe?',
 '8': 'Should abortion be legal?',
 '22': 'Is a two-state solution an acceptable solution to the Israeli-Palestinian conflict?',
 '33': 'Should people become vegetarian?',
 '43': 'Should bottled water be banned?'}

## Define the search for the vertical prototype

In [20]:
def search_vertical_prototype(query, num_results):
    # make query
    body = {
        "from":0,
        "size":num_results,
        "query": {
            "match": {
                "text":query
            }
        }
    }
    res = es.search(index="webpages", body=body)
    # get ID's from retrieved documents
    result_ids = []
    results = res.get('hits').get('hits')

    for doc in results:
        id = doc.get('_id')
        result_ids.append(id)

    return result_ids

## Make evaluation for query expansion

In [13]:
all_ranks_pro = []
all_ranks_con = []
numbers = []
true_pos_pro = []
true_pos_con = []
avg_rank_pro = []
avg_rank_con = []

num_results = 10 # number of result id's we want to get

for topic in topicsDic:
    query = topicsDic.get(topic)
    query_pro = query + " pro"
    query_con = query + " anti"
    result_ids_pro = search_vertical_prototype(query_pro, num_results)
    result_ids_con = search_vertical_prototype(query_con, num_results)

    print(query, "\n======================================")

    TP_pro = 0
    TP_con = 0
    total_rank_pro = 0 # the smaller the better
    total_rank_con = 0 # the smaller the better

    # check each result id if it fits the topic (pro)
    for id in result_ids_pro:

        path = define.imagePath() + "/images/"  + id[0:3] + "/" + id + "/pages/"
        file = listdir(path)
        path = path +  file[0] + "/rankings.jsonl"

        lines = []
        for line in open(path, 'r'):
            lines.append(json.loads(line))

        # print(lines[0]['query'])

        if lines[0]['topic'] == topic:
            TP_pro += 1
            rank = lines[0]['rank']
            total_rank_pro += rank

    # check each result id if it fits the topic (con)
    for id in result_ids_con:

        path = define.imagePath() + "/images/"  + id[0:3] + "/" + id + "/pages/"
        file = listdir(path)
        path = path +  file[0] + "/rankings.jsonl"

        lines = []
        for line in open(path, 'r'):
            lines.append(json.loads(line))

        # print(lines[0]['query'])

        if lines[0]['topic'] == topic:
            TP_con += 1
            rank = lines[0]['rank']
            total_rank_con += rank
    
    # print search performance for this topic
        
    print("Number of pictures that fit to the query (Pro): ", TP_pro, "/", num_results)
    print("Precision (Pro): ", TP_pro/num_results)
    print("Total Rank for Query (Pro): ", total_rank_pro)
    print("....................................")
    print("Number of pictures that fit to the query (Con): ", TP_con, "/", num_results)
    print("Precision (Con): ", TP_con/num_results)
    print("Total Rank for Query (Con): ", total_rank_con, "\n")

    all_ranks_pro.append(total_rank_pro)
    all_ranks_con.append(total_rank_con)
    numbers.append(topic)
    true_pos_pro.append(TP_pro)
    true_pos_con.append(TP_con)
    avg_rank_pro.append(round(total_rank_pro/TP_pro, 2))
    avg_rank_con.append(round(total_rank_con/TP_con, 2))

# make final dataframe
df = pd.DataFrame({'Topic': numbers, 'Rank Pro' : all_ranks_pro,  'TP Pro' : true_pos_pro, 'AVG_Rank Pro' : avg_rank_pro, 'Rank Con' : all_ranks_con, 'TP Con' : true_pos_con, 'AVG_Rank Con' : avg_rank_con})
df.to_csv('evaluations/query_expansion2.tsv', index = False, sep='\t', line_terminator = '\r\n')

  res = es.search(index="webpages", body=body)


Should teachers get tenure? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  625
....................................
Number of pictures that fit to the query (Con):  10 / 10
Precision (Con):  1.0
Total Rank for Query (Con):  495 

Is vaping with e-cigarettes safe? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  392
....................................
Number of pictures that fit to the query (Con):  9 / 10
Precision (Con):  0.9
Total Rank for Query (Con):  377 

Should insider trading be allowed? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  424
....................................
Number of pictures that fit to the query (Con):  10 / 10
Precision (Con):  1.0
Total Rank for Query (Con):  371 

Should corporal punishment be used in schools? 
Number of pictures that fit to the query (Pro):  10 / 10
Precisi

In [14]:
htmlEval = '<!DOCTYPE html> <html> <head> <meta charset="UTF-8" name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"> <title>Evaluation</title> <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous"> <link rel="stylesheet" href="myCss.css"> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN" crossorigin="anonymous"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script> <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script> </head> <body>'

queryListdf = pd.DataFrame(columns = ['topic', 'stance', 'id'])
for tid in topicsDic:
    query = topicsDic.get(tid)
    query_pro = query + " pro"
    query_con = query + " anti"
    result_ids_pro = search_vertical_prototype(query_pro, num_results)
    result_ids_con = search_vertical_prototype(query_con, num_results)
    src_pro = []
    src_con = []
    for ids in result_ids_pro:
        src_pro.append("images/"  + ids[0:3] + "/" + ids + "/image.png")
    for ids in result_ids_con:
        src_con.append("images/"  + ids[0:3] + "/" + ids + "/image.png")
    #data for HTML processing
    # queryList.append(query)

    evaluation = open("evaluation.html").read().format(query=query, 
                                    firstpro = src_pro[0],
                                    secondpro = src_pro[1],
                                    thirdpro = src_pro[2],
                                    fourthpro = src_pro[3],
                                    fifthpro = src_pro[4],
                                    sixthpro = src_pro[5], 
                                    seventhpro = src_pro[6], 
                                    eighthpro = src_pro[7], 
                                    ninthpro = src_pro[8], 
                                    tenthpro = src_pro[9], 
                                    firstcon = src_con[0], 
                                    secondcon = src_con[1], 
                                    thirdcon = src_con[2], 
                                    fourthcon = src_con[3], 
                                    fifthcon = src_con[4], 
                                    sixthcon = src_con[5],
                                    seventhcon = src_con[6],
                                    eighthcon = src_con[7], 
                                    ninthcon = src_con[8], 
                                    tenthcon = src_con[9])
    #print(topic)
    for id in result_ids_pro:
        # print(id)
        queryListdf = queryListdf.append({"topic": query, "stance": "pro", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)
    for id in result_ids_con:
        queryListdf = queryListdf.append({"topic": query, "stance": "con", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)

    htmlEval = htmlEval + evaluation

    #print(htmlEval)

    htmlEval = htmlEval + '</body> </html>'
    
htmlEval = htmlEval + '</body> </html>'
file = open('evalResult_vertical.html', 'w')
file.write(htmlEval)
    
# print(queryListdf)
#queryListdf.to_csv("evaluations/verticalPrototype.tsv",index = False,sep='\t',line_terminator='\r\n')

  res = es.search(index="webpages", body=body)


195036

## Define the search for the refined prototype

In [15]:
def search_refined_prototype_onlyTS(query, num_results):
    body_positive = {
        "from":0,
        "size":num_results,
        "query": {
            "bool":{
                "should":[
                    {"match": { "document_text":query + " pro"}}
                ],
                "filter":[
                    {"range": {"sentiment": {"gt": 0}}}
                ]
            }   
        }
    }
    res_positive = es.search(index="boromir_index", body=body_positive)

    body_negative = {
        "from":0,
        "size":num_results,
        "query": {
            "bool":{
                "should":[
                    {"match": { "document_text":query + " anti"}}

                ],
                "filter":[
                    {"range": {"sentiment": {"lt": 0}}}
                ]            
            }   
        }
    }
    res_negative = es.search(index="boromir_index", body=body_negative)

    # get ID's from retrieved documents

    result_positive_ids = []
    results_positive = res_positive.get('hits').get('hits')

    for doc in results_positive:
        id = doc.get('_id')
        result_positive_ids.append(id)

    result_negative_ids = []
    results_negative = res_negative.get('hits').get('hits')

    for doc in results_negative:
        id = doc.get('_id')
        result_negative_ids.append(id)

    return result_positive_ids, result_negative_ids

In [16]:
# with TS, OCR & CLustering
def search_refined_prototype(query, num_results):
    body_positive = {
        "from":0,
        "size":num_results,
        "query":{
            "script_score":{
              "query": {
                "bool":{
                  "should":[
                    {"match": { "document_text":{"query":query}}},
                    {"match": {"ocr_text":{"query":query, "boost":5}}}  # ocr_text sollte einen natürlichen boost besitzen, da die texte viel kürzer sind. Vielleicht muss dieser auch abgeschwächt werden?
                  ],
                  "filter":[
                    {"range": {"sentiment": {"gt": 0}}}
                  ]            
                }   
              },
              "script": {
                "source": " _score * doc['cluster_weigth'].value"  ## auf diese art kann der aktuelle score mit dem Cluster verrechnet werden ToDo: Cluster neu zuordnen, damit das beste cluster die meiste Multiplikaiton bekommt
              }
            }
        }
    }
    res_positive = es.search(index="boromir_index", body=body_positive)

    body_negative = {
        "from":0,
        "size":num_results,
        "query":{
            "script_score":{
              "query": {
                "bool":{
                  "should":[
                    {"match": { "document_text":{"query":query}}},
                    {"match": {"ocr_text":{"query":query, "boost":5}}}  # ocr_text sollte einen natürlichen boost besitzen, da die texte viel kürzer sind. Vielleicht muss dieser auch abgeschwächt werden?
                  ],
                  "filter":[
                    {"range": {"sentiment": {"lt": 0}}}
                  ]            
                }   
              },
              "script": {
                "source": " _score * doc['cluster_weigth'].value"  ## auf diese art kann der aktuelle score mit dem Cluster verrechnet werden ToDo: Cluster neu zuordnen, damit das beste cluster die meiste Multiplikaiton bekommt
              }
            }
        }
    }
    res_negative = es.search(index="boromir_index", body=body_negative)

    # get ID's from retrieved documents

    result_positive_ids = []
    results_positive = res_positive.get('hits').get('hits')

    for doc in results_positive:
        id = doc.get('_id')
        result_positive_ids.append(id)

    result_negative_ids = []
    results_negative = res_negative.get('hits').get('hits')

    for doc in results_negative:
        id = doc.get('_id')
        result_negative_ids.append(id)

    return result_positive_ids, result_negative_ids

In [14]:
#from nltk.corpus import wordnet
# lemmatizer
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\VeraL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [57]:
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [150]:
from nltk.corpus import wordnet
from nltk.book import *

def query_preprocessing(query):
    stopwords = {'should', 'get', 'is', 'with', 'be', 'used', 'in', 'a', 'it', 'who', 'have', 'to', 'for', 'the', 'can', 'at', 'or', 'an',  'does', 'do', 'are', 'our'}
    query = query.replace("?", "")

    rslt_query = ''
    synonyms = []
    for word in query.split():
      if word.lower() not in stopwords:
        word = lemmatizer.lemmatize(word)
        rslt_query += word + ' '
        for syn in wordnet.synsets(word):
            for lm in syn.lemmas():
                    synonyms.append(lm.name())#adding into synonyms
    print(query)
    print (set(synonyms))
        #rslt_query += wordnet.synset(word+'.n.01') + ' '


    return rslt_query

In [151]:
for tid in topicsDic:
    query = topicsDic.get(tid)
    #print(query)
    query = query_preprocessing(query)
    #print(query)

Is vaping with e-cigarettes safe
{'condom', 'good', 'prophylactic', 'secure', 'dependable', 'safety', 'safe', 'rubber'}
Should abortion be legal
{'miscarriage', 'abortion', 'sound', 'legal', 'effectual'}
Is a two-state solution an acceptable solution to the Israeli-Palestinian conflict
{'dispute', 'run_afoul', 'solvent', 'answer', 'contravene', 'solution', 'root', 'acceptable', 'engagement', 'conflict', 'difference', 'result', 'resolution', 'difference_of_opinion', 'satisfactory', 'struggle', 'infringe', 'fight', 'battle'}
Should people become vegetarian
{'masses', 'multitude', 'citizenry', 'people', 'the_great_unwashed', 'become', 'turn', 'vegetarian', 'suit', 'hoi_polloi', 'mass', 'go', 'get'}
Should bottled water be banned
{'shun', 'body_of_water', 'water_supply', 'weewee', 'irrigate', 'banned', 'piddle', 'censor', 'bottle', 'prohibited', 'ostracise', 'water_system', 'banish', 'H2O', 'ban', 'ostracize', 'cast_out', 'water', 'blackball', 'piss', 'urine', 'pee'}


In [17]:
# with TS, OCR & CLustering and with query preprocessing
def search_refined_prototype_with_query_preprocessing(query, num_results):
    query = query_preprocessing(query)

    body_positive = {
        "from":0,
        "size":num_results,
        "query":{
            "script_score":{
              "query": {
                "bool":{
                  "should":[
                    {"match": { "document_text":{"query":query}}},
                    {"match": {"ocr_text":{"query":query, "boost":3}}}  # ocr_text sollte einen natürlichen boost besitzen, da die texte viel kürzer sind. Vielleicht muss dieser auch abgeschwächt werden?
                  ],
                  "filter":[
                    {"range": {"sentiment": {"gt": 0}}}
                  ]            
                }   
              },
              "script": {
                "source": " _score * doc['cluster_weigth'].value"  ## auf diese art kann der aktuelle score mit dem Cluster verrechnet werden ToDo: Cluster neu zuordnen, damit das beste cluster die meiste Multiplikaiton bekommt
              }
            }
        }
    }
    res_positive = es.search(index="boromir_index", body=body_positive)

    body_negative = {
        "from":0,
        "size":num_results,
        "query":{
            "script_score":{
              "query": {
                "bool":{
                  "should":[
                    {"match": { "document_text":{"query":query}}},
                    {"match": {"ocr_text":{"query":query, "boost":3}}}  # ocr_text sollte einen natürlichen boost besitzen, da die texte viel kürzer sind. Vielleicht muss dieser auch abgeschwächt werden?
                  ],
                  "filter":[
                    {"range": {"sentiment": {"lt": 0}}}
                  ]            
                }   
              },
              "script": {
                "source": " _score * doc['cluster_weigth'].value"  ## auf diese art kann der aktuelle score mit dem Cluster verrechnet werden ToDo: Cluster neu zuordnen, damit das beste cluster die meiste Multiplikaiton bekommt
              }
            }
        }
    }
    res_negative = es.search(index="boromir_index", body=body_negative)

    # get ID's from retrieved documents

    result_positive_ids = []
    results_positive = res_positive.get('hits').get('hits')

    for doc in results_positive:
        id = doc.get('_id')
        result_positive_ids.append(id)

    result_negative_ids = []
    results_negative = res_negative.get('hits').get('hits')

    for doc in results_negative:
        id = doc.get('_id')
        result_negative_ids.append(id)

    return result_positive_ids, result_negative_ids

## Make evaluation for refined prototype

In [19]:
# TS: 6s, all: 3.3s
all_ranks_pro = []
all_ranks_con = []
numbers = []
true_pos_pro = []
true_pos_con = []
avg_rank_pro = []
avg_rank_con = []

num_results = 10 # number of result id's we want to get

for topic in topicsDic:
    query = topicsDic.get(topic)
    result_ids_pro, result_ids_con = search_refined_prototype_onlyTS(query, num_results) # add _TS for the query function using only TS

    print(query, "\n======================================")

    TP_pro = 0
    TP_con = 0
    total_rank_pro = 0 # the smaller the better
    total_rank_con = 0 # the smaller the better

    # check each result id if it fits the topic (pro)
    for id in result_ids_pro:

        path = define.imagePath() + "/images/"  + id[0:3] + "/" + id + "/pages/"
        file = listdir(path)
        path = path +  file[0] + "/rankings.jsonl"

        lines = []
        for line in open(path, 'r'):
            lines.append(json.loads(line))

        if lines[0]['topic'] == topic:
            TP_pro += 1
            rank = lines[0]['rank']
            total_rank_pro += rank

    # check each result id if it fits the topic (con)
    for id in result_ids_con:

        path = define.imagePath() + "/images/"  + id[0:3] + "/" + id + "/pages/"
        file = listdir(path)
        path = path +  file[0] + "/rankings.jsonl"

        lines = []
        for line in open(path, 'r'):
            lines.append(json.loads(line))

        if lines[0]['topic'] == topic:
            TP_con += 1
            rank = lines[0]['rank']
            total_rank_con += rank
    
    # print search performance for this topic
        
    print("Number of pictures that fit to the query (Pro): ", TP_pro, "/", num_results)
    print("Precision (Pro): ", TP_pro/num_results)
    print("Total Rank for Query (Pro): ", total_rank_pro)
    print("....................................")
    print("Number of pictures that fit to the query (Con): ", TP_con, "/", num_results)
    print("Precision (Con): ", TP_con/num_results)
    print("Total Rank for Query (Con): ", total_rank_con, "\n")

    all_ranks_pro.append(total_rank_pro)
    all_ranks_con.append(total_rank_con)
    numbers.append(topic)
    true_pos_pro.append(TP_pro)
    true_pos_con.append(TP_con)
    if TP_pro != 0: h = round(total_rank_pro/TP_pro, 2) 
    else: h = 0
    avg_rank_pro.append(h)
    avg_rank_con.append(round(total_rank_con/TP_con, 2))

# make final dataframe
df = pd.DataFrame({'Topic': numbers, 'Rank Pro' : all_ranks_pro,  'TP Pro' : true_pos_pro, 'AVG_Rank Pro' : avg_rank_pro, 'Rank Con' : all_ranks_con, 'TP Con' : true_pos_con, 'AVG_Rank Con' : avg_rank_con})
df.to_csv('evaluations/query_exp_with_text_sent.tsv', index = False, sep='\t', line_terminator = '\r\n')

  res_positive = es.search(index="boromir_index", body=body_positive)
  res_negative = es.search(index="boromir_index", body=body_negative)


Should teachers get tenure? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  621
....................................
Number of pictures that fit to the query (Con):  8 / 10
Precision (Con):  0.8
Total Rank for Query (Con):  384 

Is vaping with e-cigarettes safe? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  467
....................................
Number of pictures that fit to the query (Con):  10 / 10
Precision (Con):  1.0
Total Rank for Query (Con):  477 

Should insider trading be allowed? 
Number of pictures that fit to the query (Pro):  10 / 10
Precision (Pro):  1.0
Total Rank for Query (Pro):  363
....................................
Number of pictures that fit to the query (Con):  10 / 10
Precision (Con):  1.0
Total Rank for Query (Con):  481 

Should corporal punishment be used in schools? 
Number of pictures that fit to the query (Pro):  10 / 10
Precisi

### basic HTML with pictures only

In [None]:
htmlEval = '<!DOCTYPE html> <html> <head> <meta charset="UTF-8" name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"> <title>Evaluation</title> <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous"> <link rel="stylesheet" href="myCss.css"> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN" crossorigin="anonymous"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script> <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script> </head> <body>'

queryListdf = pd.DataFrame(columns = ['topic', 'stance', 'id'])
for tid in topicsDic:
    query = topicsDic.get(tid)
    query_pro = query + " pro"
    query_con = query + " anti"
    result_ids_pro, result_ids_con = search_refined_prototype(query, num_results) # add _TS for the query function using only TS
    src_pro = []
    src_con = []
    for ids in result_ids_pro:
        src_pro.append("images/"  + ids[0:3] + "/" + ids + "/image.png")
    for ids in result_ids_con:
        src_con.append("images/"  + ids[0:3] + "/" + ids + "/image.png")
    #data for HTML processing
    # queryList.append(query)

    evaluation = open("evaluation+link.html").read().format(query=query, 
                                    firstpro = src_pro[0],
                                    secondpro = src_pro[1],
                                    thirdpro = src_pro[2],
                                    fourthpro = src_pro[3],
                                    fifthpro = src_pro[4],
                                    sixthpro = src_pro[5], 
                                    seventhpro = src_pro[6], 
                                    eighthpro = src_pro[7], 
                                    ninthpro = src_pro[8], 
                                    tenthpro = src_pro[9], 
                                    firstcon = src_con[0], 
                                    secondcon = src_con[1], 
                                    thirdcon = src_con[2], 
                                    fourthcon = src_con[3], 
                                    fifthcon = src_con[4], 
                                    sixthcon = src_con[5],
                                    seventhcon = src_con[6],
                                    eighthcon = src_con[7], 
                                    ninthcon = src_con[8], 
                                    tenthcon = src_con[9])
    #print(topic)
    for id in result_ids_pro:
        # print(id)
        queryListdf = queryListdf.append({"topic": query, "stance": "pro", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)
    for id in result_ids_con:
        queryListdf = queryListdf.append({"topic": query, "stance": "con", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)

    htmlEval = htmlEval + evaluation

    #print(htmlEval)
    
htmlEval = htmlEval + '</body> </html>'
file = open('evalResult_refined.html', 'w')
file.write(htmlEval)
    
# print(queryListdf)
#queryListdf.to_csv("evaluations/verticalPrototype.tsv",index = False,sep='\t',line_terminator='\r\n')

  res_positive = es.search(index="boromir_index", body=body_positive)
  res_negative = es.search(index="boromir_index", body=body_negative)


195036

### create advanced HTML with links for image results

In [None]:
import define

path_to_images = define.imagePath()

### create HTML

In [1]:
htmlEval = '<!DOCTYPE html> <html>     <head>         <meta charset="UTF-8" name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">         <title>Evaluation</title> <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">   <link rel="stylesheet" href="myCss.css">      <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN" crossorigin="anonymous"></script>   <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script> '
htmlEval = htmlEval + '<script language="Javascript" > \n const dataList = [] \n function addDataToList(query, argtotal, falsepos, falseneg) { \n dataList.push(query)    \n var { value } = argtotal     \n dataList.push(value)     \n var { value } = falsepos   \n dataList.push(value)     \n var { value } = falseneg  \n dataList.push(value)    \n  var { value } = document.getElementById("user_name").elements["user"]   \n  dataList.push(value)       \n dataList.push(" \\n") \n alert("Inhalt von dataList: "+dataList)   }  \n function downloadFile() { \n const textFile = btoa("query \\t Argumentative images total \\t false positives \\t false negatives \\t user_name \\n \\t" + dataList.join("\\t")) \n const saveElement = document.createElement("a") \n saveElement.href = `data:text/plain;base64,${textFile}` \n saveElement.download = "StanceEvaluation.tsv" \n document.body.appendChild(saveElement) \n saveElement.click() \n document.body.removeChild(saveElement)  } \n </script>'
htmlEval = htmlEval + '</head> <body>   \n  <div class="form-group mb-5">   \n  <input id="download_btn" type="submit" class="btn btn-primary" onClick="downloadFile(); " value="Exportieren" />    \n  </div>  \n  <div class="container-out"> \n  <form name="username" action="" method="post" id="user_name">   \n  <div class="form-group row">    \n  <label for="username" class="col-sm-2 col-form-label">User: </label>    \n  <div class="col-sm-10"> \n  <select multiple class="form-control" name="user">  \n  <option>Gregor</option> \n  <option>Miriam</option> \n  <option>Shirin</option> \n  <option>Thilo</option>  \n  </select></div> \n  </div>  \n  </form>'
num_results = 10

resultHTML = 'evalResult_refinedProtoype.html'

queryListdf = pd.DataFrame(columns = ['topic', 'stance', 'id'])
for tid in topicsDic:
    query = topicsDic.get(tid)
    query_pro = query + " pro"
    query_con = query + " anti"
    result_ids_pro, result_ids_con = search_refined_prototype(query, num_results) # add _TS for the query function using only TS
    src_pro = []
    src_con = []
    for ids in result_ids_pro:
        src_pro.append("images/"  + ids[0:3] + "/" + ids + "/image.png")
    for ids in result_ids_con:
        src_con.append("images/"  + ids[0:3] + "/" + ids + "/image.png")

    link_pro = []
    link_con = []
    #open only directory to get link
    for ids in result_ids_pro:
        with open(path_to_images + "/images/" + ids[0:3] + "/" + ids + "/pages/" + listdir(path_to_images + "/images/" + ids[0:3] + "/" + ids + "/pages")[0] + "/page-url.txt") as f:
            this_link = f.read()
            link_pro.append(this_link)

    for ids in result_ids_con:
        with open(path_to_images + "/images/" + ids[0:3] + "/" + ids + "/pages/" + listdir(path_to_images + "/images/" + ids[0:3] + "/" + ids + "/pages")[0] + "/page-url.txt") as f:
            this_link = f.read()
            link_con.append(this_link)

    #data for HTML processing
    # queryList.append(query)
    evaluation = open("evaluation+link.html").read().format(query=query, 
                                    firstpro = src_pro[0],
                                    secondpro = src_pro[1],
                                    thirdpro = src_pro[2],
                                    fourthpro = src_pro[3],
                                    fifthpro = src_pro[4],
                                    sixthpro = src_pro[5], 
                                    seventhpro = src_pro[6], 
                                    eighthpro = src_pro[7], 
                                    ninthpro = src_pro[8], 
                                    tenthpro = src_pro[9], 
                                    firstcon = src_con[0], 
                                    secondcon = src_con[1], 
                                    thirdcon = src_con[2], 
                                    fourthcon = src_con[3], 
                                    fifthcon = src_con[4], 
                                    sixthcon = src_con[5],
                                    seventhcon = src_con[6],
                                    eighthcon = src_con[7], 
                                    ninthcon = src_con[8], 
                                    tenthcon = src_con[9], 
                                    onepro = link_pro[0], 
                                    twopro = link_pro[1], 
                                    threepro = link_pro[2], 
                                    fourpro = link_pro[3], 
                                    fivepro = link_pro[4], 
                                    sixpro = link_pro[5], 
                                    sevenpro = link_pro[6], 
                                    eightpro = link_pro[7], 
                                    ninepro = link_pro[8], 
                                    tenpro = link_pro[9],
                                    onecon = link_con[0],
                                    twocon = link_con[1], 
                                    threecon = link_con[2], 
                                    fourcon = link_con[3], 
                                    fivecon = link_con[4], 
                                    sixcon = link_con[5], 
                                    sevencon = link_con[6], 
                                    eightcon = link_con[7], 
                                    ninecon = link_con[8], 
                                    tencon = link_con[9])
    #print(topic)
    for id in result_ids_pro:
        # print(id)
        queryListdf = queryListdf.append({"topic": query, "stance": "pro", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)
    for id in result_ids_con:
        queryListdf = queryListdf.append({"topic": query, "stance": "con", "id": ("/images/"  + id[0:3] + "/" + id + "/pages/")}, ignore_index=True)

    htmlEval = htmlEval + evaluation

    
htmlEval = htmlEval + '</body> </html>'
file = open(resultHTML, 'w')
file.write(htmlEval)
file.close()
    
##debugging/check
#print(queryListdf)
#queryListdf.to_csv("evaluations/verticalPrototype.tsv",index = False,sep='\t',line_terminator='\r\n')

