# Functions created for offline analysis

In [None]:
#1 is fake and 0 is real
#code authored by Aditya Tyagi and Suhas Dara
import os
import json
import pandas as pd

In [None]:
# Label tables for the image and video datasets; get_response_rows looks
# entries up by their "filename" column and reads the "label" column.
image_labels = pd.read_csv("dataset_images/labels.csv")
video_labels = pd.read_csv("dataset_videos/labels.csv")

# Survey data, restricted to the columns listed below ("code" is the key the
# final merge joins on).
survey_data = pd.read_csv(
    "worker_data/survey_data.csv",
    usecols=[
        "code", "age", "races", "gender", "education",
        "CRT1", "CRT2", "CRT3",
        "AC1", "AC2", "AC3", "AC4",
    ],
)

In [None]:
def get_manifest_data():
    """Return the raw lines of both input manifests as one list.

    The iteration 1 manifest is read first, then the iteration 2 manifest.
    Lines are returned exactly as read, including their line endings.
    """
    manifest_paths = (
        "worker_data/iteration1/input1.manifest",
        "worker_data/iteration2/input2.manifest",
    )

    lines = []
    for manifest_path in manifest_paths:
        with open(manifest_path) as handle:
            lines += handle.readlines()
    return lines

In [None]:
def get_json_data():
    """Load every worker-response JSON file from both iterations.

    Files are read in sorted filename order within each directory, iteration 1
    before iteration 2. The number of loaded files is printed before the list
    of parsed objects is returned.
    """
    response_dirs = (
        "worker_data/iteration1/worker_responses/",
        "worker_data/iteration2/worker_responses/",
    )

    parsed = []
    for directory in response_dirs:
        for entry in sorted(os.listdir(directory)):
            with open(os.path.join(directory, entry)) as handle:
                parsed.append(json.load(handle))

    print(len(parsed))
    return parsed

In [None]:
def get_videorationale(response):
    """Return the value stored under "videorationale" in *response*."""
    video_rationale = response["videorationale"]
    return video_rationale

In [None]:
def get_imagerationale(response):
    """Return the value stored under "imagerationale" in *response*."""
    image_rationale = response["imagerationale"]
    return image_rationale

In [None]:
def get_code(response):
    """Return the "code" entry of *response* converted to an int."""
    raw_code = response["code"]
    return int(raw_code)

In [None]:
def get_worker_id(worker_response):
    """Return the value stored under "workerId" in a worker response."""
    worker_id = worker_response["workerId"]
    return worker_id

In [None]:
def get_response_rows(worker_response, manifest_input):
    """Build one annotation row per media item judged in a worker response.

    Args:
        worker_response: One worker's answer dict. Its "workerId" and
            "answerContent" entries are read; "answerContent" must hold
            "code" plus "image1", "image2", "video1" and "video2" entries,
            each with a "real" flag.
        manifest_input: Parsed manifest line whose "source-ref", "element2",
            "element3" and "element4" values are "/"-separated paths to the
            two images and the two videos, in that order.

    Returns:
        A list of four rows, ordered image1, image2, video1, video2, each of
        the form [worker_id, code, image_num, video_num, label, annotation].
        Image rows carry NaN in the video_num slot and video rows carry NaN
        in the image_num slot. annotation is 0 when the worker marked the
        item as real and 1 otherwise.

    Raises:
        ValueError: If a path contains no "/" or the part of the file name
            before the first "." is not an integer.
        IndexError: If a file name has no matching row in the label table.
    """
    # (manifest key, answer key, label table, is this an image?) per item.
    media_spec = (
        ("source-ref", "image1", image_labels, True),
        ("element2", "image2", image_labels, True),
        ("element3", "video1", video_labels, False),
        ("element4", "video2", video_labels, False),
    )

    # Resolve every file name and its label before touching the answers, so
    # lookups fail in the same order as when they were written out one by one.
    media = []
    for manifest_key, answer_key, labels, is_image in media_spec:
        path = manifest_input[manifest_key]
        filename = path[path.rindex("/") + 1:]
        label = labels[labels["filename"] == filename]["label"].iloc[0]
        media.append((answer_key, filename, label, is_image))

    worker_id = get_worker_id(worker_response)

    answer_content = worker_response["answerContent"]
    code = get_code(answer_content)

    response_rows = []
    for answer_key, filename, label, is_image in media:
        # Only "real" is checked: real -> 0, anything else -> fake (1).
        annotation = 0 if answer_content[answer_key]["real"] else 1
        number = int(filename.split(".")[0])
        if is_image:
            row = [worker_id, code, number, float('NaN'), label, annotation]
        else:
            row = [worker_id, code, float('NaN'), number, label, annotation]
        response_rows.append(row)

    return response_rows

In [None]:
def get_rationale_row(worker_response, manifest_input):
    """Build the single rationale row for one worker response.

    Args:
        worker_response: One worker's answer dict. Its "workerId" and
            "answerContent" entries are read; "answerContent" must hold
            "code", "imagerationale" and "videorationale".
        manifest_input: Parsed manifest line whose "source-ref", "element2",
            "element3" and "element4" values are "/"-separated paths to
            numerically named files.

    Returns:
        One row:
        [worker_id, code, image1, image2, video1, video2, image_rat, video_rat]
        where the four media entries are the file numbers as ints.

    Raises:
        ValueError: If a path lacks "/" or ".", or the text between the last
            "/" and the last "." is not an integer.
    """
    media_numbers = []
    for key in ("source-ref", "element2", "element3", "element4"):
        path = manifest_input[key]
        # Keep only the number between the last "/" and the last ".".
        media_numbers.append(int(path[path.rindex("/") + 1:path.rindex(".")]))

    worker_id = get_worker_id(worker_response)

    answer_content = worker_response["answerContent"]
    code = get_code(answer_content)
    image = get_imagerationale(answer_content)
    video = get_videorationale(answer_content)

    return [worker_id, code, *media_numbers, image, video]

# Looping through responses here

In [None]:
def create_dataframes():
    """Assemble the annotation and rationale dataframes from all responses.

    Manifest line i is paired with loaded response file i; every entry in
    that file's "answers" list contributes the rows returned by
    get_response_rows to the annotation frame and the single row returned by
    get_rationale_row to the rationale frame.

    Returns:
        A tuple (df_anot, df_rat).
        df_anot has columns
        ["workerId", "code", "image", "video", "label", "annotation"];
        df_rat has columns ["workerId", "code", "image1", "image2", "video1",
        "video2", "image_rat", "video_rat"].
        Both frames are indexed 0..n-1 with rows in reverse processing order
        (the last row produced sits at index 0). That is the order the
        earlier prepend-at--1-then-shift-index-then-sort approach produced,
        and it is kept so downstream results do not change.

    Raises:
        IndexError: If there are fewer response files than manifest lines.
    """
    manifest_data = get_manifest_data()
    json_data = get_json_data()

    # Collect plain row lists and build each frame once; inserting rows one
    # at a time and re-indexing the whole frame each time is quadratic.
    annotation_rows = []
    rationale_rows = []

    for manifest_index, manifest_line in enumerate(manifest_data):
        manifest_input = json.loads(manifest_line)
        worker_responses = json_data[manifest_index]["answers"]

        for worker_response in worker_responses:
            annotation_rows.extend(
                get_response_rows(worker_response, manifest_input)
            )
            rationale_rows.append(
                get_rationale_row(worker_response, manifest_input)
            )

    # Newest-first, matching the documented row order.
    annotation_rows.reverse()
    rationale_rows.reverse()

    df_anot = pd.DataFrame(
        annotation_rows,
        columns=["workerId", "code", "image", "video", "label", "annotation"],
    )
    df_rat = pd.DataFrame(
        rationale_rows,
        columns=["workerId", "code", "image1", "image2", "video1", "video2",
                 "image_rat", "video_rat"],
    )

    return df_anot, df_rat

In [None]:
# Build both dataframes once: per-item annotations and per-response rationales.
annotations, rationales = create_dataframes()

In [None]:
# Show the annotations dataframe as the cell output.
annotations

In [None]:
# Show the rationales dataframe as the cell output.
rationales

In [None]:
# Show the survey dataframe as the cell output.
survey_data

In [None]:
# Join each annotation row with the survey columns that share its "code".
annotations.merge(survey_data, on=["code"])