# Functions created for offline analysis

In [1]:
# Label convention: 1 = fake, 0 = real
#code authored by Aditya Tyagi and Suhas Dara
import os
import json
import pandas as pd

In [2]:
# Label tables for the image and video files; the functions below query them
# through their "filename" and "label" columns.
image_labels, video_labels = (
    pd.read_csv(labels_path)
    for labels_path in ("dataset_images/labels.csv", "dataset_videos/labels.csv")
)

# Only these survey columns are loaded; "code" is the key the annotations are
# merged on at the end of the notebook.
survey_columns = [
    "code",
    "age",
    "races",
    "gender",
    "education",
    "CRT1",
    "CRT2",
    "CRT3",
    "AC1",
    "AC2",
    "AC3",
    "AC4",
]
survey_data = pd.read_csv("worker_data/survey_data.csv", usecols=survey_columns)

In [3]:
def get_manifest_data(name="worker_data/iteration1/input1.manifest"):
    """Read the input manifest and return its records as raw text lines.

    Parameters
    ----------
    name : str, optional
        Path of the manifest file. Defaults to the iteration-1 manifest that
        this notebook analyses.

    Returns
    -------
    list of str
        One string per non-blank line of the file, in file order, with the
        line ending left in place (each line is later parsed by json.loads).
    """
    with open(name) as file:
        # Whitespace-only lines (for example a trailing newline at the end of
        # the file) are not records: keeping them would make json.loads fail
        # and would break the positional pairing with the response files.
        return [line for line in file if line.strip()]

In [4]:
def get_json_data(responses_dir="worker_data/iteration1/worker_responses/"):
    """Load every worker-response JSON file found in a directory.

    Parameters
    ----------
    responses_dir : str, optional
        Directory holding the response files. Defaults to the iteration-1
        responses that this notebook analyses.

    Returns
    -------
    list
        The parsed content of each file, ordered by file name.
    """
    json_data = []
    # NOTE: sorted() compares the names as strings, so "10.json" sorts before
    # "2.json". create_dataframes pairs these entries with manifest lines by
    # position, so numeric file names must be zero-padded to keep that pairing.
    for filename in sorted(os.listdir(responses_dir)):
        path = os.path.join(responses_dir, filename)
        # Skip hidden entries and anything that is not a regular file
        # (e.g. ".ipynb_checkpoints" or ".DS_Store"): opening them would either
        # raise or inject a bogus entry that shifts the positional pairing.
        if filename.startswith(".") or not os.path.isfile(path):
            continue
        with open(path) as file:
            json_data.append(json.load(file))
    return json_data

In [5]:
def get_videorationale(response):
    """Return the value stored under the "videorationale" key of ``response``."""
    video_rationale = response["videorationale"]
    return video_rationale

In [6]:
def get_imagerationale(response):
    """Return the value stored under the "imagerationale" key of ``response``."""
    image_rationale = response["imagerationale"]
    return image_rationale

In [7]:
def get_code(response):
    """Return the "code" entry of ``response`` converted with int()."""
    raw_code = response["code"]
    return int(raw_code)

In [8]:
def get_worker_id(worker_response):
    """Return the value stored under the "workerId" key of ``worker_response``."""
    worker_id = worker_response["workerId"]
    return worker_id

In [9]:
def _media_filename(path):
    """Return the part of ``path`` after its last "/" (all of it if there is none)."""
    return path.rsplit("/", 1)[-1]


def _media_label(labels, filename):
    """Return the "label" of the first row of ``labels`` whose "filename" matches."""
    matches = labels.loc[labels["filename"] == filename, "label"]
    if matches.empty:
        # Same exception type a bare .iloc[0] raises, but naming the file.
        raise IndexError("no label found for {!r}".format(filename))
    return matches.iloc[0]


def get_response_rows(worker_response, manifest_input):
    """Build the four annotation rows contributed by one worker response.

    Parameters
    ----------
    worker_response : dict
        One worker answer; read through "workerId" and "answerContent"
        (which holds "code" and the "image1"/"image2"/"video1"/"video2"
        answers, each with a "real" flag).
    manifest_input : dict
        The parsed manifest line; its "source-ref", "element2", "element3"
        and "element4" entries are the paths of the two images and two videos.

    Returns
    -------
    list of list
        Four rows ``[worker_id, code, image_num, video_num, label, annotation]``
        in the order image1, image2, video1, video2. The number that does not
        apply to a row (video_num for images, image_num for videos) is NaN.
        ``annotation`` is 0 when the worker's "real" flag is truthy, else 1.

    Raises
    ------
    IndexError
        If a file name has no row in the matching labels table.
    """
    # (manifest key, answer key, labels table, is_image), in output order.
    media_slots = (
        ("source-ref", "image1", image_labels, True),
        ("element2", "image2", image_labels, True),
        ("element3", "video1", video_labels, False),
        ("element4", "video2", video_labels, False),
    )

    # Resolve every file name and its label before touching the answers, so a
    # bad manifest entry fails before any row is built.
    resolved = []
    for manifest_key, answer_key, labels, is_image in media_slots:
        filename = _media_filename(manifest_input[manifest_key])
        resolved.append((answer_key, is_image, filename, _media_label(labels, filename)))

    worker_id = get_worker_id(worker_response)
    answer_content = worker_response["answerContent"]
    code = get_code(answer_content)

    response_rows = []
    for answer_key, is_image, filename, label in resolved:
        # Only "real" is checked: truthy means real (0), anything else fake (1).
        annotation = 0 if answer_content[answer_key]["real"] else 1
        # The item number is the text before the first "." of the file name.
        number = int(filename.split(".")[0])
        if is_image:
            image_num, video_num = number, float("nan")
        else:
            image_num, video_num = float("nan"), number
        response_rows.append([worker_id, code, image_num, video_num, label, annotation])

    return response_rows

In [10]:
def _media_number(path):
    """Return the integer at the start of the file name in ``path``.

    Only the part after the last "/" is inspected, and the number is the text
    before its first "." (the same rule get_response_rows applies), so dots in
    directory names or a path without any "/" do not break the parsing.
    """
    filename = path.rsplit("/", 1)[-1]
    return int(filename.split(".", 1)[0])


def get_rationale_row(worker_response, manifest_input):
    """Build the single rationale row contributed by one worker response.

    Parameters
    ----------
    worker_response : dict
        One worker answer; read through "workerId" and "answerContent"
        (which holds "code", "imagerationale" and "videorationale").
    manifest_input : dict
        The parsed manifest line; its "source-ref", "element2", "element3"
        and "element4" entries are the paths of the two images and two videos.

    Returns
    -------
    list
        ``[worker_id, code, image1, image2, video1, video2, image_rat, video_rat]``
        where the four media entries are the integer file numbers.

    Raises
    ------
    ValueError
        If a file name does not start with an integer.
    """
    # Manifest keys in the column order image1, image2, video1, video2.
    media_numbers = [
        _media_number(manifest_input[key])
        for key in ("source-ref", "element2", "element3", "element4")
    ]

    worker_id = get_worker_id(worker_response)

    answer_content = worker_response["answerContent"]
    code = get_code(answer_content)
    image = get_imagerationale(answer_content)
    video = get_videorationale(answer_content)

    return [worker_id, code] + media_numbers + [image, video]

# Looping through the responses here

In [11]:
def create_dataframes():
    """Build the annotation and rationale tables from the manifest and responses.

    The i-th manifest line is paired with the i-th loaded response file; every
    worker answer in that file's "answers" list contributes the rows returned
    by get_response_rows and the one row returned by get_rationale_row.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_anot, df_rat)`` with columns
        ``workerId, code, image, video, label, annotation`` and
        ``workerId, code, image1, image2, video1, video2, image_rat, video_rat``.
        Both use a 0..n-1 index and list the most recently processed row first.

    Raises
    ------
    IndexError
        If there are more manifest lines than response files.
    """
    manifest_data = get_manifest_data()
    json_data = get_json_data()

    # Collect plain lists and build each frame once at the end; inserting rows
    # one at a time re-labels the whole index on every insert (quadratic work).
    annotation_rows = []
    rationale_rows = []

    for manifest_index, manifest_line in enumerate(manifest_data):
        manifest_input = json.loads(manifest_line)
        worker_responses = json_data[manifest_index]["answers"]

        for worker_response in worker_responses:
            annotation_rows.extend(get_response_rows(worker_response, manifest_input))
            rationale_rows.append(get_rationale_row(worker_response, manifest_input))

    # Rows are emitted newest-first (reverse processing order) to keep the row
    # order this function has always returned. dtype=object stores the values
    # as produced, so integer item numbers are not upcast to float by the NaN
    # placeholders sharing their column.
    df_anot = pd.DataFrame(
        annotation_rows[::-1],
        columns=["workerId", "code", "image", "video", "label", "annotation"],
        dtype=object,
    )
    df_rat = pd.DataFrame(
        rationale_rows[::-1],
        columns=["workerId", "code", "image1", "image2", "video1", "video2", "image_rat", "video_rat"],
        dtype=object,
    )

    return df_anot, df_rat

In [12]:
# Build both tables once: create_dataframes adds several rows to `annotations`
# and a single row to `rationales` for every worker response.
annotations, rationales = create_dataframes()

In [13]:
# Display the annotation table (columns: workerId, code, image, video, label, annotation).
annotations

Unnamed: 0,workerId,code,image,video,label,annotation
0,public.us-east-1.A16QZSBYXE5VY8,24733,,10,0,1
1,public.us-east-1.A16QZSBYXE5VY8,24733,,37,1,1
2,public.us-east-1.A16QZSBYXE5VY8,24733,14,,0,1
3,public.us-east-1.A16QZSBYXE5VY8,24733,5,,0,0
4,public.us-east-1.A18CLI0LF3RE3C,44888,,10,0,1
...,...,...,...,...,...,...
195,public.us-east-1.AJDPFPELWYQ7D,54907,9,,0,0
196,public.us-east-1.A10Z4QVBJIA914,29558,,11,0,0
197,public.us-east-1.A10Z4QVBJIA914,29558,,24,1,1
198,public.us-east-1.A10Z4QVBJIA914,29558,8,,0,0


In [14]:
# Display the rationale table (one row per worker response, with the two free-text rationales).
rationales

Unnamed: 0,workerId,code,image1,image2,video1,video2,image_rat,video_rat
0,public.us-east-1.A16QZSBYXE5VY8,24733,5,14,37,10,1. Seems real\n2. Wrinkles below eyes?,Shadows. 1st one did not have them and 2nd one...
1,public.us-east-1.A18CLI0LF3RE3C,44888,5,14,37,10,I don't see any edits in the image.,"The video is a little strange, with several bugs."
2,public.us-east-1.A3V4UX3FRT3KJU,26556,5,14,37,10,the texture of the skin,All expressions looked real to me
3,public.us-east-1.A36FBIDN58N139,46809,5,14,37,10,I didn't notice anything unusual.,In the second video the man had much more arti...
4,public.us-east-1.AY0758IITCGOZ,53774,5,14,37,10,I didn't see anything that looks fake on these...,I didn't notice anything out of place.
5,public.us-east-1.A16QZSBYXE5VY8,61422,36,18,4,39,1st-> left chin.\n2nd-> left chin.,1st-> face movements\n2nd-> could not spot fake?
6,public.us-east-1.A36FBIDN58N139,46809,36,18,4,39,I didn't notice anything unusual.,The first video had flashing colors and face g...
7,public.us-east-1.A3V4UX3FRT3KJU,57244,36,18,4,39,the texture of the skin,the sudden changes of the face
8,public.us-east-1.A2HWC65YAL6MHK,29716,36,18,4,39,I see that the image of human beard closely wh...,Based on human action both are real and i choo...
9,public.us-east-1.A10Z4QVBJIA914,29558,36,18,4,39,They don't appear to have the markers of deep ...,"The first video, the face is constantly changi..."


In [15]:
# Display the survey answers loaded from worker_data/survey_data.csv.
survey_data

Unnamed: 0,age,races,gender,education,CRT1,AC1,AC2,AC3,AC4,CRT2,CRT3,code
0,30-39,Black or African American,Male,Undergraduate / Associates,0.1,1.0,,,,5,24.0,45007
1,40-49,Black or African American,Female,Undergraduate / Associates,1.05,,,3.0,,100,13.0,53774
2,30-39,White,Male,High school,0.05,,2.0,,,5,24.0,36810
3,30-39,White,Male,Masters / PhD,0.05,,2.0,,,5,24.0,93210
4,40-49,White,Female,Undergraduate / Associates,5.0,,,,4.0,5,24.0,29558
5,20-29,Asian,Male,Undergraduate / Associates,5.0,,2.0,,,5,48.0,29716
6,30-39,Black or African American,Male,Undergraduate / Associates,0.1,,2.0,,,5,24.0,55879
7,40-49,White,Male,Undergraduate / Associates,0.05,,2.0,,,5,24.0,58742
8,30-39,Black or African American,Male,Undergraduate / Associates,0.1,,2.0,,,5,24.0,72001
9,20-29,White,Male,High school,0.05,,2.0,,,5,24.0,46809


In [16]:
# Attach the survey answers to each annotation row by matching on "code".
# merge defaults to an inner join, so rows whose code appears in only one of
# the two tables are left out of the result.
annotations.merge(survey_data, on="code")

Unnamed: 0,workerId,code,image,video,label,annotation,age,races,gender,education,CRT1,AC1,AC2,AC3,AC4,CRT2,CRT3
0,public.us-east-1.A16QZSBYXE5VY8,24733,,10,0,1,40-49,White,Male,Undergraduate / Associates,0.05,,,3.0,,5,24.0
1,public.us-east-1.A16QZSBYXE5VY8,24733,,37,1,1,40-49,White,Male,Undergraduate / Associates,0.05,,,3.0,,5,24.0
2,public.us-east-1.A16QZSBYXE5VY8,24733,14,,0,1,40-49,White,Male,Undergraduate / Associates,0.05,,,3.0,,5,24.0
3,public.us-east-1.A16QZSBYXE5VY8,24733,5,,0,0,40-49,White,Male,Undergraduate / Associates,0.05,,,3.0,,5,24.0
4,public.us-east-1.A18CLI0LF3RE3C,44888,,10,0,1,20-29,White,Male,Undergraduate / Associates,0.10,,,3.0,,5,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,public.us-east-1.A3V4UX3FRT3KJU,78312,9,,0,0,30-39,Black or African American,Male,Undergraduate / Associates,0.10,,,3.0,,5,24.0
196,public.us-east-1.AJDPFPELWYQ7D,54907,,11,0,0,40-49,White,Male,Undergraduate / Associates,0.10,,,,4.0,100,12.5
197,public.us-east-1.AJDPFPELWYQ7D,54907,,24,1,1,40-49,White,Male,Undergraduate / Associates,0.10,,,,4.0,100,12.5
198,public.us-east-1.AJDPFPELWYQ7D,54907,8,,0,0,40-49,White,Male,Undergraduate / Associates,0.10,,,,4.0,100,12.5
