In [1]:
import torch, io, json, random
import pandas as pd

In [2]:
# Record the PyTorch version this notebook was executed with.
print(torch.__version__)

1.8.1+cu102


In [3]:
# Deserialise the validation-set expression features, mapping every tensor to the CPU.
# NOTE(review): torch.load unpickles its input - only point it at trusted files.
expression_results = torch.load(
    'generated_features/val/val_expression.pt',
    map_location=torch.device('cpu'),
)

In [4]:
# Deserialise the validation-set object-detection features, mapping every tensor to the CPU.
# NOTE(review): torch.load unpickles its input - only point it at trusted files.
object_results = torch.load(
    'generated_features/val/val_object_detection.pt',
    map_location=torch.device('cpu'),
)

In [5]:
# Parse the Places365 predictions for the validation set from their JSON dump.
places_path = "generated_features/val/val_places365.json"
with open(places_path) as places_file:
    places_result = json.load(places_file)

**Get classes**

In [6]:
# Read the expression class names: one entry per line, surrounding whitespace stripped.
with open("class_files/expression_classes.txt") as class_file:
    expression_classes = list(map(str.strip, class_file))

In [7]:
# Read the COCO class names: one entry per line, surrounding whitespace stripped.
coco_classes = []
with open("class_files/coco_classes.txt") as class_file:
    for raw_line in class_file:
        coco_classes.append(raw_line.strip())

In [8]:
# Get places365 classes
# NOTE(review): this opens coco_classes.txt, the same file the COCO cell reads.
# The parsing below (first space-separated token, first three characters
# dropped) looks like it expects a differently formatted file - confirm which
# Places365 categories file was intended.
file_name = 'class_files/coco_classes.txt'
with open(file_name) as class_file:
    places365_classes = [
        line.strip().split(' ')[0][3:]
        for line in class_file
    ]

**Test output**

In [9]:
# Places: display the stored prediction entry for one sample id
# (the captured output shows 'prob' and 'class' lists).
places_result['2017_10735550']

{'prob': [0.5115284323692322,
  0.1368655413389206,
  0.07560905069112778,
  0.05295965448021889,
  0.03759666159749031],
 'class': ['museum/indoor',
  'burial_chamber',
  'cemetery',
  'archaelogical_excavation',
  'kindergarden_classroom']}

In [10]:
# Coco: print the class names of the first ten detected labels for one sample entry.
sample_output = object_results[15]['output']
detections = sample_output['labels']
for label_idx in detections[:10]:
    print(coco_classes[label_idx])

clock
umbrella
surfboard
frisbee
bowl
keyboard
person
cup
dining table
tv


In [11]:
# Expressions: print the class names recorded for the first face of one sample image.
sample_faces = expression_results[0]['2017_90320159']
face_expression = sample_faces[0]['0']['classes']
for class_idx in face_expression:
    print(expression_classes[int(class_idx)])

neutral
anger
sadness


## Wrangling
### Expression
Load the existing data

In [12]:
# Load the full dataset and keep only the rows flagged as the validation split.
main_data = pd.read_csv("data/data.csv")
is_validation = main_data["dataset"] == "validation"
val_data = main_data.loc[is_validation]

Wrangle the expression data

In [13]:
# Build one record per image: for every expression class a list with one
# probability per detected face (0 when the class was not reported for that
# face), plus a "likely" list holding each face's highest-probability class.
dict_res = {}
for mini_dict in expression_results:
    # Only the first (image id -> faces) entry of each result dict is used.
    image_id, faces = next(iter(mini_dict.items()))

    res = {class_name: [] for class_name in expression_classes}
    res["likely"] = []

    for face in faces:
        # Only the first value of each face dict is used as the prediction payload.
        face_info = next(iter(face.values()))
        face_classes = face_info["classes"]
        probabilities = face_info["probs"]

        # Open a new slot for this face in every column (including "likely"),
        # defaulting to 0; reported classes overwrite their slot below.
        for column in res.values():
            column.append(0)

        for expression, probability in zip(face_classes, probabilities):
            res[expression_classes[int(expression)]][-1] = float(probability)

        # The most probable reported class replaces the placeholder in "likely".
        res["likely"][-1] = expression_classes[int(face_classes[probabilities.argmax()])]

    dict_res[image_id] = res

Convert to a DataFrame (one row per image id) and prefix every column name with `expression_`

In [14]:
# One row per image id; every column is prefixed so it stays recognisable
# after being joined with the main data.
expression_data = (
    pd.DataFrame
    .from_dict(dict_res, orient="index")
    .add_prefix("expression_")
)

Join the two dataframes

In [15]:
# Left-join the expression features onto the validation rows: val_data's "id"
# column is matched against expression_data's index.
all_data = val_data.merge(
    expression_data,
    how="left",
    left_on="id",
    right_index=True,
)

Quick sanity checks on the join: row and column counts, plus random spot-checks of individual cells

In [16]:
# Sanity checks for the left join of val_data with expression_data.

# expression_data is built from a dict, so its index is unique; a left join on
# a unique right index keeps exactly one output row per val_data row.
assert all_data.shape[0] == val_data.shape[0], "left join changed the number of rows"
# The right-hand key is the index, so no column is merged away by the join.
assert all_data.shape[1] == expression_data.shape[1] + val_data.shape[1], (
    "joined frame does not contain every column of both inputs"
)

# Spot-check 100 random (id, column) cells. A dedicated seeded generator keeps
# the check reproducible without touching the global random state.
rng = random.Random(0)
candidate_ids = list(expression_data.index)
candidate_cols = list(expression_data.columns)
for _ in range(100):
    key = rng.choice(candidate_ids)
    col = rng.choice(candidate_cols)
    joined_values = all_data.loc[all_data.id == key, col].values
    # An id dropped by the join fails with a clear message, not a bare IndexError.
    assert len(joined_values) > 0, f"id {key!r} from expression_data is missing from the joined data"
    assert joined_values[0] == expression_data.loc[key, col], (
        f"value mismatch for id {key!r}, column {col!r}"
    )