# Metric computation


In [None]:
# unpack the data archive (IPython shell escape; data.zip must be in the working directory)
!unzip data.zip

unzip:  cannot find or open data.zip, data.zip.zip or data.zip.ZIP.


In [None]:
import numpy as np
import pandas as pd
import numbers

In [None]:
# Load a CSV file into a DataFrame.
# NA detection is disabled, so blank cells come back as '' rather than NaN.
def build_df(filename):
  """Read *filename* with pandas, keeping empty cells as empty strings."""
  frame = pd.read_csv(filename, na_filter=False)
  return frame

In [None]:
# Reduce a file name to the key used by both source and prediction tables:
# the text after the last underscore, cut at the first dot.
def convert_filename(filename):
  """Return the part of *filename* after the last '_' and before the first '.'."""
  tail = filename.rsplit("_", 1)[-1]
  stem, _, _ = tail.partition(".")
  return stem


# helper function to transform height from string to float (feet)
def convert_height(height):
  """Parse a textual height and return it in feet as a float.

  Recognised forms:
    * ``"6 feet"`` / ``"6 foot"`` - only the number before the unit word is used
    * ``5'10"`` - feet plus inches/12 (the inch part may be absent: ``5'``)

  An empty string or any other format yields NaN.
  Raises ValueError if a recognised form contains a non-numeric part.
  """
  # missing value
  # (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0)
  if height == '':
    return np.nan

  # outputs spelled with 'feet' or 'foot': keep the number before the unit
  for unit in ('feet', 'foot'):
    if unit in height:
      return float(height.split(unit)[0])

  # feet'inches" notation
  if "'" in height:
    parts = height.split("'")
    feet = parts[0]
    inches = parts[1].split('"')[0].strip()
    # a bare 5' has no inch part: count it as zero instead of failing on float('')
    return float(feet) + (float(inches) / 12 if inches else 0.0)

  # unknown format
  return np.nan

# helper function to transform age from string to float
def convert_age(age):
  """Return *age* as a float, or NaN when the value is the empty string.

  Raises ValueError if a non-empty value is not numeric.
  """
  # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0
  return np.nan if age == '' else float(age)


# helper function to transform weight from string to float
def convert_weight(weight):
  """Return *weight* as a float.

  An empty string yields NaN.  If the text contains 'pounds', the number
  before that word is used; otherwise the whole string must be numeric.
  Raises ValueError when the numeric part cannot be parsed.
  """
  # missing value
  # (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0)
  if weight == '':
    return np.nan
  if 'pounds' in weight:
    return float(weight.split('pounds')[0])
  return float(weight)

In [None]:
# helper function for data preprocessing
def preprocess_df(df):
  """Normalise the object-dtype columns of *df* in place and return *df*.

  * 'filename' is reduced with convert_filename and stored as 'string'
  * 'height', 'weight' and 'age' are converted to floats by their converters
  * every other text column is lower-cased, stripped of leading/trailing
    dots and stored as 'string'; in 'ethnical group' a few labels are
    merged ('mexican'/'latino'/'latina' -> 'hispanic', 'caucasian' -> 'white')

  Columns that are not of object dtype are left untouched.
  """
  # only object-dtype (text) columns need conversion
  str_columns = df.select_dtypes(include=object).columns.tolist()

  for col in str_columns:
    # reduce the filename to the key shared by source and prediction
    if col == 'filename':
      df[col] = df[col].apply(convert_filename).astype('string')

    # represent height as float
    elif col == 'height':
      df[col] = df[col].apply(convert_height)

    # represent weight as float
    elif col == 'weight':
      df[col] = df[col].apply(convert_weight)

    # represent age as float (with its own converter, not the weight one)
    elif col == 'age':
      df[col] = df[col].apply(convert_age)

    # consider the other columns as string
    else:
      # lower-case, then remove leading or trailing dots
      cleaned = df[col].str.lower().str.strip('.')
      # merge similar ethnicities
      if col == 'ethnical group':
        cleaned = cleaned.replace({
            'mexican': 'hispanic',
            'latino': 'hispanic',
            'latina': 'hispanic',
            'caucasian': 'white',
        })
      # change column to string type
      df[col] = cleaned.astype('string')
  return df

In [None]:
# Select the rows of the dataframe whose 'filename' column equals the given value.
def get_row(df, filename):
  """Return the sub-DataFrame of *df* where df['filename'] == filename."""
  matches = df['filename'] == filename
  return df[matches]

# Penalty for a categorical prediction, discounted for "near" labels:
#   0   when pred equals src,
#   0.5 when some group contains both src and pred,
#   1   in every other case.
def compare_groups(src, pred, groups):
  """Return 0, 0.5 or 1 depending on how close *pred* is to *src*."""
  if src == pred:
    return 0

  share_a_group = any(src in members and pred in members for members in groups)
  return 0.5 if share_a_group else 1

# Penalty for an ethnic-group prediction.
# Labels that appear together in one of the tuples below only cost 0.5.
def compare_ethnicity(src, pred):
  """Score *pred* against *src* with compare_groups and the ethnicity groups."""
  related_labels = (
      ('african american', 'african', 'aboriginal'),
      ('white', 'hispanic'),
      ('hispanic', 'arab'),
      ('hispanic', 'indian'),
  )
  return compare_groups(src, pred, related_labels)

# Penalty for a hair-colour prediction.
# Colours that appear together in one of the tuples below only cost 0.5.
def compare_hair(src, pred):
  """Score *pred* against *src* with compare_groups and the hair-colour groups."""
  related_colors = (
      ('black', 'brown'),
      ('blonde', 'light brown'),
      ('brown', 'light brown'),
  )
  return compare_groups(src, pred, related_colors)

# Penalty for an iris-colour prediction.
# Colours that appear together in one of the tuples below only cost 0.5.
def compare_iris(src, pred):
  """Score *pred* against *src* with compare_groups and the iris-colour groups."""
  related_colors = (
      ('black', 'brown'),
      ('blue', 'green'),
      ('green', 'brown'),
  )
  return compare_groups(src, pred, related_colors)

# Penalty for a numeric prediction, based on the absolute error:
#   error below threshold/4 (the tolerance)  -> 0
#   error above threshold                    -> 1
#   in between                               -> linear ramp from 0 to 1
def compare_numerics(src, pred, threshold):
  """Return a penalty in [0, 1] for predicting *pred* when the truth is *src*."""
  tolerance = threshold / 4
  error = abs(src - pred)

  if error > threshold:
    return 1
  if error < tolerance:
    return 0
  ramp_width = threshold - tolerance
  return (error - tolerance) / ramp_width

In [None]:
# compute the metric for one person
def metric(src, pred, thresholds):
  """Average per-column penalty between a ground-truth row and a predicted row.

  src, pred  -- DataFrames; only the first row of each is read. Every
                column of *src* must also exist in *pred*.
  thresholds -- dict mapping a numeric column name to the maximum error
                passed to compare_numerics.

  Returns the mean of the collected penalties (0 = all scored columns
  correct, 1 = all wrong), or NaN when no column could be scored.
  Raises KeyError when either frame has no rows.
  """
  if len(src) == 0 or len(pred) == 0:
    raise KeyError('metric() needs one source row and one prediction row')

  score = []
  for col in src.columns:
    # the filename only identifies the subject, it is not scored
    if col == 'filename':
      continue

    # extract the value for the current column
    src_value = src[col].iloc[0]
    pred_value = pred[col].iloc[0]

    # skip the column when the ground truth is missing (NaN or empty string)
    if pd.isna(src_value) or src_value == '':
      continue

    # a missing prediction for a known value counts as a full error
    if pd.isna(pred_value):
      score.append(1)

    # metric for numeric types (age, height, weight)
    elif isinstance(src_value, numbers.Number):
      score.append(compare_numerics(src_value, pred_value, thresholds[col]))

    # metric for string types (gender, ethnical group, hair color, iris color)
    elif isinstance(src_value, str):
      if col == 'ethnical group':
        score.append(compare_ethnicity(src_value, pred_value))
      elif col == 'hair color':
        score.append(compare_hair(src_value, pred_value))
      elif col == 'iris color':
        score.append(compare_iris(src_value, pred_value))
      else:
        # plain exact match for every other text column
        score.append(0 if src_value == pred_value else 1)

  # nothing comparable: return NaN explicitly instead of averaging an empty array
  if not score:
    return np.nan

  # compute and return the final score
  return np.average(np.asarray(score))

In [None]:
# compute the metric for each row
# it returns a dataframe containing the score of each subject
def compute_metric(src_df, pred_df, thresholds, transform=None):
  """Score every subject of *src_df* against its row in *pred_df*.

  src_df, pred_df -- DataFrames with a 'filename' column used to pair rows
  thresholds      -- passed through to metric()
  transform       -- optional label; the score column is named
                     'score_<transform>', or 'score' when it is None

  Returns a DataFrame with columns 'filename' and the score column, where
  each score is (1 - penalty) * 100, i.e. a percentage of accuracy.
  Raises AssertionError when the two frames differ in length and KeyError
  when a source filename has no row in *pred_df*.
  """
  assert len(src_df)==len(pred_df), 'src and pred must have the same number of rows.'
  score_name = 'score' if transform is None else f"score_{transform}"
  filenames = []
  results = []

  # iterate over all rows
  for src_filename in src_df['filename']:
    # src_row and pred_row contain the original and predicted data for the current person
    src_row = get_row(src_df, src_filename)
    pred_row = get_row(pred_df, src_filename)

    # equal lengths do not guarantee equal filenames: report the missing one
    # here instead of failing with an opaque lookup error inside metric()
    if len(pred_row) == 0:
      raise KeyError(f"no prediction row for filename {src_filename!r}")

    filenames.append(src_filename)
    # turn the penalty into a percentage of accuracy
    penalty = metric(src_row, pred_row, thresholds)
    results.append((1 - penalty) * 100)

  return pd.DataFrame({'filename': filenames, score_name: results})

In [None]:
# Join a list of score dataframes into one, side by side, matching on 'filename'.
def bind_results(results_list):
  """Merge every frame of *results_list* on the 'filename' column."""
  merged = results_list[0]
  for frame in results_list[1:]:
    merged = merged.merge(frame, on='filename')
  return merged

In [None]:
# maximum error per numeric column, keyed by column name
thresholds = {
    'age': 20,      # max error 20 years
    'height': 2.5,  # max error 2.5 feet
    'weight': 50,   # max error 50 pounds
}

In [None]:
# load the ground-truth table and run it through the shared preprocessing
truth_filename = '/content/ground_truth.csv'
truth = preprocess_df(build_df(truth_filename))
cols = truth.columns

In [None]:
# result files expected inside each model's folder
original_filename = 'results_original.csv'
maxim_filename = 'results_maxim.csv'
srgan_filename = 'results_srgan.csv'
total_filename = 'results_tvdenoise.csv'

models = ['tinyllava', 'qwen']

# Load, preprocess and score the four result files of one model.
def get_results(model, truth):
  """Return one DataFrame with a score column per result file of *model*.

  Files are read from /content/<model>/ and scored against *truth* with the
  module-level thresholds; the score columns are named 'score_original',
  'score_maxim', 'score_srgan' and 'score_total'.
  """
  variants = (
      ('original', original_filename),
      ('maxim', maxim_filename),
      ('srgan', srgan_filename),
      ('total', total_filename),
  )

  # read and preprocess every file before any scoring starts
  predictions = [
      (label, preprocess_df(build_df(f"/content/{model}/{fname}")))
      for label, fname in variants
  ]

  scores = [
      compute_metric(truth, frame, thresholds, label)
      for label, frame in predictions
  ]
  return bind_results(scores)

In [None]:
# Print the mean of every column except 'filename', one "<column>: <mean>" line each.
def print_means(results):
  """Print the column means of *results*, skipping the 'filename' column."""
  score_columns = [name for name in results.columns if name != 'filename']
  for name in score_columns:
    average = results[name].mean()
    print(f"{name}: {average}")

In [None]:
# score each model's result files against the same ground truth
res_tinyllava = get_results('tinyllava', truth)
res_qwen = get_results('qwen', truth)

In [None]:
# display the per-file scores for tinyllava
res_tinyllava

Unnamed: 0,filename,score_original,score_maxim,score_srgan,score_total
0,MendozaEscotaJairoAntonio,79.444444,88.333333,91.666667,79.444444
1,HeatherMaeKelley,94.285714,94.285714,94.285714,89.52381
2,GeorgeWright,83.809524,89.52381,83.809524,83.809524
3,RujaIgnatova,100.0,100.0,91.666667,100.0
4,BirdalOsman,71.904762,76.666667,71.904762,71.904762
5,RaulGuzman,57.142857,64.285714,57.142857,64.285714
6,RobertMorales,78.571429,78.571429,78.571429,78.571429
7,LoriPaige,100.0,100.0,100.0,100.0
8,RaduanoMarco,93.333333,93.333333,93.333333,93.333333
9,BonillasRivasJocelynn,70.0,70.0,70.0,70.0


In [None]:
# display the per-file scores for qwen
res_qwen

Unnamed: 0,filename,score_original,score_maxim,score_srgan,score_total
0,MendozaEscotaJairoAntonio,79.444444,79.444444,62.777778,62.777778
1,HeatherMaeKelley,85.714286,85.714286,85.714286,85.396825
2,GeorgeWright,79.047619,83.809524,83.809524,79.047619
3,RujaIgnatova,86.111111,91.666667,86.111111,91.666667
4,BirdalOsman,92.857143,92.857143,92.857143,78.571429
5,RaulGuzman,65.047619,72.190476,65.047619,60.285714
6,RobertMorales,75.238095,79.047619,79.047619,69.52381
7,LoriPaige,99.68254,100.0,100.0,98.412698
8,RaduanoMarco,100.0,83.333333,100.0,100.0
9,BonillasRivasJocelynn,80.666667,83.333333,90.0,90.0


In [None]:
# mean of each score column for tinyllava
print_means(res_tinyllava)

score_original: 82.84920634920636
score_maxim: 85.5
score_srgan: 83.23809523809524
score_total: 83.08730158730158


In [None]:
# mean of each score column for qwen
print_means(res_qwen)

score_original: 84.38095238095238
score_maxim: 85.13968253968255
score_srgan: 84.53650793650795
score_total: 81.56825396825397
