In [7]:
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from process_CSL import *

## Representation density plotting
1. Load the label files
  - Check the number of glosses and compare it with the number of frames
  - Divide the frames among the glosses accordingly and record each frame's gloss label in a dictionary

2. Load the embeddings and perform PCA or t-SNE
  - Optionally do this on samples of a chosen size, and then plot them accordingly

In [2]:
'''
Loading the labels
'''

def read_CSL_annotations(CSL_annot_path):
    """
    Load a pickled CSL annotation file and return its contents.

    Parameters
    ----------
    CSL_annot_path : str or path-like
        Location of the ``.pkl`` annotation file to read.

    Returns
    -------
    object
        Whatever object was pickled into the file. For the label files
        displayed in this notebook that is a dict whose ``'info'`` entry is a
        list of per-video records with ``name``, ``translation`` and
        ``length`` fields (other keys may exist; the displayed output is
        truncated).

    Raises
    ------
    FileNotFoundError
        If ``CSL_annot_path`` does not exist.
    pickle.UnpicklingError
        If the file is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only use this on annotation files that come from a trusted source.
    with open(CSL_annot_path, 'rb') as annot_file:
        return pickle.load(annot_file)

## Load the processed annotation pickles for the train / dev / test splits
# All three label files live in the same directory, so name it once.
LABELS_DIR = "../../CSL-Daily/sentence_label/processed"

train_labels = read_CSL_annotations(LABELS_DIR + "/labels_train.pkl")
dev_labels = read_CSL_annotations(LABELS_DIR + "/labels_dev.pkl")
test_labels = read_CSL_annotations(LABELS_DIR + "/labels_test.pkl")


In [3]:
train_labels

{'info': [{'name': 'S000000_P0000_T00', 'translation': '你们好！', 'length': 52},
  {'name': 'S000000_P0004_T00', 'translation': '你们好！', 'length': 47},
  {'name': 'S000000_P0008_T00', 'translation': '你们好！', 'length': 58},
  {'name': 'S000001_P0000_T00', 'translation': '对不起！', 'length': 37},
  {'name': 'S000001_P0004_T00', 'translation': '对不起！', 'length': 33},
  {'name': 'S000001_P0008_T00', 'translation': '对不起！', 'length': 45},
  {'name': 'S000002_P0000_T00', 'translation': '没关系！', 'length': 29},
  {'name': 'S000002_P0004_T00', 'translation': '没关系！', 'length': 35},
  {'name': 'S000002_P0008_T00', 'translation': '没关系！', 'length': 49},
  {'name': 'S000003_P0000_T00', 'translation': '谢谢！', 'length': 30},
  {'name': 'S000003_P0004_T00', 'translation': '谢谢！', 'length': 32},
  {'name': 'S000003_P0008_T00', 'translation': '谢谢！', 'length': 35},
  {'name': 'S000004_P0000_T00', 'translation': '不客气！', 'length': 51},
  {'name': 'S000004_P0004_T00', 'translation': '不客气！', 'length': 41},
  {'name': 'S00

In [4]:
dev_labels

{'info': [{'name': 'S000020_P0000_T00', 'translation': '他今年四岁。', 'length': 54},
  {'name': 'S000020_P0008_T00', 'translation': '他今年四岁。', 'length': 90},
  {'name': 'S000040_P0000_T00', 'translation': '今天星期几？', 'length': 41},
  {'name': 'S000040_P0004_T00', 'translation': '今天星期几？', 'length': 42},
  {'name': 'S000054_P0000_T00', 'translation': '今天我想吃面条。', 'length': 56},
  {'name': 'S000153_P0004_T00', 'translation': '你和小张什么时候认识的？', 'length': 55},
  {'name': 'S000153_P0008_T00', 'translation': '你和小张什么时候认识的？', 'length': 109},
  {'name': 'S000185_P0000_T00', 'translation': '我要去超市买椅子，你去吗？', 'length': 91},
  {'name': 'S000195_P0000_T00', 'translation': '他们下午要做什么？', 'length': 49},
  {'name': 'S000195_P0008_T00', 'translation': '他们下午要做什么？', 'length': 84},
  {'name': 'S000196_P0000_T00', 'translation': '他们想什么时候去买椅子？', 'length': 79},
  {'name': 'S000196_P0004_T00', 'translation': '他们想什么时候去买椅子？', 'length': 90},
  {'name': 'S000201_P0004_T00', 'translation': '我每天六点起床。', 'length': 134},
  {'name': 'S

In [6]:
test_labels

{'info': [{'name': 'S000020_P0004_T00', 'translation': '他今年四岁。', 'length': 52},
  {'name': 'S000040_P0008_T00', 'translation': '今天星期几？', 'length': 66},
  {'name': 'S000054_P0008_T00', 'translation': '今天我想吃面条。', 'length': 93},
  {'name': 'S000153_P0000_T00', 'translation': '你和小张什么时候认识的？', 'length': 76},
  {'name': 'S000185_P0004_T00', 'translation': '我要去超市买椅子，你去吗？', 'length': 108},
  {'name': 'S000185_P0008_T00', 'translation': '我要去超市买椅子，你去吗？', 'length': 161},
  {'name': 'S000195_P0004_T00', 'translation': '他们下午要做什么？', 'length': 51},
  {'name': 'S000196_P0008_T00', 'translation': '他们想什么时候去买椅子？', 'length': 124},
  {'name': 'S000201_P0000_T00', 'translation': '我每天六点起床。', 'length': 79},
  {'name': 'S000213_P0004_T00', 'translation': '他每天回来都很累。', 'length': 71},
  {'name': 'S000213_P0008_T00', 'translation': '他每天回来都很累。', 'length': 115},
  {'name': 'S000218_P0004_T00', 'translation': '这块手表是你的吗？', 'length': 31},
  {'name': 'S000218_P0008_T00', 'translation': '这块手表是你的吗？', 'length': 60},
  {'nam

In [None]:
def gather_vid_emb(name, phase, img_dir="../../CSL-Daily/sentence/frames_512x512"):
    """
    Gather all the tensors stored as .pt files in one video's folder.

    The folder that is read is ``<img_dir>/<phase>/<name>``.

    Parameters
    ----------
    name : str
        Name of the video folder (e.g. the ``name`` field of a label record).
    phase : str
        Sub-directory of ``img_dir`` that contains the video folder.
        # NOTE(review): presumably one of "train" / "dev" / "test" -- confirm
        # against the on-disk layout.
    img_dir : str, optional
        Root directory that holds the per-phase folders.

    Returns
    -------
    list
        The objects loaded from every ``.pt`` file in the folder, ordered by
        file name. Empty if the folder contains no ``.pt`` files.

    Raises
    ------
    FileNotFoundError
        If the video folder does not exist.
    """
    # Imported lazily so that the label-loading cells of this notebook do not
    # require torch to be installed.
    import torch

    vid_folder = os.path.join(img_dir, f"{phase}/{name}")
    print("Getting video frames from ", vid_folder)

    # Keep only the .pt files. The sort is lexicographic, so it matches frame
    # order only if the file names are zero-padded -- TODO confirm naming.
    pt_files = sorted(
        fname for fname in os.listdir(vid_folder) if fname.endswith(".pt")
    )

    # Load onto the CPU so tensors saved from a GPU session can still be read
    # on a machine without one.
    return [
        torch.load(os.path.join(vid_folder, fname), map_location="cpu")
        for fname in pt_files
    ]
