In [5]:
import os
import re
import math
import requests
from lxml import html
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from imutils.video import FileVideoStream


from sign_utils import *
import os
from tqdm import tqdm


## Crawl TID Sözlük

In [75]:
def groupById(vids: list):
    GROUPS = {}
    curr_id = ''
    for vid in vids:
        _,_,_id,_,fname = vid.split('/')
        if _id != curr_id:
            GROUPS[_id] = [vid]
            curr_id = _id
        else: GROUPS[curr_id].append(vid)
    return GROUPS

In [76]:
from urllib.request import urlopen
from tqdm import tqdm

CHARS = ['A', 'B', 'C', 'Ç', 'D', 'E', 'F', 'G', 'H', 'I', 'İ', 'J', 'K', 'L', 'M', 'N', 'O', 'Ö', 'P', 'R', 'S', 'Ş', 'T', 'U', 'Ü', 'V', 'W', 'Y', 'Z']

for char in tqdm(CHARS):
    
    print(f'At {char=}')
    
    page = str(1)
    BASE_URL = 'https://tidsozluk.aile.gov.tr'
    SEARCH_URL = f'https://tidsozluk.aile.gov.tr/tr/Alfabetik/Arama/{char}?p={page}'
    
    HTML = html.fromstring(requests.get(SEARCH_URL).content)
    PAGES = math.ceil(int(HTML.xpath('//div[contains(@id, "rezults_summ")]//b/text()')[0]) / 10)
    
    for page in range(1, PAGES+1):    
        
        print(f'At {page=}')
        
        if page > 1:
            HTML = html.fromstring(requests.get(f'https://tidsozluk.aile.gov.tr/tr/Alfabetik/Arama/{char}?p={page}').content)
    
        LEXICON = [res.xpath('//h3/text()') for res in HTML.xpath(ROW_XPATH)][0]
        
        VIDEOS = [ re.sub('0\.1','0.5', vid) for vid in [res.xpath('//source/@src') for res in HTML.xpath(ROW_XPATH)][0]]
    
        for name, vids in tqdm(list(zip(LEXICON,list(groupById(VIDEOS).values())))):
            
            for idx, vid in enumerate(vids):
                
                f = urlopen(BASE_URL+vid)
                with open(f'../data/corpus/{name.upper()}_{idx}.mp4', 'wb') as code:
                    code.write(f.read())

  0%|          | 0/29 [00:00<?, ?it/s]

At char='A'
At page=1





NameError: name 'ROW_XPATH' is not defined

## Infer Pose Estimation

In [None]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

INFO = {}

for v in tqdm(os.listdir('data/corpus/')[:10]):
    
    #camera = cv2.VideoCapture('data/corpus/'+ v)
    fvs = FileVideoStream('data/corpus/'+ v).start()
    
    fname = v[:-4]
        
    logs_left = None
    move_left = None
    logs_right = None
    move_right = None
    
    phonology_left = SignLanguagePhonology(hand='left')
    phonology_right = SignLanguagePhonology(hand='right')

    window_size = 5
    view_window = 100

    hand_relation = TwoHandedPhonology()
    
    frame = fvs.read()


    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        
        while fvs.more():

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            
            results = holistic.process(image)

            logs_right = phonology_right.getJointInfo(
                results.right_hand_landmarks, results.pose_landmarks, [ 'LOCATION', 'ORIENTATION', 'FINGER_SELECTION', 'MOVEMENT'])
            logs_left = phonology_left.getJointInfo(
                results.left_hand_landmarks, results.pose_landmarks, [ 'LOCATION', 'ORIENTATION', 'FINGER_SELECTION', 'MOVEMENT'])

            move_left = phonology_left.getMovementInformation(
                    view_window, window_size, ['Center_Diff', 'Center_MovAVG_x', 'Center_MovAVG_y', 'Center_MovAVG_z',
                                                'INDEX_FINGER', 'MIDDLE_FINGER', 'RING_FINGER', 'PINKY', 'SWITCH', 'LSE_x', 'LSE_y'])
            move_right = phonology_right.getMovementInformation(
                    view_window, window_size, ['Center_Diff', 'Center_MovAVG_x', 'Center_MovAVG_y', 'Center_MovAVG_z',
                                                'INDEX_FINGER', 'MIDDLE_FINGER', 'RING_FINGER', 'PINKY', 'SWITCH', 'LSE_x', 'LSE_y'])

            #hand_relation.findTemporalAlignment(
            #        phonology_left, phonology_right, view_window, window_size, metrics)

            #ss, frame = camera.read()
            frame = fvs.read()
            

            
    INFO[fname] = { }
    
    INFO[fname]['left_log'] = phonology_left.INFO_MEMORY
    INFO[fname]['left_move'] = move_left
    INFO[fname]['right_log'] = phonology_right.INFO_MEMORY
    INFO[fname]['right_move'] = move_right

100%|██████████| 10/10 [00:32<00:00,  3.24s/it]


## Infer Pose Estimation

In [6]:
import pickle
import os
from collections import defaultdict
class LexicalClassification():
    
    def __init__(self):
        
        # TODO: EXPERIMENTS
        # - POSE + DTW
        # - PRIOR_FEATS + DTW
        # - PROPOSED_FEATS + DTW
        
        # EXPERIMENTS: DTW TYPES
        # - WEIGHTED DTW
                
        self.LEXICON = []
        self.VECTORS = []
        
        self.importLexicon(INFO_TYPE='feat', METRICS=None)
        
    def importLexicon(self, INFO_TYPE: str, METRICS: list[str]):
        
        file = os.listdir('data/vectors/feats/')[-1]
                
        ITEMS = pickle.loads(open('data/vectors/feats/'+file, 'rb').read())
        
        for label, INFO in tqdm(ITEMS.items()):
            
            self.LEXICON.append(label)
            self.VECTORS.append(self.processVectors(INFO, INFO_TYPE, METRICS))

    def processVectors(self, INFO, INFO_TYPE, METRICS):
       
       # SET INFORMATION:
       # - LOCATION: Signal to MAJOR LOCATIONS ( MAJOR LOCATION CONSTRAINT)
       # - ORIENTATION: Signal to 6 ORIENTATION (DIFF RELATIVE SCORE TO 3 PAIRS)
       # - FINGER_SELECTION: Signal to 5 FINGER APERTURES (TODO: SHOULD ADD SELECTION/CURVE FEATS) ( SELECTED FINGER CONSTRAINT)
       # - MOVEMENT: Signal to 2 MOVEMENT FEATS ( PATH: (0,1), APERATURE: (0,1) )
       
       
       VECTORS = self.generateVector(INFO['right_log'], 'right')
       VECTORS.update(self.generateVector(INFO['left_log'], 'left'))
       
       return VECTORS
       
    def generateVector(self, points, hand):
        
        FEAT_DICT = defaultdict(list)
        
        for point in points:
        
            for location, (_, proximity) in point['LOCATION'].items():
                
                FEAT_DICT[f"{location}_PROXIMITY_{hand.upper()}"].append(proximity)

            # FIXME: UNNECESSARY DICT
            for orient,rotation in zip(['PALM/BACK', 'TIPS/WRIST', 'URNAL/RADIAL' ],point['ORIENTATION']['ORIENTATION']):
                
                FEAT_DICT[f"{orient}_PROXIMITY_{hand.upper()}"].append(list(rotation)[-1])   
            
            for hand_dict in point['FINGER_SELECTION']:
                
                ff = 'FINGER' if 'FINGER' in hand_dict.keys() else 'finger'                 
                
                FEAT_DICT[f"{hand_dict[ff]}_ANGLE_{hand.upper()}"].append(hand_dict['ANGLE_1' if hand_dict[ff] != 'THUMB' else 'ANGLE_0'])
            
                FEAT_DICT[f"{hand_dict[ff]}_SELECTION_{hand.upper()}"].append(1 if hand_dict['is_selected'] else 0)
                
                FEAT_DICT[f"{hand_dict[ff]}_CURVED_{hand.upper()}"].append(1 if hand_dict['is_curved'] else 0)

            for feats in point['MOVEMENT']:
                            
                feat_name, val = list(feats.items())[0]
                
                FEAT_DICT[f"{feat_name}_MOVEMENT_{hand.upper()}"].append(val[-1])
                
        
        FEAT_DICT = {
            FEATURE: pd.Series(VALS)  for FEATURE, VALS in FEAT_DICT.items()
        }
        
        return FEAT_DICT

CLS = LexicalClassification()

100%|██████████| 2500/2500 [00:08<00:00, 288.27it/s]


In [7]:
KEY_SET = set(['CHEST_PROXIMITY_RIGHT',
 'MOUTH_PROXIMITY_RIGHT',
 'NOSE_PROXIMITY_RIGHT',
 'EAR_PROXIMITY_RIGHT',
 'EYE_PROXIMITY_RIGHT',
 'PALM/BACK_PROXIMITY_RIGHT',
 'TIPS/WRIST_PROXIMITY_RIGHT',
 'URNAL/RADIAL_PROXIMITY_RIGHT',
 'THUMB_ANGLE_RIGHT',
 'THUMB_SELECTION_RIGHT',
 'THUMB_CURVED_RIGHT',
 'INDEX_FINGER_ANGLE_RIGHT',
 'INDEX_FINGER_SELECTION_RIGHT',
 'INDEX_FINGER_CURVED_RIGHT',
 'MIDDLE_FINGER_ANGLE_RIGHT',
 'MIDDLE_FINGER_SELECTION_RIGHT',
 'MIDDLE_FINGER_CURVED_RIGHT',
 'RING_FINGER_ANGLE_RIGHT',
 'RING_FINGER_SELECTION_RIGHT',
 'RING_FINGER_CURVED_RIGHT',
 'PINKY_ANGLE_RIGHT',
 'PINKY_SELECTION_RIGHT',
 'PINKY_CURVED_RIGHT',
 'PATH_MOVEMENT_RIGHT',
 'APERTURE_MOVEMENT_RIGHT',
 'PATH_ANGLE_MOVEMENT_RIGHT',
 'CHEST_PROXIMITY_LEFT',
 'MOUTH_PROXIMITY_LEFT',
 'NOSE_PROXIMITY_LEFT',
 'EAR_PROXIMITY_LEFT',
 'EYE_PROXIMITY_LEFT',
 'PALM/BACK_PROXIMITY_LEFT',
 'TIPS/WRIST_PROXIMITY_LEFT',
 'URNAL/RADIAL_PROXIMITY_LEFT',
 'THUMB_ANGLE_LEFT',
 'THUMB_SELECTION_LEFT',
 'THUMB_CURVED_LEFT',
 'INDEX_FINGER_ANGLE_LEFT',
 'INDEX_FINGER_SELECTION_LEFT',
 'INDEX_FINGER_CURVED_LEFT',
 'MIDDLE_FINGER_ANGLE_LEFT',
 'MIDDLE_FINGER_SELECTION_LEFT',
 'MIDDLE_FINGER_CURVED_LEFT',
 'RING_FINGER_ANGLE_LEFT',
 'RING_FINGER_SELECTION_LEFT',
 'RING_FINGER_CURVED_LEFT',
 'PINKY_ANGLE_LEFT',
 'PINKY_SELECTION_LEFT',
 'PINKY_CURVED_LEFT',
 'PATH_MOVEMENT_LEFT',
 'APERTURE_MOVEMENT_LEFT',
 'PATH_ANGLE_MOVEMENT_LEFT'])

In [12]:
lexicon_to_idx = dict([(j,i) for i,j in enumerate(CLS.LEXICON)])

In [14]:
dict(sorted(lexicon_to_idx.items()))

{'ABARTMAK_0': 336,
 'ABARTMAK_1': 1921,
 'ABI, AĞABEY_0': 2345,
 'ABLA_1': 1891,
 'ABLA_2': 1626,
 'ACABA_0': 232,
 'ACEMI _2': 1599,
 'ACI ÇEKMEK _0': 131,
 'ACI ÇEKMEK _1': 454,
 'ACI ÇEKMEK _2': 1649,
 'ACIL_0': 2474,
 'ACIMAK_0': 638,
 'ACIMAK_1': 877,
 'ACIMAK_2': 754,
 'ACI_0': 923,
 'AD, İSIM_0': 1225,
 'ADA _0': 2224,
 'ADA _1': 1383,
 'ADALET_0': 2156,
 'ADALET_1': 878,
 'ADAM_1': 670,
 'ADAM_2': 1924,
 'ADAY_0': 1515,
 'ADET_0': 1820,
 'ADRES _0': 1577,
 'ADRES _1': 219,
 'ADRES _2': 1028,
 'AFERIN _0': 997,
 'AFFETMEK_0': 109,
 'AFFETMEK_1': 1314,
 'AFIŞ_0': 2281,
 'AIT_0': 940,
 'AIT_2': 1007,
 'AKCIĞER_0': 326,
 'AKICI_0': 1485,
 'AKICI_1': 683,
 'AKILLI, ZEKI_0': 934,
 'AKILSIZ_0': 1119,
 'AKILSIZ_1': 2395,
 'AKIL_0': 2053,
 'AKITMAK _0': 1138,
 'AKLA GELMEK _0': 1731,
 'AKLA GELMEK _1': 379,
 'AKLINA ALMAK_0': 15,
 'AKMAK_0': 1945,
 'AKMAK_1': 797,
 'AKTARMAK_0': 1063,
 'AKŞAM_0': 2107,
 'ALAKA_0': 1586,
 'ALAKA_1': 127,
 'ALAKA_2': 907,
 'ALAN_0': 385,
 'ALAY_0': 368,


In [19]:
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

SIMILARITY = {}

IDX = 1225

zip_lex = list(zip(CLS.LEXICON,CLS.VECTORS))

for word, candidate in tqdm(zip_lex):

    KEYS = list(KEY_SET.intersection(set(list(candidate.keys())).intersection(CLS.VECTORS[IDX].keys())))
    
    # v0
    BASE = CLS.VECTORS[IDX]
    sim = 0
    kk = 0
    for dim in KEYS:
        #sim += dtw(BASE[dim], candidate[dim])
        try:
            v1, v2 = np.array([BASE[dim].to_list()]), np.array([candidate[dim].to_list()])
            min_len = min([v1.shape[1],v2.shape[1]])
            
            pad = math.floor(abs(v1.shape[1]-v2.shape[1]) / 2)
            
            sim += fastdtw(v1[:,:min_len], v2[:,:min_len], dist=euclidean)[0]
            kk += 1
        except:
            pass


    if kk: SIMILARITY[word] = sim / kk
    
print(CLS.LEXICON[IDX])
vla = pd.DataFrame(sorted(SIMILARITY.items(), key=lambda x:x[1], reverse=False))

return vla

100%|██████████| 2500/2500 [00:02<00:00, 1123.63it/s]

AD, İSIM_0





SyntaxError: 'return' outside function (2132087593.py, line 37)

In [20]:
print(CLS.LEXICON[IDX])
vla = pd.DataFrame(sorted(SIMILARITY.items(), key=lambda x:x[1], reverse=False))
vla.head(50)

AD, İSIM_0


Unnamed: 0,0,1
0,"AD, İSIM_0",0.0
1,BLUZ_1,30.10486
2,ERKEK_1,33.907835
3,SAĞLAM_1,37.196351
4,ANAOKULU_0,39.698024
5,KORKMAK _0,39.821674
6,TITREMEK_1,40.133052
7,KAZANMAK_1,40.292848
8,ALINMAK_1,41.128421
9,SARI_1,43.41151


In [None]:
obj_1[:,0]

array([0.6155439669161669, 0.5474618526299653, 0.4762488430628112,
       0.4068908364287412, 0.35035528325339715, 0.31001180108910603,
       0.28434043784723223, 0.2747569869370229, 0.28035554705269106,
       0.3203169361684198, 0.3494530959468264, 0.37306816584913594,
       0.3826706593965666, 0.38287590105842106, 0.37247900335771095,
       0.35055987205028266, 0.3190332655227857, 0.2847169720661499,
       0.2593015129932318, 0.2560790688576122, 0.2710126240638043,
       0.32011934611499954, 0.36028722126861445, 0.39191978333978855,
       0.4145062600298068, 0.4242517735084594, 0.4455645599477763,
       0.4925675761147404, 0.5648264618828559, 0.6195748872629941],
      dtype=object)

## Crawl TID Sözlük

In [None]:
(obj_1.shape, obj_2[:-2].shape)

((30, 52), (30, 52))