In [2]:
import pandas as pd
from collections import deque
from tqdm import tqdm
import pandas as pd
import cv2
from moviepy.editor import *
from moviepy.config import change_settings
import os
# Point moviepy at the ImageMagick executable (machine-specific absolute path).
# NOTE(review): this is a raw string, so every `\\` below stays as two
# backslash characters in the configured path — confirm that is intended.
change_settings({"IMAGEMAGICK_BINARY": r"C:\\Program Files\\ImageMagick-7.1.1-Q16-HDRI\\magick.exe"})

In [3]:

# Row limit handed to `pd.read_csv(..., nrows=...)` when the skeleton CSV is
# loaded; None reads the whole file. Uncomment the line below for a quick run.
# number_of_skeletons = 1000
number_of_skeletons = None
# Only this many of the unique sentence ids found in the skeleton CSV are loaded.
number_of_selected_sentences = 10
# CSV with the per-frame keypoint rows; the loader reads its `sentence_id` and
# `frame_index_in_sentence` columns.
skeleton_map_path = "../extractedbtvdataset/skeleton_sentence_map.csv"
# CSV with the sentence metadata; the loader reads its `sentence_id`,
# `start_time`, `end_time`, `sentence` and `video_name` columns.
sentence_path = "../extractedbtvdataset/sentences.csv"

class SelectedColumns:
    """Registry of the keypoint coordinate columns used by this notebook.

    Column names follow the ``{limb}_{point}_{x|y}`` pattern and are generated
    for ``pose`` points 0-7, ``face`` points 48-67 and points 0-20 of both
    ``hand_right`` and ``hand_left``.

    Attributes:
        selected_pose_points: the generated coordinate column names only.
        selected_columns: the coordinate columns followed by ``sentence_id``
            and ``frame_index_in_sentence``.
    """

    def __init__(self):
        selected_points_for_sign = [
            {'limb': 'pose', 'points': range(8)},
            {'limb': 'face', 'points': range(48, 68)},
            {'limb': 'hand_right', 'points': range(21)},
            {'limb': 'hand_left', 'points': range(21)},
        ]

        # For every point the x column comes first, immediately followed by y.
        selected_columns = [
            f"{limb['limb']}_{point}_{axis}"
            for limb in selected_points_for_sign
            for point in limb['points']
            for axis in ('x', 'y')
        ]

        self.selected_columns = selected_columns + ['sentence_id', 'frame_index_in_sentence']
        self.selected_pose_points = selected_columns

    def check_column(self, column_name):
        """Validate that ``column_name`` is one of the selected columns.

        Raises:
            ValueError: if ``column_name`` is not in ``selected_columns``.
        """
        if column_name not in self.selected_columns:
            raise ValueError(f"column {column_name} is not one of the selected columns")

    def keep_columns(self, dataframe):
        """Return ``dataframe`` restricted to the selected columns, in order."""
        return dataframe[self.selected_columns]

    def get_selected_columns(self):
        """Return the coordinate columns plus the two bookkeeping columns."""
        return self.selected_columns

    def get_selected_pose_points(self):
        """Return only the keypoint coordinate column names."""
        return self.selected_pose_points

# Shared module-level instance; SentenceData.get/set use it to validate column names.
selected_column = SelectedColumns()

class PoseGraph:
    """Directed keypoint graph rooted at ``pose_0``.

    ``graph`` maps every keypoint name to the list of its child keypoints.
    On construction a breadth-first traversal from the root is computed, which
    yields the visiting order (``bfs_traversal``) and the parent of every
    non-root keypoint (``parents``).
    """

    # Keypoint the breadth-first traversal starts from; it has no parent.
    ROOT = 'pose_0'

    def __init__(self) -> None:
        self.graph = {
            'pose_0': ['face_48', 'face_54', 'pose_1'],
            'pose_1': ['pose_2', 'pose_5'],
            'pose_2': ['pose_3'],
            'pose_3': ['pose_4'],
            'pose_4': ['hand_right_0'],
            'pose_5': ['pose_6'],
            'pose_6': ['pose_7'],
            'pose_7': ['hand_left_0'],
            **self._hand_edges('hand_left'),
            **self._hand_edges('hand_right'),
            'face_48': ['face_49', 'face_60', 'face_59'],
            'face_49': ['face_50'],
            'face_50': ['face_51'],
            'face_51': [],
            'face_52': [],
            'face_53': ['face_52'],
            'face_54': ['face_53', 'face_64', 'face_55'],
            'face_55': ['face_56'],
            'face_56': [],
            'face_57': [],
            'face_58': ['face_57'],
            'face_59': ['face_58'],
            'face_60': ['face_61', 'face_67'],
            'face_61': ['face_62'],
            'face_62': [],
            'face_63': [],
            'face_64': ['face_63', 'face_65'],
            'face_65': [],
            'face_66': [],
            'face_67': ['face_66'],
        }

        self.parents = {}
        self.BFS()

    @staticmethod
    def _hand_edges(prefix):
        """Build the edges of one hand: point 0 fans out to five 4-point chains.

        Point 0 links to points 1, 5, 9, 13 and 17; inside each chain every
        point links to the next one, and points 4, 8, 12, 16 and 20 are leaves.
        """
        edges = {f"{prefix}_0": [f"{prefix}_{i}" for i in (1, 5, 9, 13, 17)]}
        for i in range(1, 21):
            edges[f"{prefix}_{i}"] = [] if i % 4 == 0 else [f"{prefix}_{i + 1}"]
        return edges

    def BFS(self):
        """(Re)compute ``bfs_traversal`` and ``parents`` from ``graph``.

        A keypoint is marked as visited when it is first enqueued, so each one
        appears exactly once in the traversal and keeps the parent that
        discovered it first, even if it is reachable through several paths.
        """
        self.parents = {}
        self.bfs_traversal = []
        visited = {self.ROOT}
        queue = deque([self.ROOT])
        while queue:
            vertex = queue.popleft()
            self.bfs_traversal.append(vertex)
            for neighbor in self.graph[vertex]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    self.parents[neighbor] = vertex
                    queue.append(neighbor)

    def get_bfs_traversal(self):
        """Return the keypoint names in breadth-first order, root first."""
        return self.bfs_traversal

    def get_parent_of_point(self, keypoint):
        """Return the parent keypoint of ``keypoint``.

        Raises:
            KeyError: if ``keypoint`` has no recorded parent (e.g. the root).
        """
        return self.parents[keypoint]
    
# Shared module-level keypoint graph; SentenceData uses it for parent lookups and BFS order.
graph = PoseGraph()


class SentenceData:
    """Keypoint frames of one sentence together with the sentence metadata.

    The frames are stored in ``dataframe``, indexed by
    ``frame_index_in_sentence``. A coordinate value of 0 is treated as missing.
    The estimation helpers address frames ``0 .. row_count - 1`` through that
    index.
    # NOTE(review): this assumes the frame index labels are consecutive
    # integers starting at 0 — confirm against the input CSV.
    """

    def __init__(self, id, dataframe, start_time, end_time, video_name, sentence) -> None:
        self.id = id
        self.dataframe = dataframe.set_index('frame_index_in_sentence')
        self.start_time = start_time
        self.end_time = end_time
        self.video_name = video_name
        self.sentence = sentence

    def get(self, index, point_name):
        """Return the value of column ``point_name`` at frame ``index``."""
        selected_column.check_column(point_name)
        return self.dataframe.loc[index, point_name]

    def set(self, index, point_name, value) -> None:
        """Store ``value`` in column ``point_name`` at frame ``index``."""
        selected_column.check_column(point_name)
        self.dataframe.loc[index, point_name] = value

    def get_row_count(self):
        """Return the number of frames in this sentence."""
        return self.dataframe.shape[0]

    def print(self):
        """Print the first rows, the column index and the row count."""
        print(self.dataframe.head())
        print(f"columns: {self.dataframe.columns}")
        print(f"row count: {self.dataframe.shape[0]}")

    def _estimate_point_from_neighbour_frame(self, time, column_name, step):
        """Estimate a coordinate at ``time`` from the frame at ``time + step``.

        The estimate is the neighbouring frame's value for the same coordinate
        shifted by how far the parent keypoint moved between the two frames.
        Returns 0 (still missing) when the keypoint has no parent in the graph
        or when any of the three values needed is itself missing.
        """
        point_name, _, axis = column_name.rpartition("_")
        try:
            parent_point = graph.get_parent_of_point(point_name)
        except KeyError:
            # The root keypoint has no parent to anchor the estimate on.
            return 0

        parent_column = f"{parent_point}_{axis}"
        neighbour_time = time + step
        parent_of_current_time = self.get(time, point_name=parent_column)
        parent_of_neighbour_time = self.get(neighbour_time, point_name=parent_column)
        point_of_neighbour_time = self.get(neighbour_time, point_name=column_name)
        if parent_of_current_time == 0 or parent_of_neighbour_time == 0 or point_of_neighbour_time == 0:
            return 0

        return round(point_of_neighbour_time + (parent_of_current_time - parent_of_neighbour_time), 3)

    def estimate_point_from_previous_time(self, time, column_name):
        """Estimate ``column_name`` at frame ``time`` from frame ``time - 1``.

        Returns the estimate rounded to 3 decimals, or 0 if it cannot be made.

        Raises:
            IndexError: if ``time`` has no previous frame or is past the end.
        """
        if time <= 0 or time >= self.get_row_count():
            raise IndexError(
                f"frame {time} has no previous frame to estimate from "
                f"(row count: {self.get_row_count()})"
            )
        return self._estimate_point_from_neighbour_frame(time, column_name, step=-1)

    def estimate_point_from_next_time(self, time, column_name):
        """Estimate ``column_name`` at frame ``time`` from frame ``time + 1``.

        Returns the estimate rounded to 3 decimals, or 0 if it cannot be made.

        Raises:
            IndexError: if ``time`` is negative or has no following frame.
        """
        if time < 0 or time >= self.get_row_count() - 1:
            raise IndexError(
                f"frame {time} has no next frame to estimate from "
                f"(row count: {self.get_row_count()})"
            )
        return self._estimate_point_from_neighbour_frame(time, column_name, step=1)

    def get_missing_value_count(self):
        """Return how many cells of the frame table hold the value 0.

        ``sentence_id`` is excluded because it is an identifier, so an id of 0
        must not be reported as a missing coordinate.
        """
        coordinates = self.dataframe.drop(columns=['sentence_id'], errors='ignore')
        return (coordinates == 0).sum().sum()

    def _fill_missing_values(self, frame_indices, estimate):
        """Replace zero coordinates in ``frame_indices`` using ``estimate``.

        Keypoints are processed in breadth-first order so that a parent is
        filled in before the children whose estimates depend on it.
        """
        for i in frame_indices:
            for point_name in graph.get_bfs_traversal():
                for axis in ("x", "y"):
                    column_name = f"{point_name}_{axis}"
                    if self.get(i, column_name) == 0:
                        self.set(i, column_name, estimate(i, column_name=column_name))

    def replace_missing_value_from_previous_time(self):
        """Forward pass: fill zeros of frames 1..last from the preceding frame."""
        self._fill_missing_values(
            range(1, self.get_row_count()),
            self.estimate_point_from_previous_time,
        )

    def replace_missing_value_from_next_time(self):
        """Backward pass: fill zeros of frames last-1..0 from the following frame."""
        self._fill_missing_values(
            range(self.get_row_count() - 2, -1, -1),
            self.estimate_point_from_next_time,
        )

    def render_skeleton_sentence_map(self):
        """Placeholder: the original definition had no body.

        Raises:
            NotImplementedError: always, until rendering is implemented.
        """
        raise NotImplementedError("render_skeleton_sentence_map is not implemented yet")

class SignLanguageDataset:
    """Loads the skeleton CSV and splits it into one SentenceData per sentence.

    Input locations and limits come from the module-level settings
    ``skeleton_map_path``, ``sentence_path``, ``number_of_skeletons`` and
    ``number_of_selected_sentences``.
    """

    def __init__(self) -> None:
        print("reading csv from file...")
        self.dataframe = pd.read_csv(skeleton_map_path, nrows=number_of_skeletons, index_col=0)
        sentence_ids = self.dataframe['sentence_id'].unique()
        print("splitting unique sentences...")
        self.sentences = []

        self.sentence_dataframe = pd.read_csv(sentence_path)

        for sentence_id in tqdm(sentence_ids[:number_of_selected_sentences]):
            sentence_frames = self.dataframe[self.dataframe['sentence_id'] == sentence_id]
            sentence_info = self.get_sentence_info_by_id(sentence_id)
            if sentence_info.empty:
                raise ValueError(f"no sentence metadata found for sentence_id {sentence_id}")
            # Read every field from the first matching row; calling float() on
            # a whole Series is deprecated in pandas.
            self.sentences.append(SentenceData(
                sentence_id,
                sentence_frames,
                start_time=float(sentence_info['start_time'].iloc[0]),
                end_time=float(sentence_info['end_time'].iloc[0]),
                video_name=sentence_info['video_name'].iloc[0],
                sentence=sentence_info['sentence'].iloc[0],
            ))

        self.column_names = self.dataframe.columns

    def get(self, index):
        """Return the SentenceData at position ``index`` of the loaded list."""
        return self.sentences[index]

    def get_sentence_info_by_id(self, id):
        """Return the metadata rows whose ``sentence_id`` equals ``id``."""
        matches = self.sentence_dataframe["sentence_id"] == id
        return self.sentence_dataframe[matches].reset_index()

    def get_sentence_count(self):
        """Return the number of loaded sentences."""
        return len(self.sentences)

    def get_column_names(self):
        """Return the columns of the skeleton dataframe as read from disk."""
        return self.column_names

    def replace_all_missing_data_using_previous_time(self):
        """Run the forward (previous-frame) fill on every loaded sentence."""
        for sentence in tqdm(self.sentences):
            sentence.replace_missing_value_from_previous_time()

    def replace_all_missing_data_using_next_time(self):
        """Run the backward (next-frame) fill on every loaded sentence."""
        for sentence in tqdm(self.sentences):
            sentence.replace_missing_value_from_next_time()

    def get_total_missing_value(self):
        """Return the sum of the missing-value counts of all loaded sentences."""
        total_missing = 0
        for sentence in tqdm(self.sentences):
            total_missing = total_missing + sentence.get_missing_value_count()

        return total_missing

    def save_csv(self, output_path='implemented_algorithm.csv',
                 snapshot_path='implemented_algorithm_snapshot.csv',
                 snapshot_rows=100):
        """Write all sentence frames to ``output_path`` plus a short snapshot.

        Args:
            output_path: destination of the full combined CSV.
            snapshot_path: destination of the CSV holding only the first rows.
            snapshot_rows: number of leading rows written to the snapshot.
        """
        # Collect the frames first and concatenate once instead of growing a
        # DataFrame inside the loop.
        sentence_frames = [sentence.dataframe.reset_index() for sentence in tqdm(self.sentences)]
        if sentence_frames:
            combined_dataframe = pd.concat(sentence_frames, axis=0, ignore_index=True)
        else:
            combined_dataframe = pd.DataFrame()
        combined_dataframe.to_csv(output_path)
        combined_dataframe.head(snapshot_rows).to_csv(snapshot_path)


In [4]:
# Read both CSV files and build one SentenceData per selected sentence id.
sign_data = SignLanguageDataset()

reading csv from file...
splitting unique sentences...


100%|██████████| 10/10 [00:00<00:00, 12.87it/s]


In [5]:
# Baseline: number of zero-valued cells before any values are filled in.
sign_data.get_total_missing_value()

100%|██████████| 10/10 [00:00<00:00, 588.16it/s]


27848

In [6]:
# Backward pass: fill zero coordinates of each frame from the frame after it.
sign_data.replace_all_missing_data_using_next_time()

100%|██████████| 10/10 [00:03<00:00,  3.05it/s]


In [7]:

# Forward pass: fill the zero coordinates that remain from the frame before.
sign_data.replace_all_missing_data_using_previous_time()

100%|██████████| 10/10 [00:01<00:00,  5.54it/s]


In [8]:
# Recount the zero-valued cells after both fill passes.
sign_data.get_total_missing_value()

100%|██████████| 10/10 [00:00<00:00, 588.15it/s]


18802

In [9]:
# Spot-check the metadata attached to two of the loaded sentences.
for sentence_index in (4, 5):
    selected_sentence = sign_data.get(sentence_index)
    print(selected_sentence.sentence)
    print(selected_sentence.start_time)
    print(selected_sentence.end_time)
    print(selected_sentence.video_name)

এই ছিল এখনকার আয়োজনে
823.0697
824.9697
_u6jEqimZdc
বেসরকারি বিমান পরিবহন ও পর্যটন মন্ত্রণালয় সম্পর্কিত সংসদীয় স্থায়ী কমিটির সভাপতি আ ম উবায়দুল মোকতাদির চৌধুরী বিজয়ীদের মাঝে পুরস্কার বিতরণ করেন
808.9453
819.8453
_u6jEqimZdc


In [10]:
# Write the combined frames to implemented_algorithm.csv and the first 100
# rows to implemented_algorithm_snapshot.csv (in the working directory).
sign_data.save_csv()

100%|██████████| 10/10 [00:00<00:00, 475.63it/s]
