In [6]:
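# NOTE: superseded draft of the classification cell that follows, kept commented out for reference.
# Differences visible in this notebook: this draft does not cast the classified
# keypoints to int8, and it writes to 'phoenix-2014-doppler-vectorized.pkl'
# rather than 'phoenix-2014-doppler-vectorized1.pkl'.
# import numpy as np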
# import pickle

# def find_medians(values):
#     # Split into negative and positive values
#     negative_values = values[values < 0]
#     positive_values = values[values > 0]

#     # Calculate the left median (for negative values)
#     if len(negative_values) > 0:
#         left_median = np.median(negative_values)
#     else:
#         left_median = None  # No negative values, set to None

#     # Calculate the right median (for positive values)
#     if len(positive_values) > 0:
#         right_median = np.median(positive_values)
#     else:
#         right_median = None  # No positive values, set to None

#     return left_median, right_median

# def classify(value, left_median, right_median):
#     # Apply the classification rule based on left and right medians
#     if left_median is not None and value <= left_median:
#         return -1
#     elif right_median is not None and value >= right_median:
#         return 1
#     else:
#         return 0

# def apply_classification(frames):
#     all_x = []
#     all_y = []

#     # Collect all x and y values
#     for i in range(frames.shape[0]):
#         x_values = frames[i][:, 0]
#         y_values = frames[i][:, 1]

#         all_x.extend(x_values)
#         all_y.extend(y_values)

#     # Convert to numpy arrays
#     all_x = np.array(all_x)
#     all_y = np.array(all_y)

#     # Find left and right medians for x and y
#     x_left_median, x_right_median = find_medians(all_x)
#     y_left_median, y_right_median = find_medians(all_y)

#     print(f"x_left_median: {x_left_median}, x_right_median: {x_right_median}")
#     print(f"y_left_median: {y_left_median}, y_right_median: {y_right_median}")

#     # Classify all keypoints in the frames based on the found medians
#     classified_frames = []

#     for i in range(frames.shape[0]):
#         classified_keypoints = []
#         for keypoint in frames[i]:
#             x, y, c = keypoint

#             # Classify x and y
#             classified_x = classify(x, x_left_median, x_right_median)
#             classified_y = classify(y, y_left_median, y_right_median)

#             # Construct new classified keypoint as (classified_x, classified_y, c)
#             classified_keypoints.append([classified_x, classified_y, c])

#         classified_frames.append(np.array(classified_keypoints))

#     return np.array(classified_frames)

# def process_videos(data):
#     doppler_data_classified = {}

#     # Process each video separately
#     for video, frames in data.items():
#         keypoints = frames['keypoints']

#         # Apply classification for this video's frames
#         classified_keypoints = apply_classification(keypoints)

#         doppler_data_classified[video] = {'keypoints': classified_keypoints}

#         # Print the first frame's classified keypoints for debugging
#         print(f"Classified keypoints for video {video} (first frame):")
#         print(doppler_data_classified[video]['keypoints'][0][:5])  # Print the first 5 keypoints of the first frame

#     return doppler_data_classified

# def read_pkl(path):
#     """Reads a pickle file from the given path."""
#     with open(path, 'rb') as f:
#         return pickle.load(f)

# # Read the original data
# path = '/nas/Dataset/Phoenix/phoenix-2014-doppler-normalized.pkl'
# data = read_pkl(path)

# # Apply classification and get the processed dataset
# vectorized = process_videos(data)

# # Save the classified data back to a pickle
# vector_path = '/nas/Dataset/Phoenix/phoenix-2014-doppler-vectorized.pkl'
# with open(vector_path, 'wb') as f:
#     pickle.dump(vectorized, f)

# print("Vectorized dataset created and saved.")

In [4]:
import numpy as np
import pickle

def find_medians(values):
    """Compute separate medians for the negative and the positive entries.

    Entries equal to zero belong to neither side and are ignored.

    Args:
        values: NumPy array of numbers (it is filtered with boolean masks).

    Returns:
        Tuple ``(left_median, right_median)``: the median of the entries
        ``< 0`` and the median of the entries ``> 0``. A side with no
        entries yields ``None`` instead of a number.
    """
    def _median_or_none(subset):
        # An empty side has no median; signal that with None.
        if len(subset) == 0:
            return None
        return np.median(subset)

    below_zero = values[values < 0]
    above_zero = values[values > 0]
    return _median_or_none(below_zero), _median_or_none(above_zero)

def classify(value, left_median, right_median):
    """Map a single value to -1, 0 or 1 using the two median thresholds.

    Args:
        value: The number to classify.
        left_median: Lower threshold, or ``None`` to disable the -1 class.
        right_median: Upper threshold, or ``None`` to disable the +1 class.

    Returns:
        ``-1`` if ``value <= left_median``; otherwise ``1`` if
        ``value >= right_median``; otherwise ``0``. The left test is made
        first, so it wins if both thresholds would match.
    """
    at_or_below_left = left_median is not None and value <= left_median
    if at_or_below_left:
        return -1

    at_or_above_right = right_median is not None and value >= right_median
    return 1 if at_or_above_right else 0

def _classify_array(values, left_median, right_median):
    """Array counterpart of ``classify``: label every entry as -1, 0 or 1."""
    labels = np.zeros(np.shape(values), dtype=np.int8)
    if right_median is not None:
        labels[values >= right_median] = 1
    # Written last so the left rule wins on overlap, matching the
    # if/elif order used by ``classify``.
    if left_median is not None:
        labels[values <= left_median] = -1
    return labels


def apply_classification(frames, dtype=np.int8):
    """Quantise the x/y columns of one video's keypoints to -1 / 0 / 1.

    All x values of the video are pooled to compute one pair of thresholds
    (median of the negative values, median of the positive values) via
    ``find_medians``; the same is done for y. Each x and y is then labelled
    -1 (at or below the left median), 1 (at or above the right median) or 0.
    The third column is carried over, cast to ``dtype``.

    The thresholds are printed, as a progress/debug aid.

    Args:
        frames: Array of shape ``(num_frames, num_keypoints, 3)`` whose last
            axis is ``(x, y, c)``. An object array of equally shaped
            per-frame ``(num_keypoints, 3)`` arrays is also accepted.
        dtype: dtype of the returned array. Defaults to ``np.int8`` to keep
            the result small. NOTE(review): with an integer dtype the third
            column is truncated toward zero, so fractional values in it
            become 0 -- pass a float dtype (e.g. ``np.float32``) if that
            column must survive; confirm which is intended.

    Returns:
        ``numpy.ndarray`` with the same shape as ``frames`` and the requested
        ``dtype``, holding ``(class_x, class_y, c)`` per keypoint.

    Raises:
        ValueError: if a non-empty ``frames`` is not ``(T, K, 3)``-shaped.
    """
    frames = np.asarray(frames)
    if frames.dtype == object and frames.size:
        # Object array of per-frame arrays: stack into one regular block so
        # the whole video can be processed with array operations.
        frames = np.stack([np.asarray(frame) for frame in frames])

    if frames.size == 0:
        # Nothing to classify; both medians will come back as None.
        x_values = y_values = np.empty(0)
    elif frames.ndim == 3 and frames.shape[-1] == 3:
        x_values = frames[..., 0]
        y_values = frames[..., 1]
    else:
        raise ValueError(
            "frames must have shape (num_frames, num_keypoints, 3), "
            f"got {frames.shape}"
        )

    # Find left and right medians for x and y over the whole video
    x_left_median, x_right_median = find_medians(x_values.ravel())
    y_left_median, y_right_median = find_medians(y_values.ravel())

    print(f"x_left_median: {x_left_median}, x_right_median: {x_right_median}")
    print(f"y_left_median: {y_left_median}, y_right_median: {y_right_median}")

    # Classify every keypoint of every frame in one pass
    classified = np.empty(frames.shape, dtype=dtype)
    if frames.size:
        classified[..., 0] = _classify_array(x_values, x_left_median, x_right_median)
        classified[..., 1] = _classify_array(y_values, y_left_median, y_right_median)
        classified[..., 2] = frames[..., 2].astype(dtype)
    return classified
def process_videos(data):
    """Classify the keypoints of every video in ``data``.

    Each video is processed independently, so the median thresholds are
    computed per video rather than over the whole dataset.

    Args:
        data: Mapping of video identifier to a record; the record's
            ``'keypoints'`` entry is passed to ``apply_classification``.

    Returns:
        dict mapping each video identifier to ``{'keypoints': classified}``.
        Only the ``'keypoints'`` entry is produced; other entries of the
        input record are not copied.
    """
    doppler_data_classified = {}

    # Process each video separately
    for video, frames in data.items():
        classified_keypoints = apply_classification(frames['keypoints'])
        doppler_data_classified[video] = {'keypoints': classified_keypoints}

        # Debug preview: first 5 keypoints of the first frame
        print(f"Classified keypoints for video {video} (first frame):")
        if len(classified_keypoints) > 0:
            print(classified_keypoints[0][:5])
        else:
            # Indexing [0] on a video without frames would raise IndexError
            # and abort the whole run before anything is saved.
            print("(no frames)")

    return doppler_data_classified

def read_pkl(path):
    """Load and return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with open(path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

# Read the original data
# NOTE(review): the absolute /nas path is hard-coded, and pickle.load can run
# arbitrary code from the file -- only point this at a trusted dataset.
path = '/nas/Dataset/Phoenix/phoenix-2014-doppler-normalized.pkl'
data = read_pkl(path)

# Apply classification and get the processed dataset
# (process_videos prints the thresholds and a first-frame preview per video).
vectorized = process_videos(data)

# Save the classified data back to a pickle
# The output name ('...-vectorized1.pkl') differs from the input name, so the
# normalized input file is not overwritten.
vector_path = '/nas/Dataset/Phoenix/phoenix-2014-doppler-vectorized1.pkl'
with open(vector_path, 'wb') as f:
    pickle.dump(vectorized, f)

print("Vectorized dataset created and saved.")

x_left_median: -0.01708984375, x_right_median: 0.009765625
y_left_median: -0.02587890625, y_right_median: 0.0263671875
Classified keypoints for video fullFrame-210x260px/train/01April_2010_Thursday_heute_default-0/1/01April_2010_Thursday_heute (first frame):
[[-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]
 [-1  0  0]]
x_left_median: -0.015625, x_right_median: 0.0244140625
y_left_median: -0.02099609375, y_right_median: 0.0234375
Classified keypoints for video fullFrame-210x260px/train/01April_2010_Thursday_heute_default-2/1/01April_2010_Thursday_heute (first frame):
[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]]
x_left_median: -0.111328125, x_right_median: 0.0595703125
y_left_median: -0.032470703125, y_right_median: 0.0546875
Classified keypoints for video fullFrame-210x260px/train/01April_2010_Thursday_heute_default-3/1/01April_2010_Thursday_heute (first frame):
[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]]
x_left_median: -0.0322265625, x_right_median: 0.0341796875
y_left_median: -0.006

In [5]:
import pickle 
def read_pkl(path):
    """Return the object unpickled from the file at ``path``.

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with open(path, mode='rb') as handle:
        return pickle.load(handle)
# Reload the '...-vectorized1.pkl' file to inspect the classified result.
# NOTE: this rebinds the notebook-level name `path`, and despite its name
# `norm_data` holds the vectorized (classified) dataset, not the normalized one.
path = '/nas/Dataset/Phoenix/phoenix-2014-doppler-vectorized1.pkl'
norm_data= read_pkl(path)
# Bare expression: displays the entire dict as the cell output.
norm_data

{'fullFrame-210x260px/train/01April_2010_Thursday_heute_default-0/1/01April_2010_Thursday_heute': {'keypoints': array([[[-1,  0,  0],
          [-1,  0,  0],
          [-1,  0,  0],
          ...,
          [-1,  0,  0],
          [-1,  0,  0],
          [-1,  0,  0]],
  
         [[-1,  0,  0],
          [-1,  0,  0],
          [-1,  0,  0],
          ...,
          [ 0,  0,  0],
          [-1,  0,  0],
          [-1,  0,  0]],
  
         [[-1,  0,  0],
          [-1,  0,  0],
          [-1,  0,  0],
          ...,
          [-1, -1,  0],
          [-1,  0,  0],
          [-1,  0,  0]],
  
         ...,
  
         [[-1,  1,  0],
          [-1,  1,  0],
          [-1,  0,  0],
          ...,
          [ 0,  1,  0],
          [ 0,  1,  0],
          [ 0,  1,  0]],
  
         [[-1,  0,  0],
          [-1,  1,  0],
          [-1,  0,  0],
          ...,
          [ 0,  1,  0],
          [ 0,  1,  0],
          [ 0,  1,  0]],
  
         [[-1,  0,  0],
          [-1,  0,  0],
          