In [3]:
import os
import re
import shutil

import cv2
import numpy as np
import pandas as pd
from numpy.linalg import norm
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Pretrained VGG16 used as a feature extractor: ImageNet weights, with the
# top (classification) layers left out so only the convolutional base is built.
vgg_model = VGG16(include_top=False, weights='imagenet')

# Extract numeric part from filenames like scene_23.jpg
def extract_frame_number(filename):
    """Return the first run of digits found in ``filename`` as an int.

    Args:
        filename: File name (or any string) to scan, e.g. ``scene_23.jpg``.

    Returns:
        The integer value of the first digit sequence, or ``-1`` when the
        string contains no digits at all.
    """
    digits = re.search(r'(\d+)', filename)
    if digits is None:
        return -1
    return int(digits.group(1))

# CNN image preprocessing
def preprocess_image(image_path):
    """Load an image from disk and turn it into a one-image VGG16 input batch.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Returns:
        The array produced by ``preprocess_input`` for a batch holding the
        single 224x224 image (batch axis added at position 0).

    Raises:
        OSError: If OpenCV cannot read the file (``cv2.imread`` returned None).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Cannot read image: {image_path}")

    # OpenCV decodes to BGR channel order, whereas VGG16's preprocess_input
    # expects RGB and performs the RGB->BGR flip itself. Without this
    # conversion the channels reach the pretrained network swapped.
    # NOTE(review): this changes the resulting distances, so a threshold tuned
    # against the old behaviour may need re-tuning.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))

    # preprocess_input is documented for floating-point input; cast explicitly
    # rather than relying on an implicit conversion of the uint8 pixels.
    img = np.expand_dims(img.astype(np.float32), axis=0)
    img = preprocess_input(img)
    return img

# CNN distance between two images
def compute_cnn_distance(img_path1, img_path2):
    """Measure how different two images are in VGG16 feature space.

    Both images are preprocessed first, then each is passed through
    ``vgg_model`` and its output flattened to a vector.

    Args:
        img_path1: Path of the first image.
        img_path2: Path of the second image.

    Returns:
        ``numpy.linalg.norm`` of the difference between the two flattened
        feature vectors.
    """
    # Preprocess both inputs before any prediction so an unreadable file
    # fails before the network is invoked.
    batches = [preprocess_image(path) for path in (img_path1, img_path2)]
    features_a, features_b = (vgg_model.predict(batch).flatten() for batch in batches)
    return norm(features_a - features_b)

# Modified grouping function using CNN
def group_frames_by_cnn_diff(input_folder, output_folder, threshold):
    """Split a frame sequence into ``sub1``, ``sub2``, ... folders at large CNN jumps.

    Frames in ``input_folder`` are ordered by the number in their file name and
    compared pairwise (each frame against its predecessor) with
    ``compute_cnn_distance``. Whenever the distance exceeds ``threshold`` a new
    ``sub<N>`` folder is started; every frame is copied into the folder that is
    current at that point. All pairwise distances are written to
    ``cnn_diff_log.csv`` inside ``output_folder``.

    Args:
        input_folder: Folder containing the frames (.png, .jpg, .jpeg, .bmp).
        output_folder: Folder in which the ``sub<N>`` folders and the CSV log
            are created. Existing content is not cleared.
        threshold: Distance above which a frame opens a new subfolder.

    Returns:
        None. If fewer than two frames are found, a message is printed and
        nothing is grouped.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Order frames by the number embedded in the file name.
    image_files = sorted(
        (
            os.path.join(input_folder, f)
            for f in os.listdir(input_folder)
            if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))
        ),
        key=lambda path: extract_frame_number(os.path.basename(path)),
    )

    if len(image_files) < 2:
        print("Not enough images to compute difference.")
        return

    subfolder_idx = 1
    subfolder_path = os.path.join(output_folder, f"sub{subfolder_idx}")
    os.makedirs(subfolder_path, exist_ok=True)

    def copy_into_current_subfolder(src_path):
        # Copy the file byte-for-byte. Decoding and re-encoding through OpenCV
        # would recompress JPEGs and fail obscurely on unreadable files.
        shutil.copy2(src_path, os.path.join(subfolder_path, os.path.basename(src_path)))

    copy_into_current_subfolder(image_files[0])

    diff_log = []
    # NOTE(review): compute_cnn_distance runs the network on both frames of
    # every pair, so each interior frame is processed twice.
    for prev_img_path, curr_img_path in zip(image_files, image_files[1:]):
        distance = compute_cnn_distance(prev_img_path, curr_img_path)

        diff_log.append({
            'frame1': os.path.basename(prev_img_path),
            'frame2': os.path.basename(curr_img_path),
            'cnn_distance': distance
        })

        if distance > threshold:
            # Large jump: the current frame becomes the first of a new group.
            subfolder_idx += 1
            subfolder_path = os.path.join(output_folder, f"sub{subfolder_idx}")
            os.makedirs(subfolder_path, exist_ok=True)

        copy_into_current_subfolder(curr_img_path)

    # Save log
    csv_path = os.path.join(output_folder, "cnn_diff_log.csv")
    pd.DataFrame(diff_log).to_csv(csv_path, index=False)
    print(f"✅ CNN difference log saved to: {csv_path}")
    print(f"✅ Grouped frames saved in: {output_folder}")


# Run configuration for the CNN-based grouping.
input_dir = 'out_directory_lecture_demo2'   # folder holding the input video frames
output_dir = 'lecture_demo2_subdir_cnn'     # destination for the generated subfolders
threshold = 600                             # distance cut-off; try 600-1000 as a starting point

group_frames_by_cnn_diff(
    input_folder=input_dir,
    output_folder=output_dir,
    threshold=threshold,
)



ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# import cv2
# import numpy as np
# import os
# import csv

# def run_optical_flow_on_folder(image_folder, output_csv_path):
#     # Sort files numerically by extracting numbers from filenames
#     def extract_frame_number(filename):
#         import re
#         match = re.search(r'(\d+)', filename)
#         return int(match.group(1)) if match else -1

#     image_files = sorted([
#         os.path.join(image_folder, f)
#         for f in os.listdir(image_folder)
#         if f.lower().endswith(('.png', '.jpg', '.bmp', '.jpeg'))
#     ], key=lambda x: extract_frame_number(os.path.basename(x)))

#     if len(image_files) < 2:
#         print(f"[{image_folder}] Not enough images to compute optical flow.")
#         return

#     # Initialize analysis results
#     vertical_motion_magnitudes = [] # Average vertical motion magnitude between each frame pair.
#     vertical_motion_std_devs = [] # Standard deviation of vertical flow (variation across the whole frame).
#     regionwise_std_devs = [] # Region-wise variation (how motion differs across grid blocks in the frame).

#     frame1 = cv2.imread(image_files[0])
#     prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
#     step = 16
#     fractions_above_threshold = []

#     for i in range(1, len(image_files)):
#         frame2 = cv2.imread(image_files[i])
#         next = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

#         flow = cv2.calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
#         vert_flow = flow[..., 1]

#         # Whole-frame motion stats
#         vert_magnitude = np.mean(np.abs(vert_flow)) # how much pixels moved vertically, on average
#         # nonzero_pixels = vert_flow[np.abs(vert_flow) > 0]
#         # if nonzero_pixels.size > 0:
#         #     vert_magnitude = np.mean(np.abs(nonzero_pixels))
#         # else:
#         #     vert_magnitude = 0
#         vert_std = np.std(vert_flow) #  how spread out the motion values are — high std = motion inconsistency (e.g. shaky cam)
#         vertical_motion_magnitudes.append(vert_magnitude)
#         vertical_motion_std_devs.append(vert_std)

#         motion_threshold = 4  # You can tune this
#         motion_mask = np.abs(vert_flow) > motion_threshold
#         fraction_moving = np.sum(motion_mask) / motion_mask.size
#         fractions_above_threshold.append(fraction_moving)

#         # Grid-based regional std dev
#         h, w = next.shape
#         grid_size = 4
#         block_h = h // grid_size
#         block_w = w // grid_size

#         region_means = []
#         for gy in range(grid_size):
#             for gx in range(grid_size):
#                 block = vert_flow[gy*block_h:(gy+1)*block_h, gx*block_w:(gx+1)*block_w]
#                 block_mean = np.mean(np.abs(block))
#                 region_means.append(block_mean)

#         region_std = np.std(region_means)
#         regionwise_std_devs.append(region_std)

#         prvs = next.copy()

#     # Save per-folder results
#     with open(output_csv_path, mode='w', newline='') as file:
#         writer = csv.writer(file)
#         # writer.writerow(["Frame Index", "Mean Vertical Motion", "Overall Std Dev", "Region-wise Std Dev"])
#         writer.writerow(["Frame Index", "Mean Vertical Motion", "Overall Std Dev", "Region-wise Std Dev", "Fraction > 4px"])
#         # "Mean Vertical Motion" = How strong was the vertical movement?
#         # "Overall Std Dev" = Was the movement consistent?
#         # "Region-wise Std Dev" = Did different regions move differently?

#         for idx in range(len(vertical_motion_magnitudes)):
#             writer.writerow([
#                 f"{idx} to {idx+1}",
#                 round(vertical_motion_magnitudes[idx], 4),
#                 round(vertical_motion_std_devs[idx], 4),
#                 round(regionwise_std_devs[idx], 4),
#                 round(fractions_above_threshold[idx], 4)
#             ])
#     print(f"✅ Saved: {output_csv_path}")

# # --------- Main Execution Loop --------- #
# root_folder = 'test1_subdir'  # change this to your output folder path
# output_base = os.path.join(root_folder, 'motion_csvs')
# os.makedirs(output_base, exist_ok=True)

# for subfolder in sorted(os.listdir(root_folder)):
#     sub_path = os.path.join(root_folder, subfolder)
#     if os.path.isdir(sub_path) and subfolder.startswith("sub"):
#         csv_path = os.path.join(output_base, f'vertical_motion_{subfolder}.csv')
#         run_optical_flow_on_folder(sub_path, csv_path)

# print("\n✅✅ All subfolders processed.")


# # how many pixels are moving
# # up/down
# # what if the video is badly recorded (like an earthquake in the video)
# # compare all of the frames to the first one
# # DO NOT include 0 as a value in the computation
# # how much of the screen has moved more than 4 pixels
# # mean of the remaining values, as a matrix, still to be defined
# # (original mean with zeros dropped: fraction of high-motion px out of the remaining px)




✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub1.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub2.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub3.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub4.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub5.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub6.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub7.csv

✅✅ All subfolders processed.


In [2]:
import cv2
import numpy as np
import os
import csv

def _read_grayscale(image_path):
    """Read ``image_path`` with OpenCV and return it converted to grayscale."""
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread reports failure by returning None; fail with the path
        # instead of letting cvtColor raise an opaque error.
        raise OSError(f"Cannot read image: {image_path}")
    return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)


def _vertical_motion_stats(vert_flow, threshold_fraction, alpha, grid_size):
    """Summarise one vertical-flow field.

    Returns a tuple ``(rewarded_motion, weighted_motion, weighted_std, region_std)``.
    """
    abs_vert_flow = np.abs(vert_flow)

    # A pixel counts as "moving" when its absolute vertical flow exceeds a
    # fraction of the strongest motion in this frame pair.
    dynamic_threshold = threshold_fraction * np.max(abs_vert_flow)
    moving_mask = abs_vert_flow > dynamic_threshold
    significant_motion_values = abs_vert_flow[moving_mask]

    if significant_motion_values.size > 0:
        total_pixels = abs_vert_flow.size
        coverage_ratio = moving_mask.sum() / total_pixels
        # Sum over moving pixels divided by ALL pixels, so that both motion
        # strength and the share of the frame that moves contribute.
        weighted_motion = np.sum(significant_motion_values) / total_pixels
        # Additionally scale by coverage**alpha to favour frame-wide motion.
        rewarded_motion = weighted_motion * (coverage_ratio ** alpha)
        weighted_std = np.std(significant_motion_values)
    else:
        weighted_motion = 0
        rewarded_motion = 0
        weighted_std = 0

    # Grid-based regional spread: mean absolute flow per block, then the
    # standard deviation across blocks. Rows/columns left over when the frame
    # size is not a multiple of grid_size are not included in any block.
    h, w = vert_flow.shape
    block_h = h // grid_size
    block_w = w // grid_size
    region_means = [
        np.mean(abs_vert_flow[gy * block_h:(gy + 1) * block_h,
                              gx * block_w:(gx + 1) * block_w])
        for gy in range(grid_size)
        for gx in range(grid_size)
    ]
    region_std = np.std(region_means)

    return rewarded_motion, weighted_motion, weighted_std, region_std


def run_optical_flow_on_folder(image_folder, output_csv_path, alpha=2,
                               threshold_fraction=0.005, grid_size=4):
    """Compute vertical optical-flow statistics for consecutive frames of a folder.

    Frames are ordered by the number in their file name. For each consecutive
    pair, dense Farneback optical flow is computed and its vertical component
    is summarised. One CSV row is written per pair with the columns
    "Frame Index", "Rewarded Vertical Motion", "Mean Vertical Motion",
    "Overall Std Dev" and "Region-wise Std Dev" (values rounded to 4 decimals).

    Earlier variants of the whole-frame statistic (plain mean of absolute flow,
    mean over non-zero pixels only) were replaced by the coverage-weighted
    measure implemented in ``_vertical_motion_stats``.

    Args:
        image_folder: Folder containing the frames (.png, .jpg, .bmp, .jpeg).
        output_csv_path: Path of the CSV file to write.
        alpha: Exponent applied to the moving-pixel coverage ratio when
            computing the "rewarded" motion. Defaults to 2.
        threshold_fraction: Fraction of the per-pair maximum absolute vertical
            flow above which a pixel counts as moving. Defaults to 0.005.
        grid_size: Number of blocks per axis for the region-wise statistic.
            Defaults to 4.

    Returns:
        None. If fewer than two frames are found, a message is printed and no
        CSV is written.

    Raises:
        OSError: If a frame cannot be read by OpenCV.
    """
    # Imported locally (as in the original helper) because the import block at
    # the top of this cell does not bring in `re`.
    import re

    def extract_frame_number(filename):
        # First run of digits in the file name, or -1 when there is none.
        match = re.search(r'(\d+)', filename)
        return int(match.group(1)) if match else -1

    image_files = sorted([
        os.path.join(image_folder, f)
        for f in os.listdir(image_folder)
        if f.lower().endswith(('.png', '.jpg', '.bmp', '.jpeg'))
    ], key=lambda x: extract_frame_number(os.path.basename(x)))

    if len(image_files) < 2:
        print(f"[{image_folder}] Not enough images to compute optical flow.")
        return

    rows = []
    prev_gray = _read_grayscale(image_files[0])

    for idx, image_path in enumerate(image_files[1:]):
        curr_gray = _read_grayscale(image_path)

        # Positional arguments after `None` (the output flow) are, in order:
        # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
        flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        vert_flow = flow[..., 1]  # second flow channel: vertical displacement

        stats = _vertical_motion_stats(vert_flow, threshold_fraction, alpha, grid_size)
        rows.append([f"{idx} to {idx+1}"] + [round(value, 4) for value in stats])

        # The grayscale frame is not modified, so it can be reused without a copy.
        prev_gray = curr_gray

    # Save per-folder results
    with open(output_csv_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Frame Index", "Rewarded Vertical Motion", "Mean Vertical Motion", "Overall Std Dev", "Region-wise Std Dev"])
        writer.writerows(rows)
    print(f"✅ Saved: {output_csv_path}")

# --------- Main Execution Loop --------- #
# Run the optical-flow analysis on every "sub*" folder under root_folder and
# collect the resulting CSV files in root_folder/motion_csvs.
root_folder = 'test1_subdir'  # change this to your output folder path
output_base = os.path.join(root_folder, 'motion_csvs')
os.makedirs(output_base, exist_ok=True)

for subfolder in sorted(os.listdir(root_folder)):
    sub_path = os.path.join(root_folder, subfolder)
    # Skip anything that is not a directory named sub<...>.
    if not (subfolder.startswith("sub") and os.path.isdir(sub_path)):
        continue
    csv_path = os.path.join(output_base, f'vertical_motion_{subfolder}.csv')
    run_optical_flow_on_folder(sub_path, csv_path)

print("\n✅✅ All subfolders processed.")


# how many pixels are moving
# up/down
# what if the video is badly recorded (like an earthquake in the video)
# compare all of the frames to the first one
# DO NOT include 0 as a value in the computation
# how much of the screen has moved more than 4 pixels
# mean of the remaining values, as a matrix, still to be defined
# (original mean with zeros dropped: fraction of high-motion px out of the remaining px)




✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub1.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub2.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub3.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub4.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub5.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub6.csv
✅ Saved: test1_subdir/motion_csvs/vertical_motion_sub7.csv

✅✅ All subfolders processed.
