<font size="6">Created by : Rashmi Pubuditha <br>Date : 2024/01/03</font>

In [1]:
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
import huffman
import os

<font size="5">Functions</font>

In [5]:
def motion_estimation(prev_frame, curr_frame, block_size):
    '''
    Calculate the motion vector using the previous frame and current frame. By analysing the search area find the best match by lowest MSE. 
    '''
    height, width = prev_frame.shape[:2]

    # Initialize the motion vectors
    motion_vectors = np.zeros((height // block_size, width // block_size, 2), dtype=int)


    for y in range(0, height, block_size):
        for x in range(0, width, block_size):
            # Define the search window
            search_area = curr_frame[y:y+block_size, x:x+block_size]

            # Search for the best match in the search window
            min_mse = float('inf')
            best_match = (0, 0)

            for dy in range(-block_size, block_size + 1):
                for dx in range(-block_size, block_size + 1):
                    # Define the current block in the previous frame
                    block = prev_frame[y+dy:y+dy+block_size, x+dx:x+dx+block_size]

                    # Check if the shapes of search_area and block are non-empty
                    if search_area.shape[0] > 0 and search_area.shape[1] > 0 and block.shape[0] > 0 and block.shape[1] > 0:
                        # Check if the shapes of search_area and block are compatible
                        if search_area.shape == block.shape:
                            # Calculate Mean Squared Error (MSE) as the matching criteria
                            mse = np.sum((search_area - block) ** 2)

                            # Update the best match if the current MSE is smaller
                            if mse < min_mse:
                                min_mse = mse
                                best_match = (dy, dx)

            # Update the motion vector for the current block
            motion_row = min(y // block_size, motion_vectors.shape[0] - 1)
            motion_col = min(x // block_size, motion_vectors.shape[1] - 1)
            motion_vectors[motion_row, motion_col] = best_match

    return motion_vectors


def motion_compensation(reference_frame, motion_vectors, block_size):
    '''
    Return the predicted image by motion compensation. 
    This predicted frame is used to get the residual frame. (Residual = Reference - predicted)
    '''
    height, width = reference_frame.shape[:2]
    predicted_frame = np.zeros_like(reference_frame)

    for y in range(0, height, block_size):
        for x in range(0, width, block_size):
            # Get the motion vector for the current block
            # dy, dx = motion_vectors[y // block_size, x // block_size]

            motion_row = min(y // block_size, motion_vectors.shape[0] - 1)
            motion_col = min(x // block_size, motion_vectors.shape[1] - 1)
            dy, dx = motion_vectors[motion_row, motion_col]

            # Apply motion compensation to predict the current block in the current frame
            predicted_block = reference_frame[y+dy:y+dy+block_size, x+dx:x+dx+block_size]

            # Update the predicted frame with the compensated block
            predicted_frame[y:y+block_size, x:x+block_size] = predicted_block

    return predicted_frame


def combine(inv_dct_blocks, height, width, block_size):
    ''' 
    Use to combine all the macroblocks after quantization. Reshaped into original image array size
    '''
    reconstructed_image_arr = np.zeros((height, width), dtype=np.float32)

    for i, (row, col) in enumerate([(i, j) for i in range(0, height, block_size) for j in range(0, width, block_size)]):
        reconstructed_image_arr[row:row + block_size, col:col + block_size] = inv_dct_blocks[i]

    return reconstructed_image_arr


def calculate_probabilities(data):
    ''' 
    Calculate the probabilities after combine all the macroblocks. for construct the one huffman mapping for one image.
    '''
    unique_values, counts = np.unique(data, return_counts=True)
    probabilities = counts / len(data)
    return zip(unique_values, probabilities/8)


def encode_with_huffman(data, huffman_mappings):
    ''' 
    Encode data using huffman mappings
    '''
    encoded_data = ""
    for value in data:
        encoded_data += huffman_mappings[value]
    return encoded_data

def decode_with_huffman(encoded_data, huffman_mappings):
    ''' 
    Decode the data using huffman mappings
    '''
    reverse_mappings = {v: k for k, v in huffman_mappings.items()}
            
    decoded_data = []
    current_code = ""
            
    for bit in encoded_data:
        current_code += bit
        if current_code in reverse_mappings:
            decoded_data.append(reverse_mappings[current_code])
            current_code = ""
            
    return decoded_data

def combine_and_convert_to_uint8(inv_dct_blocks, height, width, block_size):
    ''' 
    After decoding combine each blocks and reconstruct the frame in to oringinal frame array size
    '''
    reconstructed_image = np.zeros((height, width), dtype=np.float32)

    for i, (row, col) in enumerate([(i, j) for i in range(0, height, block_size) for j in range(0, width, block_size)]):
        reconstructed_image[row:row + block_size, col:col + block_size] = inv_dct_blocks[i]

    # Convert the reconstructed image back to uint8
    reconstructed_image = np.clip(reconstructed_image, 0, 255).astype(np.uint8)

    return reconstructed_image


def encode_frames(frame, block_size, quantization_matrix, output_file_path):
    ''' 
    Take frame --> devide into macroblocks --> Apply DCT --> Quantize --> combine blocks --> Calculate probabilities -->Take huffman mapping -->
    -->Encode data --> Save encode data into text file

    Apply this function to encode a frame
    '''
    height, width = frame.shape[:2]
    
    # Divide the frame into 8x8 macro blocks
    macro_blocks = [frame[i:i + block_size, j:j + block_size] for i in range(0, height, block_size) for j in range(0, width, block_size)]

    # Apply DCT to each macro block
    dct_macro_blocks = [cv2.dct(np.float32(block)) for block in macro_blocks]

    # Quantize the DCT coefficients using the given quantization matrix
    quantized_block = [np.round(dct_block / quantization_matrix) for dct_block in dct_macro_blocks]

    # combine again blocks and flattern
    re_img = combine(quantized_block, height, width, block_size)
    flat_re_img = re_img.flatten()
    # Take probalities and mapping huffman
    prob_re_img = list(calculate_probabilities(flat_re_img))
    huffman_mappings = huffman.codebook(prob_re_img)

    # Encode the flattened image using Huffman mappings
    encoded_data = encode_with_huffman(flat_re_img, huffman_mappings)
    
    # Write the encoded data to a text file
    with open(output_file_path, 'w') as output_file:
        output_file.write(encoded_data)

    return prob_re_img, huffman_mappings,encoded_data


def decode_frames(input_file_path,huffman_mappings,block_size, height,width, quantization_matrix):
    ''' 
    Read the text file --> Decode data --> Remake macroblocks --> Dequantized --> Apply inverse DCT --> Combine blocks and reconstruct the frame

    Apply this function to reconstruct a frame by reading the encoded file
    '''
    # Read the encoded data from the text file
    with open(input_file_path, 'r') as input_file:
        encoded_data_re = input_file.read()

    decoded_data = decode_with_huffman(encoded_data_re, huffman_mappings)
    decoded_img = np.array(decoded_data).reshape(height, width)
    macro_blocks_re = [decoded_img[i:i + block_size, j:j + block_size] for i in range(0, height, block_size) for j in range(0, width, block_size)]
    dequantized_blocks = [np.multiply(block, quantization_matrix) for block in macro_blocks_re]
    inv_dct_blocks = [cv2.idct(np.float32(block)) for block in dequantized_blocks]
    # reconstructed_image= combine_and_convert_to_uint8(inv_dct_blocks, height, width, block_size)
    reconstructed_image= combine(inv_dct_blocks, height, width, block_size)

    return reconstructed_image

def mv_encode(motion_vectors, output_file_path_mv):
    ''' 
    Encode the motion vector. Apply huffman for encoding. Save a text file
    '''
    flat_mv = motion_vectors.flatten()
    prob_mv = list(calculate_probabilities(flat_mv))
    huffman_mappings_mv = huffman.codebook(prob_mv)
    encoded_data_mv = encode_with_huffman(flat_mv, huffman_mappings_mv)
    
    # Write the encoded data to a text file
    with open(output_file_path_mv, 'w') as output_file:
        output_file.write(encoded_data_mv)

    return huffman_mappings_mv, encoded_data_mv

def mv_decode(input_file_path_mv,huffman_mappings_mv,height,width,block_size):
    ''' 
    Decode the motion vector by reading the encoded text file.
    '''
    size = (height // block_size, width // block_size, 2)
    with open(input_file_path_mv, 'r') as input_file:
        encoded_data_mv = input_file.read()
    
    decoded_data_mv = decode_with_huffman(encoded_data_mv, huffman_mappings_mv)
    decoded_mv = np.array(decoded_data_mv).reshape(size)

    return decoded_mv


def reconstruct_frame(i_frame, motion_vectors, residual_frame, block_size):
    ''' 
    Reconstruct the frame, Take reconstructed I frame as the first frame and predict next 9 frames using motion vectors and residual frame.
    This is the final output of Inter frame prediction
    '''
    height, width = i_frame.shape
    reconstructed_frame = np.copy(i_frame)

    for i in range(0, height, block_size):
        for j in range(0, width, block_size):
            # Extract motion vector for the current block
            mv_x, mv_y = motion_vectors[i // block_size, j // block_size]

            # Calculate the new position for the block based on motion vectors
            new_i = i + mv_y
            new_j = j + mv_x

            # Check if the new position is within the valid range
            if 0 <= new_i < height - block_size and 0 <= new_j < width - block_size:
                # Extract the corresponding block from the residual frame
                block_residual = residual_frame[i:i + block_size, j:j + block_size]

                # Update the reconstructed frame with the predicted block
                reconstructed_frame[i:i + block_size, j:j + block_size] = i_frame[new_i:new_i + block_size, new_j:new_j + block_size] + block_residual

    return reconstructed_frame


def calculate_average_length(probability, huffman_code):
    ''' 
    Average length = sum of (probability of symbol * len(huffman mapping))
    '''
    average_length = 0.0

    for symbol, prob in probability:
        code = huffman_code[symbol]
        average_length += len(code) * prob

    return average_length


def compression_ratio(height, width, avg_length):
    ''' 
    Compression ratio = size befor compression/ size after compression
    '''
    input_image_size = height*width*8
    compressed_size = height*width*avg_length
    compression_ratio = input_image_size /compressed_size
    return compression_ratio


def intra_predict(curr_frame,height, width):
    ''' 
    Function for Intra frame predintion. Intra-frame prediction by using the average of neighboring pixels
    '''
    predicted_image = np.zeros_like(curr_frame)

    for i in range(1, height):
        for j in range(1, width):
            # Predict the pixel value as the average of its neighbors
            predicted_image[i, j] = (curr_frame[i-1, j] + curr_frame[i, j-1] + curr_frame[i-1, j-1]) // 3

    return predicted_image


In [6]:
# Quantization matrix

quantization_matrix = np.array([[16, 11, 10, 16, 24, 40, 51, 61],
                                            [12, 12, 14, 19, 26, 58, 60, 55],
                                            [14, 13, 16, 24, 40, 57, 69, 56],
                                            [14, 17, 22, 29, 51, 87, 80, 62],
                                            [18, 22, 37, 56, 68, 109, 103, 77],
                                            [24, 35, 55, 64, 81, 104, 113, 92],
                                            [49, 64, 78, 87, 103, 121, 120, 101],
                                            [72, 92, 95, 98, 112, 100, 103, 99]])

<font size="5">Compress frames using Inter prediction <br>Used IPPPPPPPPPI pattern </font>

In [7]:
# Read the video
video_path = 'input_video.mp4'
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create a folder to save frames
output_folder1 = 'motionvector'
os.makedirs(output_folder1, exist_ok=True)

# Create a folder to save frames
output_folder2 = 'inter_predicted'
os.makedirs(output_folder2, exist_ok=True)

# Create a folder to save frames
output_folder3 = 'inter_residual'
os.makedirs(output_folder3, exist_ok=True)

# Create a folder to save frames
output_folder4 = 'inter_reconstructed'
os.makedirs(output_folder4, exist_ok=True)

# Create a folder to save frames
output_folder5 = 'i_frames'
os.makedirs(output_folder5, exist_ok=True)

# Create a folder to save text files
output_folder6 = 'all_text_files'
os.makedirs(output_folder6, exist_ok=True)

# Create a folder to save frames
output_folder8 = 'original_frames'
os.makedirs(output_folder8, exist_ok=True)

# Set the block size for macroblock search
block_size = 8
frame_counter = 0
i_frame_interval = 10  # (IPPPPPPPPPI pattern)
i_no = -1

# Make arrays to save data
original_frame_i = []
processed_frame_i_arr = []
video_arr = []
huffman_mappings_arr = []
huffman_mapping_mv_arr = []
current_bitrate_arr = []

while True:
    # Read the current frame
    ret, curr_frame = cap.read()
    if not ret:
        print('finished the process. Total number of frames =',frame_counter)
        break

    # for I frames (0,10, 20,30,... frames)
    if frame_counter % i_frame_interval == 0:

        # Convert to grayscale 
        curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(os.path.join(output_folder8, f'original_frame_{frame_counter}.png'), curr_frame_gray)
        original_frame_i.append(curr_frame_gray) # original frame array

        # Encode data and save text files
        output_file_path = os.path.join(output_folder6, f'text_i{frame_counter}.txt')
        prob_i, huffman_mappings, encoded_i = encode_frames(curr_frame_gray, block_size=8, quantization_matrix = quantization_matrix , output_file_path=output_file_path) # reconstructed i frame
          
        # Read the text files and decode data
        processed_frame_i = decode_frames(output_file_path,huffman_mappings, block_size, height,width, quantization_matrix)   # Decoded i frame
        cv2.imwrite(os.path.join(output_folder5, f'i_frames_{frame_counter}.png'), processed_frame_i)

        # calculate average length , compression ratio and bitrate
        average_len = calculate_average_length(prob_i, huffman_mappings)
        cr = compression_ratio(height, width, average_len)
        current_bitrate = len(encoded_i)*fps
        current_bitrate_arr.append(current_bitrate)

        # print information  
        print(f'saved I frame {frame_counter}, CR = {cr} , Bitrate {current_bitrate/1000} kbps')

        processed_frame_i_arr.append(processed_frame_i)  # reconstructed i frame array
        i_no += 1
        video_arr.append(processed_frame_i_arr[i_no])  # array of the video frames
        huffman_mappings_arr.append(huffman_mappings) # huffman mappings of frames


    # for predition frames (Inter prediction - P frames)
    elif frame_counter % i_frame_interval != 0:
            
        # Convert to grayscale for motion estimation
        curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(os.path.join(output_folder8, f'original_frame_{frame_counter}.png'), curr_frame_gray)

        # Perform motion estimation to get motion vectors
        motion_vectors = motion_estimation(prev_frame, curr_frame_gray, block_size)

        # Visualize the motion vectors (optional)
        for y in range(0, motion_vectors.shape[0]):
            for x in range(0, motion_vectors.shape[1]):
                dx, dy = motion_vectors[y, x]
                cv2.arrowedLine(curr_frame, (x * block_size + block_size // 2, y * block_size + block_size // 2),
                                (x * block_size + block_size // 2 + dx, y * block_size + block_size // 2 + dy),
                                color=(0, 255, 0), thickness=1)
                    
            # Save the frame with motion vectors
            cv2.imwrite(os.path.join(output_folder1, f'motion_vector_{frame_counter}.png'), curr_frame)


        # Encode and save text files of motion vectors              
        output_file_path_mv = os.path.join(output_folder6, f'text_mv{frame_counter}.txt')
        huffman_mapping_mv,encoded_mv = mv_encode(motion_vectors, output_file_path_mv)
        huffman_mapping_mv_arr.append(huffman_mapping_mv)  # huffman mapping of motion vectors

        # Perform motion compensation to predict the current frame
        predicted_frame = motion_compensation(prev_frame, motion_vectors , block_size)
        cv2.imwrite(os.path.join(output_folder2, f'inter_predicted_frame_{frame_counter}.png'), predicted_frame)

        # take the residual frame
        residual = cv2.absdiff(original_frame_i[i_no], predicted_frame)
        cv2.imwrite(os.path.join(output_folder3, f'inter_residual_frame_{frame_counter}.png'), residual)

        # encode the residual and save text files
        output_file_path_res = os.path.join(output_folder6, f'text_residual{frame_counter}.txt')
        prob_i,huffman_mappings,encoded_i = encode_frames(residual, block_size=8, quantization_matrix = quantization_matrix, output_file_path=output_file_path_res)   
        huffman_mappings_arr.append(huffman_mappings) # huffman mapping of frames

        # Decoding files

        # Decoding motionvector
        motion_vectors_decode = mv_decode(output_file_path_mv, huffman_mapping_mv, height,width,block_size)  
        # Decoding residual
        processed_frame_res = decode_frames(output_file_path_res,huffman_mappings, block_size, height,width, quantization_matrix)

        # Reconstruct the frame and save png
        final_predicted_image = reconstruct_frame(processed_frame_i_arr[i_no], motion_vectors_decode, processed_frame_res, block_size)
        cv2.imwrite(os.path.join(output_folder4, f'reconstructed_{frame_counter}.png'), final_predicted_image)

        # calculate average length, compression ratio and bitrate
        average_len = calculate_average_length(prob_i, huffman_mappings)
        cr = compression_ratio(height, width, average_len)
        current_bitrate = (len(encoded_i)+len(encoded_mv))*fps
        current_bitrate_arr.append(current_bitrate)

        print(f'saved predicted frame {frame_counter}, CR = {cr} , Bitrate {current_bitrate/1000} kbps')
        
        video_arr.append(final_predicted_image)

    prev_frame = curr_frame_gray

    frame_counter += 1         

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()

saved I frame 0, CR = 47.583182473837574 , Bitrate 7747.275 kbps
saved predicted frame 1, CR = 62.29210407364069 , Bitrate 6200.825 kbps
saved predicted frame 2, CR = 61.204871286142385 , Bitrate 6283.075 kbps
saved predicted frame 3, CR = 60.29909094998379 , Bitrate 6374.85 kbps
saved predicted frame 4, CR = 59.55556273576905 , Bitrate 6462.325 kbps
saved predicted frame 5, CR = 58.94891701513543 , Bitrate 6557.0 kbps
saved predicted frame 6, CR = 58.39633438808122 , Bitrate 6576.5 kbps
saved predicted frame 7, CR = 57.96203631275032 , Bitrate 6643.375 kbps
saved predicted frame 8, CR = 57.55054250253683 , Bitrate 6662.15 kbps
saved predicted frame 9, CR = 57.127139032771645 , Bitrate 6697.55 kbps
saved I frame 10, CR = 47.644373216841785 , Bitrate 7737.325 kbps
saved predicted frame 11, CR = 62.69680979977806 , Bitrate 6139.9 kbps
saved predicted frame 12, CR = 61.99146574737773 , Bitrate 6184.275 kbps
saved predicted frame 13, CR = 61.28372649743156 , Bitrate 6249.025 kbps
saved pre

<font size="5">Reconstruct the video </font>

In [15]:
# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'H264')
out = cv2.VideoWriter('Reconstructed_video_inter_prediction1.mp4', fourcc, fps, (width, height), isColor=False)  # isColor=False for grayscale video

# Write each frame to the video
for frame in video_arr:
    out.write(frame)

# Release the VideoWriter object
out.release()

<font size="5">Get the output video using transmitted files (This code, Decoding text files  reconstruct the video)</font>

In [13]:
fps = 25 
width = 640 
height = 360 

# Video writer to save the processed video
output_path = "Transmit_reconstructed_video.mp4"

fourcc = cv2.VideoWriter_fourcc(*'H264')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)  # isColor=False for grayscale video

# Create a folder to save frames
output_folder1 = 'Transmit_reconstructed'
os.makedirs(output_folder1, exist_ok=True)

# Set the block size for macroblock search
block_size = 8
frame_counter = 0
i_frame_interval = 10
i_no = -1
total_frames = 100

folder_name = 'all_text_files'

frame_i_arr = []
video_arr_decoded = []
mv=0

while frame_counter < total_frames:

    if frame_counter % i_frame_interval == 0:
        
        # Decode I frames
        input_file_path = os.path.join(folder_name, f'text_i{frame_counter}.txt')
        frame_i = decode_frames(input_file_path,huffman_mappings_arr[frame_counter], block_size, height,width, quantization_matrix)  # Decoded I frames
        frame_i_arr.append(frame_i) # I frame array
        i_no += 1
        video_arr_decoded.append(frame_i_arr[i_no])
 

    elif frame_counter % i_frame_interval != 0:
            
            # Decode motion vectors
            input_file_path_mv = os.path.join(folder_name, f'text_mv{frame_counter}.txt')
            mv_decodes = mv_decode(input_file_path_mv, huffman_mapping_mv_arr[mv], height,width,block_size)  # Decoded motion vectors

            # Decode residuals
            input_file_path_res = os.path.join(folder_name, f'text_residual{frame_counter}.txt')
            frame_res = decode_frames(input_file_path_res,huffman_mappings_arr[frame_counter], block_size, height,width, quantization_matrix) # Decoded residuals
            
            # reconstructed frames and save
            predicted_image = reconstruct_frame(frame_i_arr[i_no], mv_decodes, frame_res, block_size)
            cv2.imwrite(os.path.join(output_folder1, f'm_reconstructed_{frame_counter}.png'), predicted_image)

            print(f'Saved reconstructed frame {frame_counter}')
            video_arr_decoded.append(predicted_image)
            mv += 1 

    frame_counter += 1  

# Write each frame to the video
for frame in video_arr_decoded:
    out.write(frame)       

# Release the video capture and close the window
out.release()
print('Reconstructed video is saved')

cap.release()
cv2.destroyAllWindows()

Saved reconstructed frame 1
Saved reconstructed frame 2
Saved reconstructed frame 3
Saved reconstructed frame 4
Saved reconstructed frame 5
Saved reconstructed frame 6
Saved reconstructed frame 7
Saved reconstructed frame 8
Saved reconstructed frame 9
Saved reconstructed frame 11
Saved reconstructed frame 12
Saved reconstructed frame 13
Saved reconstructed frame 14
Saved reconstructed frame 15
Saved reconstructed frame 16
Saved reconstructed frame 17
Saved reconstructed frame 18
Saved reconstructed frame 19
Saved reconstructed frame 21
Saved reconstructed frame 22
Saved reconstructed frame 23
Saved reconstructed frame 24
Saved reconstructed frame 25
Saved reconstructed frame 26
Saved reconstructed frame 27
Saved reconstructed frame 28
Saved reconstructed frame 29
Saved reconstructed frame 31
Saved reconstructed frame 32
Saved reconstructed frame 33
Saved reconstructed frame 34
Saved reconstructed frame 35
Saved reconstructed frame 36
Saved reconstructed frame 37
Saved reconstructed fra

<font size="5">Adjust the compression ratio of all the frame for transmit in a fixed bitrate<br>The fixed bitrate is taken as = 6500 kbps</font>

In [8]:
# Read the video
video_path = 'input_video.mp4'
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create a folder to save frames
output_folder3 = 'adjust_output_frames_residual'
os.makedirs(output_folder3, exist_ok=True)

# Create a folder to save frames
output_folder4 = 'adjust_reconstructed'
os.makedirs(output_folder4, exist_ok=True)

# Create a folder to save frames
output_folder1 = 'adjust_i_frames'
os.makedirs(output_folder1, exist_ok=True)

# Create a folder to save frames
output_folder7 = 'adjust_text_files'
os.makedirs(output_folder7, exist_ok=True)

# Set the block size for macroblock search
block_size = 8
frame_counter = 0
i_frame_interval = 10
i_no = -1
target_bitrate = 6500000  # fixed bitrate is taken as 6500 kbps 

original_frame_i = []
processed_frame_i_arr = []
adjust_video_arr = []
huffman_mappings_i_arr = []
huffman_mapping_mv_arr = []

while True:
    # Read the current frame
    ret, curr_frame = cap.read()
    if not ret:
        print('finished the process. Total number of frames =',frame_counter)
        break

    current_bitrate = current_bitrate_arr[frame_counter]

    # for I frames (0,10,20,30,..)
    if frame_counter % i_frame_interval == 0:

        # Convert to grayscale for motion estimation
        curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

        original_frame_i.append(curr_frame_gray)

        # if current bitrate is greater that target bitrate reduse the quality by multiply Quantizaiton matrix by a factor
        if current_bitrate > target_bitrate:
            factor = 1
            # this loop is running until the target bitrate achived
            while current_bitrate > target_bitrate:
                factor += 0.1
                quantization_matrix1 = np.copy(quantization_matrix)
                quantization_matrix_re = np.round(quantization_matrix1 * factor).astype(int) 
                output_file_path = os.path.join(output_folder7, f'text_i_adjust_{frame_counter}.txt')
                prob_i, huffman_mappings_i, encoded_i = encode_frames(curr_frame_gray, block_size=8, quantization_matrix = quantization_matrix_re , output_file_path=output_file_path) # reconstructed i frame
      
                processed_frame_i = decode_frames(output_file_path,huffman_mappings_i, block_size, height,width, quantization_matrix)   # Decoded i frame

                average_len = calculate_average_length(prob_i, huffman_mappings_i)
                cr = compression_ratio(height, width, average_len)
                current_bitrate = len(encoded_i)*fps
                print('CR = ',cr, ', Bitrate', current_bitrate/1000,'kbps')

        # if current bitrate is less than target bitrate no need to reduse quality
        else:
            output_file_path = os.path.join(output_folder7, f'text_i_adjust_{frame_counter}.txt')
            prob_i, huffman_mappings_i, encoded_i = encode_frames(curr_frame_gray, block_size=8, quantization_matrix = quantization_matrix , output_file_path=output_file_path) # reconstructed i frame
                
            processed_frame_i = decode_frames(output_file_path,huffman_mappings_i, block_size, height,width, quantization_matrix)   # Decoded i frame

            average_len = calculate_average_length(prob_i, huffman_mappings_i)
            cr = compression_ratio(height, width, average_len)
            current_bitrate = len(encoded_i)*fps
            print('CR = ',cr, ', Bitrate', current_bitrate/1000,'kbps')

        print(f"saved i frame {frame_counter}, Adjusted compression ratio is {cr}. Target bitrate: {target_bitrate/1000} kbps, Current bitrate : {current_bitrate/1000} kbps")
        cv2.imwrite(os.path.join(output_folder1, f'i_frames_adjust_{frame_counter}.png'), processed_frame_i)
        processed_frame_i_arr.append(processed_frame_i)
        adjust_video_arr.append(processed_frame_i)
        huffman_mappings_i_arr.append(huffman_mappings_i)
        i_no += 1

    # For P frames
    elif frame_counter % i_frame_interval != 0:
            
        # Convert to grayscale for motion estimation
        curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        
        # Perform motion estimation to get motion vectors
        motion_vectors = motion_estimation(prev_frame, curr_frame_gray, block_size)
                        
        output_file_path_mv = os.path.join(output_folder6, f'text_mv{frame_counter}.txt')
        huffman_mapping_mv,encoded_mv = mv_encode(motion_vectors, output_file_path_mv)
        huffman_mapping_mv_arr.append(huffman_mapping_mv)

        motion_vectors_decode = mv_decode(output_file_path_mv, huffman_mapping_mv, height,width,block_size)  # Decodeed motion vector

        # Perform motion compensation to predict the current frame
        predicted_frame = motion_compensation(prev_frame, motion_vectors_decode , block_size)

        residual = cv2.absdiff(original_frame_i[i_no], predicted_frame)
        cv2.imwrite(os.path.join(output_folder3, f'Residual_frame_{frame_counter}.png'), residual)

        # if current bitrate is greater that target bitrate reduse the quality by multiply Quantizaiton matrix by a factor
        if current_bitrate > target_bitrate:
            factor = 1
            while current_bitrate > target_bitrate:
                factor += 0.1
                quantization_matrix2 = np.copy(quantization_matrix)
                quantization_matrix_res = np.round(quantization_matrix2 * factor).astype(int) 

                output_file_path_res = os.path.join(output_folder6, f'text_residual{frame_counter}.txt')
                prob_i,huffman_mappings_i,encoded_i = encode_frames(residual, block_size=8, quantization_matrix = quantization_matrix_res, output_file_path=output_file_path_res)   
                huffman_mappings_i_arr.append(huffman_mappings_i)
                processed_frame_res = decode_frames(output_file_path_res,huffman_mappings_i, block_size, height,width, quantization_matrix)  # Decoded residual

                average_len = calculate_average_length(prob_i, huffman_mappings_i)
                cr = compression_ratio(height, width, average_len)
                current_bitrate = (len(encoded_i)+len(encoded_mv))*fps
                print('CR = ',cr, ', Bitrate', current_bitrate/1000,'kbps')
                
        # if current bitrate is less than target bitrate no need to reduse quality
        else:
                output_file_path_res = os.path.join(output_folder6, f'text_residual{frame_counter}.txt')

                prob_i,huffman_mappings_i,encoded_i = encode_frames(residual, block_size=8, quantization_matrix = quantization_matrix, output_file_path=output_file_path_res)   
                huffman_mappings_i_arr.append(huffman_mappings_i)
                processed_frame_res = decode_frames(output_file_path_res,huffman_mappings_i, block_size, height,width, quantization_matrix)  # Decoded residual

                average_len = calculate_average_length(prob_i, huffman_mappings_i)
                cr = compression_ratio(height, width, average_len)
                current_bitrate = (len(encoded_i)+len(encoded_mv))*fps
                print('CR = ',cr, ', Bitrate', current_bitrate/1000,'kbps')
        
        print(f"saved predicted frame {frame_counter}, Adjusted compression ratio is {cr}. Target bitrate: {target_bitrate/1000} kbps, Current bitrate : {current_bitrate/1000} kbps")

        # Get the final predicted images using compressed i frames, motion vectors and residuals
        final_predicted_image = reconstruct_frame(processed_frame_i_arr[i_no], motion_vectors, processed_frame_res, block_size)
        
        # Save the predicted image
        cv2.imwrite(os.path.join(output_folder4, f'reconstructed_{frame_counter}.png'), final_predicted_image)

        adjust_video_arr.append(final_predicted_image)

    prev_frame = curr_frame_gray

    frame_counter += 1        

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()

CR =  48.18713297408233 , Bitrate 7650.175 kbps
CR =  48.75416600539596 , Bitrate 7561.2 kbps
CR =  49.33833444308004 , Bitrate 7471.675 kbps
CR =  49.796196799259775 , Bitrate 7402.975 kbps
CR =  50.31219931623231 , Bitrate 7327.05 kbps
CR =  50.68993255367861 , Bitrate 7272.45 kbps
CR =  51.06082379624979 , Bitrate 7219.625 kbps
CR =  51.43951524284955 , Bitrate 7166.475 kbps
CR =  51.74039973051878 , Bitrate 7124.8 kbps
CR =  52.01123072364803 , Bitrate 7087.7 kbps
CR =  52.27731196709976 , Bitrate 7051.625 kbps
CR =  52.51525177447675 , Bitrate 7019.675 kbps
CR =  52.81337526235483 , Bitrate 6980.05 kbps
CR =  53.02074718636512 , Bitrate 6952.75 kbps
CR =  53.27745059074323 , Bitrate 6919.25 kbps
CR =  53.46928859186952 , Bitrate 6894.425 kbps
CR =  53.64435729435344 , Bitrate 6871.925 kbps
CR =  53.87504567044209 , Bitrate 6842.5 kbps
CR =  54.04307877250787 , Bitrate 6821.225 kbps
CR =  54.20937311589194 , Bitrate 6800.3 kbps
CR =  54.37910039349028 , Bitrate 6779.075 kbps
CR =  

In [9]:
# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'H264')
out = cv2.VideoWriter('Adjusted_reconstructed_video.mp4', fourcc, fps, (width, height), isColor=False)  # isColor=False for grayscale video

# Write each frame to the video
for frame in adjust_video_arr:
    out.write(frame)

# Release the VideoWriter object
out.release()

<font size="5">Method 2: Compress each frames using Intra prediction (By taking the average of neiboring pixels) - Additional code</font>

In [15]:
# Read the video
video_path = 'input_video.mp4'
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create a folder to save frames
output_folder7 = 'intra_predicted'
os.makedirs(output_folder7, exist_ok=True)

# Create a folder to save frames
output_folder9 = 'intra_residual'
os.makedirs(output_folder9, exist_ok=True)

# Create a folder to save frames
output_folder10 = 'intra_text'
os.makedirs(output_folder10, exist_ok=True)

# Create a folder to save frames
output_folder11 = 'intra_reconstructed'
os.makedirs(output_folder11, exist_ok=True)

# Set the block size for macroblock search
block_size = 8
frame_counter = 0

while True:
    # Read the current frame
    ret, curr_frame = cap.read()
    if not ret:
        print('Finished the process. Total number of frames =',frame_counter)
        break
    # convert to gray scale
    curr_frame_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

    # Encoding intra predicted frame
    intra_predicted_result = intra_predict(curr_frame_gray,height,width)
    cv2.imwrite(os.path.join(output_folder7, f'intra_predicted_frame_{frame_counter}.png'), intra_predicted_result)
    output_file_path_intra_pred = os.path.join(output_folder10, f'intra_text_predicted{frame_counter}.txt')
    prob_intra_pred,huffman_mappings_intra_pred,encoded_intra_pred = encode_frames(intra_predicted_result, block_size=8, quantization_matrix = quantization_matrix, output_file_path=output_file_path_intra_pred)

    # Encoding intra residual frame
    residual_intra = cv2.absdiff(curr_frame_gray, intra_predicted_result)
    cv2.imwrite(os.path.join(output_folder9, f'intra_residual_frame_{frame_counter}.png'), residual_intra)
    output_file_path_intra_res = os.path.join(output_folder10, f'intra_text_residual{frame_counter}.txt')
    prob_intra_res,huffman_mappings_intra_res,encoded_intra_res= encode_frames(residual_intra, block_size=8, quantization_matrix = quantization_matrix, output_file_path=output_file_path_intra_res)
            
    # Decoding Transmitted files
    processed_frame_intra_pred = decode_frames(output_file_path_intra_pred,huffman_mappings_intra_pred, block_size, height,width, quantization_matrix)  # Decoded predicted
    processed_frame_intra_res = decode_frames(output_file_path_intra_res,huffman_mappings_intra_res, block_size, height,width, quantization_matrix)  # Decoded residual     
    
    # reconstruct the image
    predicted_intra_frame = intra_predicted_result + processed_frame_intra_res
    # save the reconstructed image
    cv2.imwrite(os.path.join(output_folder11, f'reconstructed_intra_{frame_counter}.png'), predicted_intra_frame)

    # Calculate average length and compression ratio
    average_len_intra_pred = calculate_average_length(prob_intra_pred, huffman_mappings_intra_pred)
    average_len_intra_res = calculate_average_length(prob_intra_res, huffman_mappings_intra_res)
    # calculate compression ratio
    cr_pred = compression_ratio(height, width, average_len_intra_pred)  # Compression ratio for predicted frames
    cr_res = compression_ratio(height, width, average_len_intra_res)  # Compression ratio for residual frames
    # calculate bitrate
    current_bitrate_intra = (len(encoded_intra_pred)+len(encoded_intra_res))*fps

    print(f'Saved frame {frame_counter}, CR_predict = {cr_pred} , CR_residual = {cr_res}, Bitrate {current_bitrate_intra/1000} kbps')

    frame_counter += 1
    
# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()    

Saved frame 0, CR_predict = 43.86730568422181 , CR_residual = 44.864044201988015, Bitrate 16620.35 kbps
Saved frame 1, CR_predict = 43.897864587519734 , CR_residual = 44.90038275676223, Bitrate 16607.85 kbps
Saved frame 2, CR_predict = 43.85712883566962 , CR_residual = 44.89108760179619, Bitrate 16617.35 kbps
Saved frame 3, CR_predict = 43.92898957008244 , CR_residual = 44.97584305304769, Bitrate 16588.125 kbps
Saved frame 4, CR_predict = 43.98323664076596 , CR_residual = 45.00659276261175, Bitrate 16572.175 kbps
Saved frame 5, CR_predict = 44.00358102059089 , CR_residual = 45.00439496044537, Bitrate 16568.7 kbps
Saved frame 6, CR_predict = 43.978776518167926 , CR_residual = 44.99684165234984, Bitrate 16574.8 kbps
Saved frame 7, CR_predict = 44.02184134869432 , CR_residual = 45.02335806540257, Bitrate 16561.775 kbps
Saved frame 8, CR_predict = 43.96042083421529 , CR_residual = 44.959798520000156, Bitrate 16585.05 kbps
Saved frame 9, CR_predict = 43.95871715527571 , CR_residual = 45.010