In [3]:
# mini project_2023

#import libraries
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import math
import ffmpeg
from scipy.fftpack import dct, idct
from scipy.signal import correlate2d
from scipy.ndimage import map_coordinates

#functions for the image encoder and decoder-------------------------------------------------------------------------------------------------------------------

#Divide into Marco-blocks function----------------------------------------------
def get_macroblocks(gray_image):
    block_size=8
    height, width = gray_image.shape
    num_blocks_height = height // block_size
    num_blocks_width = width // block_size
    macroblocks = []
    for i in range(num_blocks_height):
        for j in range(num_blocks_width):
            # Extract a macroblock
            macroblock = gray_image[i * block_size: (i + 1) * block_size,
                                     j * block_size: (j + 1) * block_size]
            macroblocks.append(macroblock)
    return macroblocks

#Perform DCT function-----------------------------------------------------------
def dct_run(macroblocks):
    transformed_macroblocks = []
    for macroblock in macroblocks:
        # Apply 2D DCT to each macroblock
        dct_block = cv2.dct(np.float32(macroblock))
        transformed_macroblocks.append(dct_block)
    return transformed_macroblocks

#Perform IDCT function----------------------------------------------------------
def inverse_dct_transform(quantized_macroblocks):
    # Initialize an empty list to store the reconstructed macroblocks
    reconstructed_macroblocks = []
    for quantized_block in quantized_macroblocks:
        reconstructed_block = cv2.idct(np.float32(quantized_block))
        reconstructed_block = np.clip(reconstructed_block, 0, 255)
        reconstructed_macroblocks.append(reconstructed_block)
    return reconstructed_macroblocks

#Quantization or de-quantization function---------------------------------------
def quan_or_dequan(coefficients,level,function,fac_max,fac_min,pq):
  # print("max",fac_max)
  # print("min",fac_min)
  #for quantization>> function=1, for dequantization >>function=2
  if int(level) == 1:
     factor=fac_max
  elif int(level)==2:
     factor=(fac_max+fac_min)/2
  elif int(level)==3:
     factor=fac_min
  else:
    raise ValueError("Invalid quality level")
  
  quantization_matrix_1 = np.array([[32, 22, 20, 32, 48, 80, 102, 122],
                                         [24, 24, 28, 38, 52, 116, 120, 110],
                                         [28, 26, 32, 48, 80, 114, 138, 112],
                                         [28, 34, 44, 58, 102, 174, 160, 124],
                                         [36, 44, 74, 112, 136, 218, 206, 154],
                                         [48, 70, 110, 128, 162, 208, 226, 184],
                                         [98, 128, 156, 174, 206, 242, 240, 202],
                                         [144, 184, 190, 196, 224, 200, 206, 198]])
  
  quantization_matrix=quantization_matrix_1*(factor/32)*pq
  # print("Factor=",factor)

  if function==1:
    result_coefficients = np.round(coefficients / quantization_matrix).astype(int)
  if function==2:
    result_coefficients = coefficients * quantization_matrix
  return result_coefficients 
  
 

#Run-length coding fuction------------------------------------------------------
def generate_rle(quantized_coefficients):
  rle_blocks = []
  for block in quantized_coefficients:
      rle_block = []
      current_value = None
      run_length = 0
      for value in block.flatten():
          if value == current_value:
                run_length += 1
          else:
              if current_value is not None:
                  rle_block.append((current_value, run_length))
              current_value = value
              run_length = 1
      if current_value is not None:
          rle_block.append((current_value, run_length))
      rle_blocks.append(rle_block)
  return rle_blocks

#Write run-length code into text file function----------------------------------
def write_rle_to_text_file(rle_blocks, filename):
    with open(filename, 'w') as f:
        for rle_block in rle_blocks:
            for value, run_length in rle_block:
                f.write(f"{value},{run_length}")
                f.write("\n")

#Zig Zag scan function----------------------------------------------------------
def zigzag_scan(matrix):
    rows, cols = matrix.shape
    result = []
    for i in range(rows + cols - 1):
        if i % 2 == 0:  # moving up
            for j in range(min(i, rows - 1), max(0, i - cols + 1) - 1, -1):
                result.append(matrix[j, i - j])
        else:  # moving down
            for j in range(max(0, i - cols + 1), min(i, rows - 1) + 1):
                result.append(matrix[j, i - j])
    return result

#Read the bit stream------------------------------------------------------------
def split_into_macroblocks(lst):
    macroblocks = []
    current_macroblock = []
    sum=0
    for i in range(0, len(lst), 2):
        value, length = lst[i], lst[i + 1]
        current_macroblock.append((value, length))
        sum+=lst[i+1]
        if sum == 64:
            macroblocks.append(current_macroblock)
            current_macroblock = []
            sum=0
    return macroblocks

#Decode the run length code function--------------------------------------------
def decode_all_macroblocks(run_length_codes):
    decoded_macroblocks = []
    for run_length_code in run_length_codes:
        decoded_macroblock = []
        for value, length in run_length_code:
            decoded_macroblock.extend([value] * length)
        decoded_macroblocks.append(decoded_macroblock)
    return decoded_macroblocks

#Inverse Zigzag scan------------------------------------------------------------
def inv_zigzag(matrix):
  temp=0
  new=np.zeros(64)
  flat_mat=np.array(matrix).flatten()
  indices=[0, 1, 8, 16, 9, 2, 3, 10,
           17, 24, 32, 25, 18, 11, 4, 5,
           12, 19, 26, 33, 40, 48, 41, 34,
           27, 20, 13, 6, 7, 14, 21, 28,
           35, 42, 49, 56, 57, 50, 43, 36,
           29, 22, 15, 23, 30, 37, 44, 51,
           58, 59, 52, 45, 38, 31, 39, 46,
           53, 60, 61, 54, 47, 55, 62, 63]
  for k in range(len(indices)):
    temp=flat_mat[k]
    new[indices[k]]=temp
  new_mat=np.array(new).reshape(8,8)
  return new_mat

#Image reconstruction function--------------------------------------------------
def re_con(idct_blocks,height,width,block_size):
  reconstructed_image = np.zeros((height, width), dtype=np.float32)
  for i, (row, col) in enumerate([(i, j) for i in range(0, height, block_size) for j in range(0, width, block_size)]):
    reconstructed_image[row:row + block_size, col:col + block_size] = idct_blocks[i]
  reconstructed_image = np.clip(reconstructed_image, 0, 255).astype(np.uint8)
  return reconstructed_image

#decimal to binary and binary to decimal function-------------------------------
def decimal_to_binary_9bit(decimal_value):
    if decimal_value >= 0:
        binary_value = bin(decimal_value)[2:].zfill(8)
        binary_representation = '0' + binary_value
    else:
        binary_value = bin(abs(decimal_value))[2:].zfill(8)
        binary_representation = '1' + binary_value
    return binary_representation

def binary_to_decimal_9bit(binary_value):
    sign_bit = int(binary_value[0])
    value_bits = binary_value[1:]
    decimal_value = int(value_bits, 2)
    if sign_bit == 1:
        decimal_value = -decimal_value
    return decimal_value

#Excract frames from video function---------------------------------------------
def excract_frame(video_path):
  video = cv2.VideoCapture(video_path)
  fps = video.get(cv2.CAP_PROP_FPS)
  frame_list=[]
  count=0
  for i in range(10):
    ms = 1000*((1/fps)*i)
    video.set(cv2.CAP_PROP_POS_MSEC, ms)
    ret, frame = video.read()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    arr=np.array(gray_frame)
    frame_list.append(arr)
    count+=1
  return frame_list

#Main frame compressor function----------------------------------------------------------
def comp(gray_image,target_size,quality):
  pq=1
  for i in range(10**9):
    height,width=gray_image.shape
    marcoBlocks_ori=get_macroblocks(gray_image)
    dct_val=dct_run(marcoBlocks_ori)
    fac_max=(max(abs(np.max(dct_val)),abs(np.min(dct_val))))/255
    min_abs_value = np.min(np.abs(dct_val))
    min_abs_val = 1 if min_abs_value == 0 else min_abs_value
    fac_min=min_abs_val/0.05
    
    
    quan_dct_val=quan_or_dequan(dct_val,quality,1,fac_max,fac_min,pq)
    zig_list=[]
    for i in range(len(quan_dct_val)):
      zig=zigzag_scan(quan_dct_val[i])
      zig_list.append(zig)
    zig_list=np.array(zig_list)
    lis_z=[]
    for i in range(len(zig_list)):
      arr=np.array(zig_list[i]).reshape(8,8)
      lis_z.append(arr)
    zig_rl=generate_rle(lis_z)
    bit_st=[]
    for i in range(len(zig_rl)):
      for j in range(len(zig_rl[i])):
        bin_val_1=decimal_to_binary_9bit(zig_rl[i][j][0])
        bit_st.append(bin_val_1)
        bin_val_2=bin(zig_rl[i][j][1])[2:].zfill(8)
        bit_st.append(bin_val_2)
    binary_string = ''.join(bit_st)
    size=len(binary_string)
    print("Bit stream size=",size)
    if size/1000>target_size:
      pq+=1
      continue
    else:
      break
  return binary_string,fac_max,fac_min,pq,quality

#Transmission___________________________________________________________________

#Main de compressor function-------------------------------------------------------------
def de_comp(bit_stream,height,wdith,fac_max,fac_min,pq,quality):
    decimal_values=[]
    for i in range(0, len(bit_stream), 17):  # 9 bits + 8 bits = 17 bits in each iteration
        # Reading 9 bits
        val1 = bit_stream[i:i+9]
        dec1=binary_to_decimal_9bit(str(val1))
        decimal_values.append(dec1)
        val2 = bit_stream[i+9:i+17]
        dec2=int(val2,2)
        decimal_values.append(dec2)
    result = split_into_macroblocks(decimal_values)
    #Decode
    decoded_macroblocks = decode_all_macroblocks(result)
    #Convert decoded runlength code to 8x8 blocks in zig zag
    lis_zig=[]
    for i in range(len(decoded_macroblocks)):
      arr_zig=np.array(decoded_macroblocks[i]).reshape(8,8)
      lis_zig.append(arr_zig)
    #reconstruct the blocks accorfing to zig zag scan
    lis_zig2=[]
    for j in range(len(lis_zig)):
      arr_zig2=inv_zigzag(lis_zig[j])
      lis_zig2.append(arr_zig2)
    #Apply de quantization in zig zag
    de_con_zig=quan_or_dequan(lis_zig2,quality,2,fac_max,fac_min,pq)
    #Apply IDCT in zig zag
    idct_blocks=inverse_dct_transform(de_con_zig)
    #Image reconstruction in zig zag
    img_re_zig=re_con(idct_blocks,height,wdith,8)
    return img_re_zig
#-------------------------------------------------------------------------------------------------------------------------




In [9]:

#video encoder________________________________________________________________________________________________________

#main functions-------------------------------------------------------------------------------------------------
#motion vector calculate function--------------------------------------------
def calculate_motion_vectors(prev_frame, curr_frame, block_size, search_block):
    height, width = curr_frame.shape
    motion_vectors = np.zeros((height//block_size, width//block_size, 2), dtype=np.int32)
    for i in range(0, height, block_size):
        for j in range(0, width, block_size):
            block_prev = prev_frame[i:i+block_size, j:j+block_size]
            motion_vector = np.array([0, 0])
            step = search_block
            while step >= 1:
                # Ensure that the search area is larger or equal to the block_prev
                search_area = curr_frame[max(0, i-motion_vector[0]-step):min(height, i-motion_vector[0]+step+block_size),
                                            max(0, j-motion_vector[1]-step):min(width, j-motion_vector[1]+step+block_size)]
                block_search = block_prev[:search_area.shape[0], :search_area.shape[1]]
                # cross-correlation
                correlation_map = correlate2d(search_area, block_search, mode='valid')
                # Find the maximum correlation
                max_corr_location = np.unravel_index(np.argmax(correlation_map), correlation_map.shape)
                # Update the motion vector
                motion_vector += np.subtract(max_corr_location, (i-motion_vector[0]-step, j-motion_vector[1]-step))
                # Reduce the step size
                step //= 2
            # Store the motion vector
            motion_vectors[i//block_size, j//block_size] = motion_vector
    return motion_vectors

#calculate residual frame function-------------------------------------------------------
def residual_frames_build(previous_frame, current_frame, block_size, search_range):
    motion_vectors = calculate_motion_vectors(previous_frame, current_frame, block_size, search_range)

    height, width = current_frame.shape
    residual_frame = np.zeros((height, width), dtype=np.int32)
    for i in range(0, height, block_size):
        for j in range(0, width, block_size):
            motion_vector = motion_vectors[i//block_size, j//block_size]
            # find motion-compensated block from the previous frame
            mv_x, mv_y = motion_vector
            # calculate the new position based on the motion vector
            new_i, new_j = int(i + mv_y), int(j + mv_x)
            # check that the new position is within the frame boundaries
            new_i = np.clip(new_i, 0, previous_frame.shape[0] - block_size)
            new_j = np.clip(new_j, 0, previous_frame.shape[1] - block_size)
            # Extract the block
            interpolated_block = map_coordinates(previous_frame, np.meshgrid(np.arange(new_i, new_i + block_size),
                                                                    np.arange(new_j, new_j + block_size)),
                                                  order=1, mode='nearest')
            # Subtract the motion-compensated block from the current frame block
            residual_frame[i:i+block_size, j:j+block_size] = current_frame[i:i+block_size, j:j+block_size] - interpolated_block
    return residual_frame

#motion compensation prediction function---------------------------------------------------
def motion_prediction(base_frame, motion_vectors, residual_frame, block_size):
    height, width = base_frame.shape
    predicted_frame = np.zeros_like(base_frame, dtype=np.int32)

    for i in range(0, height, block_size):
        for j in range(0, width, block_size):
            motion_vector = motion_vectors[i//block_size, j//block_size]
            # find the motion vector components
            mv_x, mv_y = motion_vector
            # Calculate the new position based on the motion vector
            new_i, new_j = int(i + mv_y), int(j + mv_x)
            # check that the new position is within the frame boundaries
            new_i = np.clip(new_i, 0, base_frame.shape[0] - block_size)
            new_j = np.clip(new_j, 0, base_frame.shape[1] - block_size)
            # extract the block
            interpolated_block = map_coordinates(base_frame, np.meshgrid(np.arange(new_i, new_i + block_size),
                                                                    np.arange(new_j, new_j + block_size)),
                                                  order=1, mode='nearest')
            # Add the motion-compensated block to the residual frame
            predicted_frame[i:i+block_size, j:j+block_size] = interpolated_block + residual_frame[i:i+block_size, j:j+block_size]
    return predicted_frame

#regenerate the video from decoded frames function-------------------------------------------------------
def generate_video(output_frame_folder, output_video_path, num_frames, frames_per_second):  
    frame_list = []   
    # Load frames 
    for i in range(num_frames):
        frame_path = os.path.join(output_frame_folder, f"{i+1}_frame.png")
        frame = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
        frame_list.append(frame)         
    # Check if frames are available
    if not frame_list:
        print("No frames found for video creation.")
        return
    height, width = frame_list[0].shape[:2]
    # Create VideoWriter object
    #use 'H264' for build mp4 file  
    video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'H264'), frames_per_second, (width, height))   
    # Check if VideoWriter is successfully opened
    if not video_writer.isOpened():
        print("Error opening VideoWriter. Check codec and filename.")
        return
    # Write frames
    for frame in frame_list:
        video_writer.write(frame)
    # Release the VideoWriter object
    video_writer.release()

#___________________________________________________________________________________________________________________________
print("Enter the bit rate. Type 'none' if not specified.")
bitrate=input("Enter the bitrate= ")
if bitrate.lower() == "none":
    bit_rate=float('inf')
else:
    bit_rate=int(bitrate)

print("Enter the quality level.Enter '1' for High,'2'for Middle,'3'for Low quality.")
quality_level=int(input("Enter the quality leve= "))


#open video
vid_path = r'D:\Downloads\Mini project\Foreman.mp4'
cap = cv2.VideoCapture(vid_path)
fps = cap.get(cv2.CAP_PROP_FPS)
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

#create directory
text_file=r"D:\Downloads\Mini project\text_files"
os.makedirs(text_file, exist_ok=True)
output_frame_directory=r"D:\Downloads\Mini project\decoded_frames"
os.makedirs(output_frame_directory, exist_ok=True)
motion_vector=r"D:\Downloads\Mini project\motion_vector"
os.makedirs(motion_vector, exist_ok=True)

#initiate parameters
block_size = 16
f_count = 0
orig_frame_interval = 5
desired_frame_limit = 300
search_range = 8

#calculate frames, encode, decode and predict frames
while True:
  ret, cur_frame = cap.read()
  if not ret or f_count>=desired_frame_limit:
    break
  cur_Iframe_gray = cv2.cvtColor(cur_frame, cv2.COLOR_BGR2GRAY)
  f_count+=1
  
  height, width =  cur_Iframe_gray.shape[:2]
  #compress
  cmp_ref,fac_max,fac_min,pq,quality=comp(cur_Iframe_gray,bit_rate,quality_level)
 
  #write text file
  text_path = os.path.join(text_file, f"{f_count}_reference_frame.txt")
  with open(text_path, "w") as file:
      file.write(cmp_ref)
  #decompress
  decmp_ref=de_comp(cmp_ref,height,width,fac_max,fac_min,pq,quality)
  ref_frame=decmp_ref
  cur_frame_gray=cur_Iframe_gray

  #save the frame
  frame_path = os.path.join(output_frame_directory, f"{f_count}_frame.png")
  cv2.imwrite(frame_path, ref_frame)
  for _ in range(orig_frame_interval):    
      previous_frame = cur_frame_gray
      ret, cur_frame = cap.read()
      if not ret or f_count>=desired_frame_limit:
        break
      cur_frame_gray= cv2.cvtColor(cur_frame, cv2.COLOR_BGR2GRAY)
      #motion vectors
      mvectors_fr = calculate_motion_vectors(previous_frame,cur_frame_gray, block_size, search_range)

      #write text file
      flat_mv=mvectors_fr.flatten()
      m_vector = os.path.join(motion_vector, f"{f_count}_frame_motion_vector.txt")
      np.savetxt(m_vector, flat_mv, fmt='%d', delimiter=',')
   
      #get residuals    
      residual_image =residual_frames_build(previous_frame,cur_frame_gray, block_size, search_range)
      f_count += 1
      #compress the residual
      cmp_residual,fac_max,fac_min,pq,quality=comp(residual_image,bit_rate,quality_level)

      #write compressd bit stream to text file
      text_path = os.path.join(text_file, f"{f_count}_residual_frame.txt")
      with open(text_path, "w") as file:
        file.write(cmp_residual)
       
      # decompress the residual
      decmp_residual=de_comp(cmp_residual,height,width,fac_max,fac_min,pq,quality)

      #get the predicted frame for reconstruct the video
      predicted_frame=motion_prediction(ref_frame,mvectors_fr,decmp_residual,block_size)
     
      #save the frames
      frame_path = os.path.join(output_frame_directory, f"{f_count}_frame.png")
      cv2.imwrite(frame_path, predicted_frame)    
cap.release()

#reconstruct and save the decoded video
output_video="output_video.mp4"
output_frame_directory=r"D:\Downloads\Mini project\decoded_frames"
os.makedirs(output_frame_directory, exist_ok=True)

output_video=generate_video(output_frame_directory,output_video,f_count,fps)



Enter the bit rate. Type 'none' if not specified.


Enter the quality level.Enter '1' for High,'2'for Middle,'3'for Low quality.
Bit stream size= 1695971
Bit stream size= 6074848
Bit stream size= 6071261
Bit stream size= 6095469
Bit stream size= 6071516
Bit stream size= 6109800
Bit stream size= 1659931
Bit stream size= 6083178
Bit stream size= 6115597
Bit stream size= 6102915
Bit stream size= 6082685
Bit stream size= 6087955
Bit stream size= 1660424
Bit stream size= 6086017
Bit stream size= 6083688
Bit stream size= 6080611
Bit stream size= 6082345
Bit stream size= 6072842
Bit stream size= 1654678
Bit stream size= 6112367
Bit stream size= 6092137
Bit stream size= 6130608
Bit stream size= 6142814
Bit stream size= 6108185
Bit stream size= 1629926
Bit stream size= 6032841
Bit stream size= 5981382
Bit stream size= 5976010
Bit stream size= 6025701
Bit stream size= 6031532
Bit stream size= 1638341
Bit stream size= 5988998
Bit stream size= 6044180
Bit stream size= 6018289
Bit stream size= 6006984
Bit stream size= 6021383
Bit stream size= 166615