In [1]:
import os
import json
import time
import requests
import numpy as np
from PIL import Image
from io import BytesIO
import warnings
warnings.filterwarnings("ignore")


In [2]:
def load_image_from_url(url):
    response = requests.get(url, verify=False)
    image_extension = None
    
    # Get the content type from the response headers
    content_type = response.headers.get('Content-Type')
    
    # Determine the image format from the content type
    if content_type == 'image/jpeg':
        image_extension = '.jpg'
    elif content_type == 'image/png':
        image_extension = '.png'
    elif content_type == 'image/gif':
        image_extension = '.gif'
    elif content_type == 'image/webp':
        image_extension = '.webp'
    # Add more conditions for other image formats if needed
    
    # If image format is not determined, default to '.jpg'
    if not image_extension:
        image_extension = '.jpg'
    
    # Open the image from the response content
    image = Image.open(BytesIO(response.content))
    
    return image, image_extension


def detect_white_lines(image):
    # Convert the image to grayscale
    gray = image.convert('L')
    
    # Threshold the grayscale image to obtain a binary mask of the white lines
    binary_mask = (np.array(gray) == 0) | (np.array(gray) == 1) | (np.array(gray) == 255)
    
    # Detect white lines in rows
    white_lines_rows = np.all(binary_mask, axis=1)
    
    # Find the indices of white lines in rows
    white_lines_row_indices = np.where(white_lines_rows)[0]
    
    return white_lines_row_indices

def remove_rows(image, row_indices_to_remove):
    # Convert the image to a NumPy array
    image_array = np.array(image)
    
    # Create a mask for the rows to keep
    mask = np.ones(image_array.shape[0], dtype=bool)
    mask[row_indices_to_remove] = False
    
    # Remove the specified rows
    cropped_image_array = image_array[mask]
    
    # Convert the cropped image array back to a PIL image
    cropped_image = Image.fromarray(cropped_image_array)
    
    return cropped_image

In [3]:
def remove_special_characters(title_name):
    # Filter out non-alphanumeric characters and spaces
    cleaned_title_name = ''.join(char for char in title_name if char.isalnum() or char.isspace())
    return cleaned_title_name


def process_image(image_url, title_name, bucket_folder_path):
    # Load image from URL
    image, image_ext = load_image_from_url(image_url)
    
    # Detect and remove white lines
    row_indices = detect_white_lines(image)
    cropped_image = remove_rows(image, row_indices)
    
    # Save the cropped image
    title_name = remove_special_characters(title_name)
    save_path = os.path.join(bucket_folder_path, f"{title_name}{image_ext}")
    cropped_image.save(save_path)

def process_images(images_data, images_folder):
    if not os.path.exists(images_folder):
        os.makedirs(images_folder, exist_ok=True)
    
    for bucket_name, bucket_content in images_data.items():
        print(f"Bucket Name: {bucket_name}\t| Processing {len(bucket_content)} contents poster images...")
        bucket_folder_path = os.path.join(images_folder, bucket_name)
        os.makedirs(bucket_folder_path, exist_ok=True)
        
        for title_name, title_poster_url in bucket_content.items():
            process_image(title_poster_url, title_name, bucket_folder_path)

        time.sleep(20)


In [4]:
if __name__ == "__main__":
    # Example usage:
    images_folder = "./vertical_poster_images"
    if not os.path.exists(images_folder):
        os.makedirs(images_folder)
    with open("buckets_contents_image_urls.json", "r") as f:
        bucket_contents_data = json.load(f)

    process_images(bucket_contents_data, images_folder)


Bucket Name: Popular in Crime	| Processing 43 contents poster images...
Bucket Name: Popular in Comedy	| Processing 47 contents poster images...
Bucket Name: Popular in Biopic	| Processing 25 contents poster images...
Bucket Name: Top Rated on IMDb	| Processing 40 contents poster images...
Bucket Name: Thriller Movies	| Processing 39 contents poster images...
Bucket Name: Comedy Movies	| Processing 40 contents poster images...
Bucket Name: Latest Releases	| Processing 31 contents poster images...


In [5]:
# title_poster_url = "https://img10.hotstar.com/image/upload/sources/r1/cms/prod/9446/1711972319446-v"
# title_name = "extraordinary birder with christian cooper"
# images_folder = "/Users/rosenta/Downloads/"
# process_image(title_poster_url, title_name, images_folder)

In [6]:
import os

In [11]:
for i in os.listdir(images_folder):
    s = i.replace(" ", '_')
    print(f"zip -r zipped_folder/{s}.zip '{i}'")

zip -r zipped_folder/Popular_in_Thriller.zip 'Popular in Thriller'
zip -r zipped_folder/Latest_Releases.zip 'Latest Releases'
zip -r zipped_folder/Popular_Movies.zip 'Popular Movies'
zip -r zipped_folder/Popular_in_Biopic.zip 'Popular in Biopic'
zip -r zipped_folder/.DS_Store.zip '.DS_Store'
zip -r zipped_folder/Popular_in_Crime.zip 'Popular in Crime'
zip -r zipped_folder/Popular_in_Romance.zip 'Popular in Romance'
zip -r zipped_folder/Popular_in_Action.zip 'Popular in Action'
zip -r zipped_folder/Free_-_Newly_Added.zip 'Free - Newly Added'
zip -r zipped_folder/Thriller_Movies.zip 'Thriller Movies'
zip -r zipped_folder/Comedy_Movies.zip 'Comedy Movies'
zip -r zipped_folder/Critically_Acclaimed_Movies.zip 'Critically Acclaimed Movies'
zip -r zipped_folder/Exclusive_Indian_Movies.zip 'Exclusive Indian Movies'
zip -r zipped_folder/Hand-Picked_For_Couples.zip 'Hand-Picked For Couples'
zip -r zipped_folder/Watch_with_Friends.zip 'Watch with Friends'
zip -r zipped_folder/Popular_Shows.zip 'P