In [None]:
# imgflip meme template ids to process. Each id is kept as a string because
# process_meme() concatenates it into the "/memegenerator/<id>" URL and into
# the destination object names ("imgs/<id>.<ext>", "json/<id>.json").
id_list = [
    "181913649",
    "112126428",
    "87743020",
    "124822590",
    "129242436",
    "438680",
    "217743513",
    "131087935",
    "61579",
    "93895088",
    "4087833",
    "102156234",
    "1035805",
    "101470",
    "91538330",
    "131940431",
    "119139145",
    "155067746",
    "5496396",
    "123999232",
    "27813981",
    "61532",
    "21735",
    "135256802",
    "100777631",
    "226297822",
    "80707627",
    "124055727",
    "148909805",
    "28251713",
    "8072285",
    "61585",
    "252600902",
    "101288",
    "61539",
    "134797956",
    "161865971",
    "6235864",
    "61556",
    "180190441",
    "175540452",
    "110163934",
    "61527",
    "84341851",
    "91545132",
    "61544",
    "14371066",
    "55311130",
    "61582",
    "405658",
    "196652226",
    "61533",
    "16464531",
    "101511",
    "79132341",
    "1509839",
    "195515965",
    "101287",
    "235589",
    "100947",
    "61516",
    "99683372",
    "132769734",
    "14230520",
    "259237855",
    "245898",
    "922147",
    "101910402",
    "101440",
    "61580",
    "101716",
    "40945639",
    "259680",
    "109765",
    "9440985",
    "61581",
    "56225174",
    "163573",
    "12403754",
    "29617627",
    "460541",
    "21604248",
    "195389",
    "1367068",
    "444501",
    "6531067",
    "766986",
    "100955"
]

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import requests
import json
from google.cloud import storage
import os
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv); process_meme()
# later reads BUCKET_NAME from the environment via os.getenv.
load_dotenv()

# Set up Chrome options shared by every driver created in process_pages()
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

def get_image_path(driver):
    """Return the ``src`` of the image shown inside the preview element.

    Args:
        driver: Selenium WebDriver (or any element-like object exposing
            ``find_element``) that is expected to have the page loaded.

    Returns:
        The value of the ``src`` attribute of the ``<img>`` found inside the
        element carrying the ``mm-preview`` class.
    """
    # Locate the preview container first, then the image nested inside it.
    preview = driver.find_element(By.CLASS_NAME, "mm-preview")
    return preview.find_element(By.TAG_NAME, "img").get_attribute("src")

def extract_box_values(props):
    """Parse the pixel geometry out of an inline CSS ``style`` string.

    Args:
        props: Inline style text, e.g.
            ``"left: 10px; top: 20px; width: 100px; height: 50px;"``.

    Returns:
        dict with integer ``left``, ``top``, ``width`` and ``height`` entries
        (in that order). Fractional pixel values are rounded to the nearest
        integer.

    Raises:
        ValueError: if any of the four properties is missing from ``props``
            or is not expressed as a ``px`` value.
    """
    import re  # local import so the helper does not depend on the import cell

    values = {}
    for name in ("left", "top", "width", "height"):
        # The look-behind rejects longer property names that merely end with
        # ours (e.g. "margin-left", "min-height", "border-top"). Whitespace
        # around the colon is optional and the number may be negative or
        # fractional.
        match = re.search(
            r"(?<![\w-])" + name + r"\s*:\s*(-?(?:\d+\.?\d*|\.\d+))px",
            props or "",
        )
        if match is None:
            raise ValueError(f"No '{name}' pixel value found in style: {props!r}")
        values[name] = int(round(float(match.group(1))))
    return values

def get_boxes(driver):
    """Collect the geometry of every drag box inside the preview element.

    Args:
        driver: Selenium WebDriver (or element-like object exposing
            ``find_element``) that is expected to have the page loaded.

    Returns:
        A list with one ``extract_box_values`` result per element carrying
        the ``drag-box`` class, in the order the elements were found.
    """
    preview = driver.find_element(By.CLASS_NAME, "mm-preview")

    # Each box keeps its position and size in its inline "style" attribute.
    return [
        extract_box_values(box.get_attribute("style"))
        for box in preview.find_elements(By.CLASS_NAME, "drag-box")
    ]


def process_pages(url):
    """Open ``url`` in a fresh Chrome driver and scrape the template data.

    Args:
        url: Address of the page to load.

    Returns:
        Tuple ``(image, boxes)`` where ``image`` is the result of
        ``get_image_path`` and ``boxes`` the result of ``get_boxes`` for the
        loaded page.

    Raises:
        Whatever the driver or the scraping helpers raise; the driver is
        closed before the exception propagates.
    """
    # Create a new instance of the Chrome driver
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Fetch the page
        driver.get(url)

        # Get the image
        image = get_image_path(driver)

        # Get the json
        boxes = get_boxes(driver)
    finally:
        # Always quit, even when loading or scraping fails; otherwise every
        # failed page would leave a headless Chrome process behind.
        driver.quit()

    return image, boxes

def upload_image_to_gcs(url, bucket_name, destination_blob_name, timeout=30):
    """Download an image over HTTP and store it in a Google Cloud Storage bucket.

    Args:
        url: Address of the image to download.
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name to create inside the bucket.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled server cannot hang the whole batch. Defaults to 30.

    Raises:
        Exception: if the server answers with a status code other than 200.
    """
    # Fetch the image from the URL
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching image from {url}, status code: {response.status_code}")

    # Not every server sends a Content-Type header; fall back to a generic
    # binary type instead of failing with a KeyError.
    content_type = response.headers.get("Content-Type", "application/octet-stream")

    # Create a client for Google Cloud Storage
    client = storage.Client()

    # Get the bucket where the image will be uploaded
    bucket = client.bucket(bucket_name)

    # Create a new blob (file) in the bucket
    blob = bucket.blob(destination_blob_name)

    # Upload the image data to Google Cloud Storage
    blob.upload_from_string(response.content, content_type=content_type)

    print(f"Image successfully uploaded to {bucket_name}/{destination_blob_name}")


def upload_json_to_gcs(json_data, bucket_name, destination_blob_name):
    """Serialize ``json_data`` and store it as a JSON object in a GCS bucket.

    Args:
        json_data: Any value accepted by ``json.dumps``.
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name to create inside the bucket.
    """
    # Resolve client -> bucket -> blob (the object that will hold the JSON).
    target = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # Upload the serialized JSON text with the matching content type.
    target.upload_from_string(json.dumps(json_data), content_type="application/json")

    print(f"JSON successfully uploaded to {bucket_name}/{destination_blob_name}")


def process_meme(meme_id):
    """Scrape one imgflip meme template and upload its image and box layout.

    The template page ``https://imgflip.com/memegenerator/<meme_id>`` is
    scraped with ``process_pages``; the image is stored as
    ``imgs/<meme_id><ext>`` and the boxes as ``json/<meme_id>.json`` in the
    bucket named by the ``BUCKET_NAME`` environment variable.

    Args:
        meme_id: Template id (string, or anything ``str()`` can render).

    Raises:
        RuntimeError: if ``BUCKET_NAME`` is not set, or if the page yielded
            no image URL.
    """
    from urllib.parse import urlparse  # local import: only needed here

    ROOT_URL = "https://imgflip.com"
    BUCKET_NAME = os.getenv("BUCKET_NAME")
    # Fail before launching a browser rather than after the scrape is done.
    if not BUCKET_NAME:
        raise RuntimeError("BUCKET_NAME environment variable is not set")

    meme_id = str(meme_id)
    print("Processing meme with id: " + meme_id)
    meme_url = ROOT_URL + "/memegenerator/" + meme_id
    image_url, boxes = process_pages(meme_url)
    if not image_url:
        raise RuntimeError("No image URL found for meme " + meme_id)

    # Take the extension from the URL *path* only, so a query string or
    # fragment (".../abc.jpg?x=1") does not end up in the object name.
    extension = os.path.splitext(urlparse(image_url).path)[1]
    image_path = "imgs/" + meme_id + extension
    json_path = "json/" + meme_id + ".json"
    upload_image_to_gcs(image_url, BUCKET_NAME, image_path)
    upload_json_to_gcs(boxes, BUCKET_NAME, json_path)


# Process every template id sequentially. There is no error handling here, so
# an exception raised for one id propagates and stops the remaining ids.
for meme_id in id_list:
    process_meme(meme_id)