In [40]:
import pandas as pd
import requests
import urllib.request 
from PIL import Image 
from io import BytesIO
from bs4 import BeautifulSoup
import os

In [None]:
# trade promo: read the 'EDM Modules' sheet, skipping the first 6 rows of the workbook
trade_promo = pd.read_excel('data/Trade Promo Brief Template - WK2 Catch it quick.xlsx', sheet_name= 'EDM Modules', skiprows=6)
# dropping first col since it's empty
trade_promo = trade_promo.drop(columns=trade_promo.columns[0])
# remove whitespace from column names
trade_promo.columns = trade_promo.columns.str.replace(' ', '')
# drop rows where module size is null - indicating module name is empty
trade_promo = trade_promo[trade_promo['ModuleSize'].notnull()]
# retain important cols: anything whose name mentions 'priority' or 'imagelink'
retain_cols = [i for i in trade_promo.columns if 'priority' in i.lower() or 'imagelink' in i.lower()]
trade_promo = trade_promo.loc[:, retain_cols]
# Rename the first retained column to 'module' by assigning a fresh list of labels.
# Writing into `columns.values` mutates the Index's backing array in place, which
# pandas does not support (an Index is immutable) and may be silently ignored.
trade_promo.columns = ['module'] + list(trade_promo.columns[1:])
# long format: one row per (module, image column) pair with its URL
trade_promo_pivot = trade_promo.melt(id_vars= 'module', var_name= 'Image', value_name='URL')
# keep only rows whose URL starts with 'h'; na=False makes missing / non-string
# cells evaluate to False explicitly instead of propagating NaN into the mask
is_link = trade_promo_pivot['URL'].str.startswith('h', na=False)
trade_promo_pivot = trade_promo_pivot[is_link]
trade_promo_pivot.head(100)

In [None]:
# Function to download an image
def download_image(url, folder_name, image_counters, base_dir='Trade Sales', timeout=30):
    """Fetch a page, find its main product photo and save it to disk.

    The page at ``url`` is downloaded and parsed, the ``<img>`` tag whose
    ``alt`` is ``'main product photo'`` is located, and the file its ``src``
    points to is written to ``<base_dir>/<folder_name>/image_<n>.jpg``.

    Args:
        url: Address of the page to scrape. Values that are not http(s)
            URLs are reported and skipped without any network call.
        folder_name: Sub-folder of ``base_dir`` the image is stored in; also
            the key used in ``image_counters``.
        image_counters: Dict mapping folder name to the next image number.
            It is mutated: a missing folder starts at 1 and the entry is
            incremented after each successful save.
        base_dir: Root output directory (defaults to the original
            hard-coded ``'Trade Sales'``).
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            server cannot hang the notebook.

    Returns:
        The ``src`` value of the image tag when the image was saved,
        otherwise ``None``.
    """
    # Anything that is not an http(s) URL cannot be fetched by requests; report
    # it up front rather than letting requests raise and abort the caller's loop.
    if not url.lower().startswith(('http://', 'https://')):
        if url[:1] in ('O', 'W'):
            print(f'{folder_name}: {url} is a server file path, please access manually')
        else:
            print(f'{folder_name}: {url} is not an http(s) URL, skipping')
        return None

    headers = {
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        img_tag = soup.find('img', {'alt': 'main product photo'})
        # Explicit None / missing-attribute check instead of relying on a
        # TypeError from subscripting None (which also hid unrelated TypeErrors).
        img_url = img_tag.get('src') if img_tag is not None else None
        if not img_url:
            print(f'Likely URL gave 404. Please check {folder_name}: {url}')
            return None
        img_response = requests.get(img_url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Network-level failure (timeout, DNS, bad schema, ...): best effort, keep going.
        print(f'Request failed for {folder_name}: {url} ({exc})')
        return None

    if img_response.status_code != 200:
        print(f'Failed to download image from {url}')
        return None

    # Create the folder if it does not exist
    folder_path = os.path.join(base_dir, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Counter starts at 1 the first time a folder is seen
    img_counter = image_counters.setdefault(folder_name, 1)
    img_path = os.path.join(folder_path, f'image_{img_counter}.jpg')

    with open(img_path, 'wb') as f:
        f.write(img_response.content)
    print(f'Downloaded {img_path} from {url}')
    # Advance the counter only after the file has been written
    image_counters[folder_name] = img_counter + 1
    return img_url

# Per-folder running image numbers, shared across all download_image calls
image_counters = {}
# Image source returned for each row (None where the download did not succeed)
src_url = []
# Walk the module / URL columns in row order and download each image
for folder_name, url in zip(trade_promo_pivot['module'], trade_promo_pivot['URL']):
    src_url.append(download_image(url.strip(), folder_name.strip(), image_counters))

In [43]:
# output the image sources
# assign() builds a new frame (no chained-assignment warning on the filtered
# frame) and errors='ignore' lets this cell be re-run after 'URL' is already gone.
trade_promo_pivot = trade_promo_pivot.assign(img_src=src_url).drop(columns='URL', errors='ignore')
# wide format: one row per module, one column per image column, values are image sources
# NOTE(review): pivot raises ValueError if a (module, Image) pair occurs more than once
trade_promo_pvt2 = trade_promo_pivot.pivot(index= 'module', columns= 'Image', values='img_src')
trade_promo_pvt2.to_csv('output.csv', sep = ',')