# Downloading item images from their scraped URLs

This notebook is intended for Colab, using its integration with Drive to save each image file as an RGB matrix for later use in the neural network.

The SOLD item images portion is intended for the CSV file obtained after scraping the Selling Hub of your shop, for use in training the neural network.

The LISTED item images portion is intended for the CSV file obtained after scraping your shop's listed items that haven't sold yet, for use on the trained neural network.

In [2]:
import requests
import pandas as pd
import time
import random as rand
import numpy as np
from PIL import Image
import io

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Run for SOLD item images:

In [None]:
def get_sold_image(image_url, index, timeout=30):
    """Download one SOLD item image and save it to Drive as an RGB matrix.

    The image is fetched over HTTP, converted to RGB, resized to 128x128 with
    Lanczos resampling, and written as a NumPy ``.npy`` file named
    ``depop_image_<index>.npy`` under ``/content/drive/MyDrive/depop_images/``.
    The destination folder is not created here, so it must already exist.

    Failures (non-200 response, network error, undecodable image, unwritable
    path) are reported with ``print`` and never raised, so a loop over many
    URLs keeps going after a bad one.

    Args:
        image_url: URL of the image to download.
        index: Value used to number the output file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection and freeze the whole download loop.

    Returns:
        None.
    """
    try:
        # Make a request to fetch the image; bounded by `timeout` so a single
        # unresponsive server cannot hang the notebook.
        response = requests.get(image_url, timeout=timeout)

        # Check if the request was successful
        if response.status_code == 200:
            # Decode the bytes, force 3-channel RGB and resize; the context
            # manager releases the decoded source image once we are done.
            with Image.open(io.BytesIO(response.content)) as img:
                img_resized = img.convert('RGB').resize((128, 128), Image.LANCZOS)

            # Convert PIL Image to numpy array
            rgb_matrix = np.array(img_resized)

            # Path to save the RGB matrix in Google Drive
            save_path = '/content/drive/MyDrive/depop_images/depop_image_'+ str(index) +'.npy'

            # Save the RGB matrix as a NumPy (.npy) file
            np.save(save_path, rgb_matrix)

        else:
            print(f"Failed to retrieve image from URL: {image_url}")

    except Exception as e:
        # Deliberately best-effort: report and move on to the next image.
        print(f"Error saving image from URL {image_url}: {e}")

In [None]:
# Read the scraped SOLD-items CSV into `df`. The path is relative, so the file
# must be in the notebook's current working directory.
df = pd.read_csv('depop_item_details_complete.csv')
# Preview the first rows; the download loop below reads df['Image'].
df.head()

In [2]:
# Run loop to save each image corresponding to the links in dataframe.
# enumerate() supplies a 0-based position that get_sold_image() uses to number
# the saved file.
# NOTE(review): `df` must be the SOLD frame here (the LISTED section rebinds the
# same name), and the LISTED CSV names its URL column 'Image URL' -- confirm the
# SOLD CSV really uses 'Image'.
for index, image_url in enumerate(df['Image']):
    # Call function get_sold_image()
    get_sold_image(image_url, index)

    # Invoke time delay so Depop doesn't flag us (random 2-5 second pause)
    time.sleep(rand.uniform(2, 5))

## Run for downloading LISTED item images:

In [4]:
def get_listed_image(image_url, index, timeout=30):
    """Download one LISTED item image and save it to Drive as an RGB matrix.

    The image is fetched over HTTP, converted to RGB, resized to 128x128 with
    Lanczos resampling, and written as a NumPy ``.npy`` file named
    ``depop_listed_<index>.npy`` under
    ``/content/drive/MyDrive/depop_images/depop_listed_images/``.
    The destination folder is not created here, so it must already exist.

    Failures (non-200 response, network error, undecodable image, unwritable
    path) are reported with ``print`` and never raised, so a loop over many
    URLs keeps going after a bad one.

    Args:
        image_url: URL of the image to download.
        index: Value used to number the output file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection and freeze the whole download loop.

    Returns:
        None.
    """
    try:
        # Make a request to fetch the image; bounded by `timeout` so a single
        # unresponsive server cannot hang the notebook.
        response = requests.get(image_url, timeout=timeout)

        # Check if the request was successful
        if response.status_code == 200:
            # Decode the bytes, force 3-channel RGB and resize; the context
            # manager releases the decoded source image once we are done.
            with Image.open(io.BytesIO(response.content)) as img:
                img_resized = img.convert('RGB').resize((128, 128), Image.LANCZOS)

            # Convert PIL Image to numpy array
            rgb_matrix = np.array(img_resized)

            # Path to save the RGB matrix in Google Drive
            save_path = '/content/drive/MyDrive/depop_images/depop_listed_images/depop_listed_'+ str(index) +'.npy'

            # Save the RGB matrix as a NumPy (.npy) file
            np.save(save_path, rgb_matrix)

        else:
            print(f"Failed to retrieve image from URL: {image_url}")

    except Exception as e:
        # Deliberately best-effort: report and move on to the next image.
        print(f"Error saving image from URL {image_url}: {e}")

In [6]:
# Read the scraped LISTED-items CSV. This rebinds `df`, replacing the SOLD
# frame loaded earlier in the notebook. The path is relative, so the file must
# be in the notebook's current working directory.
df = pd.read_csv('depop_listed_items.csv')
# Preview the first rows; the download loop below reads df['Image URL'].
df.head()

Unnamed: 0,Image URL,Price,Size,Brand,Description
0,https://media-photos.depop.com/b1/15860578/203...,$30.00,Size S,Bebe,🌫️ Vintage bebe cami 🌫️\n\n▫️ Love that the wa...
1,https://media-photos.depop.com/b1/15860578/203...,$17.00,Size M,Derek Heart,🌫️ Striped maxi dress 🌫️\n\n▫️ Soft thin knit ...
2,https://media-photos.depop.com/b1/15860578/203...,$25.00,Size L,Other,🌫️ Lightweight baby blue jacket 🌫️\n\n▫️ Baby ...
3,https://media-photos.depop.com/b1/15860578/203...,$20.00,Size 4,Banana Republic,🌫️ Vintage wool midi skirt 🌫️\n\n▫️ Gorge brow...
4,https://media-photos.depop.com/b1/15860578/203...,$27.00,Size 6,Banana Republic,🌫️ Hot pink baggy slacks 🌫️\n\n▫️ Garment is 1...


In [7]:
# Run loop to save image corresponding to each link in dataframe.
# enumerate() supplies a 0-based position that get_listed_image() uses to
# number the saved file. `df` must be the LISTED frame (the one with an
# 'Image URL' column) when this cell runs.
for index, image_url in enumerate(df['Image URL']):
    # Call function get_listed_image()
    get_listed_image(image_url, index)

    # Invoke delay to appease the Depop servers (random 4-7 second pause)
    time.sleep(rand.uniform(4, 7))