In [None]:

import os
import urllib.error
import urllib.request
import warnings
from pprint import pprint
from urllib.error import HTTPError

import requests
from bs4 import BeautifulSoup
from fastai.basics import Path
from fastai.vision.utils import download_images
from fastbook import search_images_ddg
from tqdm import tqdm

In [None]:

def scrape_website(website_link, timeout=30):
    """
    Scrapes a website for image links.

    Fetches the page, looks inside every element whose class attribute is
    "section pt-4", reads the ``srcset`` attribute of each ``<source>`` tag
    found there, and replaces 'thumbnail' with 'image' in each value.

    Args:
        website_link (str): The link to the website to be scraped.
        timeout (float): Seconds to wait for the server before giving up.
            ``requests`` applies no timeout by default, so without this a
            stalled connection would block the notebook indefinitely.

    Returns:
        list: A list of image links found on the website, in document order.
            Empty when the server answers with an error status or when no
            matching elements are present.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    response = requests.get(website_link, timeout=timeout)

    # An error page contains no listing; say so instead of silently parsing it.
    if not response.ok:
        warnings.warn(
            f"Skipping {website_link}: HTTP {response.status_code}",
            stacklevel=2,
        )
        return []

    soup = BeautifulSoup(response.content, "html.parser")

    image_links = []
    for section in soup.find_all(class_="section pt-4"):
        for source in section.find_all("source"):
            # <source> tags without a srcset carry no URL to collect.
            if source.has_attr("srcset"):
                # NOTE(review): presumably the full-size URL differs from the
                # thumbnail URL only by this path segment - confirm on the site.
                image_links.append(source["srcset"].replace("thumbnail", "image"))

    return image_links





In [None]:

# Prefix of the paginated listing; the page number is appended to it
website_base_link = 'https://www.kickresume.com/en/help-center/resume-samples/?page='

# Request pages 1 through 109 in order and pool every image link they yield
links = []
for page_number in tqdm(range(1, 110)):
    links += scrape_website(f"{website_base_link}{page_number}")

# Number of image links collected (shown as the cell output)
len(links)


In [None]:


# Create the folder if it doesn't exist
folder_path = "image/resume"
os.makedirs(folder_path, exist_ok=True)

# Download and save the images.
# tqdm wraps `links` directly (rather than enumerate(links)) so it can read
# len(links) and display a total, percentage and ETA.
# NOTE(review): every file is saved with a .jpg suffix regardless of the
# format the server actually returns - confirm that is acceptable downstream.
failed_downloads = []
for i, link in enumerate(tqdm(links)):
    image_path = os.path.join(folder_path, f"image_{i}.jpg")
    try:
        urllib.request.urlretrieve(link, image_path)
    except urllib.error.URLError as err:
        # URLError also covers HTTPError and ContentTooShortError. One dead
        # link should not abort the rest of the batch; record it instead.
        failed_downloads.append((link, err))

# `failed_downloads` is kept as (link, error) pairs for later inspection.
print(
    f"Downloaded {len(links) - len(failed_downloads)} of {len(links)} images; "
    f"{len(failed_downloads)} failed."
)


In [None]:

# Search terms used to gather images that are NOT resumes.
# NOTE(review): 'recomandation_letter' looks misspelled ("recommendation");
# left unchanged because it is sent verbatim as the search query.
classes = ['cover_letter','documents','invitation','recomandation_letter','poster','ppt','notes','report','statement','pics']

# Create the folder if it doesn't exist
folder_path = "image/notresume"
os.makedirs(folder_path, exist_ok=True)

# All search terms download into the same folder.
for class_name in tqdm(classes):
    try:
        # Both the search and the download touch the network, so both sit
        # inside the try: a failure for one term must not stop the others.
        urls = search_images_ddg(class_name, max_images=250)
        download_images(urls=urls, dest=folder_path)
    except Exception as e:
        # Still best-effort, but report what was skipped instead of hiding it.
        # tqdm.write prints without corrupting the progress bar.
        tqdm.write(f"Skipping '{class_name}': {type(e).__name__}: {e}")

In [None]:
import os

def count_images_in_folder(folder_path):
    """
    Counts the entries of a folder whose names end with an image extension.

    Only the entry name is inspected (case-insensitively); the folder is not
    searched recursively.

    Args:
        folder_path (str): The folder to inspect.

    Returns:
        int: Number of entries ending in .jpg, .jpeg, .png, .gif or .bmp.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    return sum(
        1
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(image_suffixes)
    )

# Point back at the resume folder (folder_path was reassigned by an earlier cell)
folder_path = "image/resume"

# Tally the image files saved there and report the total
image_count = count_images_in_folder(folder_path)
print(f"There are {image_count} images in the folder.")
