In [17]:
import urllib.parse
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Listing URL prefix. fetch_data_from_page() appends the page number directly
# to this string (no separator), so it has to end with the 'page=' parameter.
base_url = 'https://www.deviantart.com/?topic=aliens&page='
start_page = 1  # First page to scrape (inclusive)
end_page = 10  # Last page to scrape (inclusive); adjust to the desired end page

# Image URLs collected across all scraped pages; a set, so each distinct URL
# is stored only once.
image_urls = set()

# One Chrome session, created here and used by fetch_data_from_page() for
# every page; it is closed with driver.quit() after the scraping loop.
# NOTE(review): presumably needs a Chrome driver that Selenium can locate on
# this machine (e.g. on PATH) — confirm for the installed Selenium version.
driver = webdriver.Chrome()

def fetch_data_from_page(page_number):
    """Open one listing page in the shared browser and record its image URLs.

    The page at ``base_url`` + ``page_number`` is loaded through the
    module-level ``driver``. Once at least one ``<img>`` element is present,
    the rendered HTML is parsed and every usable ``src`` is added to the
    module-level ``image_urls`` set.

    Args:
        page_number: Value appended to ``base_url`` to form the page URL.

    Returns:
        None. Results are accumulated in ``image_urls``.

    Raises:
        Whatever ``WebDriverWait.until`` raises when no ``<img>`` element
        appears within 10 seconds; the error is not handled here.
    """
    page_url = "{}{}".format(base_url, page_number)
    driver.get(page_url)

    # Wait (up to 10 s) until the DOM contains at least one <img> element.
    img_locator = (By.CSS_SELECTOR, 'img')
    WebDriverWait(driver, 10).until(EC.presence_of_element_located(img_locator))

    print(f"Scraping {page_url}")
    parsed_page = BeautifulSoup(driver.page_source, 'html.parser')

    # Keep only real sources: drop tags without a src and inline data: images,
    # then resolve relative paths against base_url before storing.
    sources = (tag.get('src') for tag in parsed_page.find_all('img'))
    image_urls.update(
        urllib.parse.urljoin(base_url, src)
        for src in sources
        if src and not src.startswith('data:image')
    )

# Loop through the desired number of pages. The try/finally makes sure the
# browser session is shut down even when a page load or wait raises (for
# example a timeout); otherwise a failed run would leave the browser running.
try:
    for page_number in range(start_page, end_page + 1):
        fetch_data_from_page(page_number)
finally:
    driver.quit()

# Save unique image URLs to a CSV file: a header row, then one URL per row.
# The set is written in sorted order so that scraping the same pages twice
# produces an identical file instead of following arbitrary set order.
with open('image_urls.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Image URL'])
    writer.writerows([url] for url in sorted(image_urls))

print("Unique Image URLs saved to image_urls.csv")



Scraping https://www.deviantart.com/?topic=aliens&page=9
Scraping https://www.deviantart.com/?topic=aliens&page=10
Unique Image URLs saved to image_urls.csv


In [18]:
import csv
import os
import requests
from urllib.parse import urlparse

# CSV file listing the image URLs to download (relative path). The download
# loop below skips its first row as a header and reads the URL from the first
# column of every following row.
csv_file_path = 'image_urls.csv'

# Directory (relative path) that receives the downloaded image files
download_directory = 'alien_images'

# Create the download directory up front; exist_ok=True makes re-running this
# cell harmless when the directory is already there.
os.makedirs(download_directory, exist_ok=True)

# Function to download an image from a URL
def download_image(url, file_path, timeout=30):
    """Download ``url`` and write the response body to ``file_path``.

    This is a best-effort helper: failures are reported on stdout and never
    raised, so one bad URL cannot abort a batch of downloads.

    Args:
        url: Address of the image to fetch.
        file_path: Destination path; the file is only created/overwritten
            when the server answers with HTTP 200.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the call (and the
            whole download loop) indefinitely.

    Returns:
        True if the image was written to ``file_path``, False otherwise.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f"Downloaded: {file_path}")
            return True
        print(f"Failed to download: {url}")
    except Exception as e:
        # Deliberately broad: network errors, timeouts and file-system errors
        # are all reported the same way and the caller moves on.
        print(f"Error downloading {url}: {e}")
    return False

# Open the CSV file and download every listed image that is not on disk yet
with open(csv_file_path, mode='r', newline='') as file:
    csv_reader = csv.reader(file)
    next(csv_reader, None)  # Skip the header row; the default tolerates an empty file
    for row in csv_reader:
        # csv.reader yields [] for blank lines; indexing row[0] on those
        # would raise IndexError, so skip rows without a URL.
        if not row or not row[0]:
            continue
        image_url = row[0]
        file_name = os.path.basename(urlparse(image_url).path)
        if not file_name:
            # A URL whose path is empty or ends in '/' has no file name.
            # Joining '' would yield the download directory itself, which
            # exists, so the URL would be misreported as already downloaded.
            print(f"Skipping URL without a file name: {image_url}")
            continue
        file_path = os.path.join(download_directory, file_name)
        # NOTE: files are keyed by URL basename only, so two different URLs
        # that share a basename map to the same file and the second is skipped.
        if not os.path.exists(file_path):
            download_image(image_url, file_path)
        else:
            print(f"Skipping already downloaded image: {file_name}")


Downloaded: alien_images\artbymorgancmorgan.jpg
Downloaded: alien_images\justinchauveaudesign.png
Downloaded: alien_images\hf_concept_1_by_apolonster_dgs7f2c-250t.jpg
Downloaded: alien_images\hop_in_by_nativestew_dgowf2b-300w.jpg
Downloaded: alien_images\nephilim__sketch__by_n3o_a1pha_dh0ldn4-250t.jpg
Downloaded: alien_images\fracfx.gif
Downloaded: alien_images\instakatharsis.png
Downloaded: alien_images\unholyslurry.jpg
Downloaded: alien_images\rosesstreet.jpg
Downloaded: alien_images\yimumu.png
Downloaded: alien_images\qaichang.png
Downloaded: alien_images\sternenvermaechtnis.jpg
Downloaded: alien_images\alien___humanoid_couple_01_by_nagatedesign_dgxsur3-200h.jpg
Downloaded: alien_images\my_biomechanical_the_punisher__portrait__by_toothcheek_dgxfkp0-200h.jpg
Downloaded: alien_images\meet_the_mammoth_queen_by_skywardpanda_dgy2mvo-250t.jpg
Downloaded: alien_images\d-the-darkie.jpg
Downloaded: alien_images\alien_attack_begins_by_neantai_dgwqijh-200h.jpg
Downloaded: alien_images\jerod64.

Downloaded: alien_images\great_wars_of_nebila_prime_by_du8prime_dgq57qi-250t.jpg
Downloaded: alien_images\anoty07.jpg
Downloaded: alien_images\alien_in_a_dark_spaceship_by_epicbackdropcreation_dgua6pi-375w.png
Downloaded: alien_images\cybermaelys.jpg
Downloaded: alien_images\02144_by_lordsorril_dgvp3pk-250t.jpg
Downloaded: alien_images\mandalorian_jedi_by_thauri_dh0j0bv-250t.jpg
Downloaded: alien_images\nativestew.jpg
Downloaded: alien_images\msmaldesignsai.png
Downloaded: alien_images\molten22.jpg
Downloaded: alien_images\jeninedufo.jpg
Downloaded: alien_images\christian-cline.jpg
Downloaded: alien_images\kosu811.jpg
Downloaded: alien_images\imogenarium.jpg
Downloaded: alien_images\dimuzart.jpg
Downloaded: alien_images\octoalien_by_gener8ai_dgrm2to-300w.jpg
Downloaded: alien_images\xfileaddict.jpg
Downloaded: alien_images\alien_pov_by_neantai_dgv2wbg-200h.jpg
Downloaded: alien_images\metal_cyborg_by_blackjach23567985611_dgvaacy-250t.png
Downloaded: alien_images\ambush_by_jano1705_dgz6

Downloaded: alien_images\most_dangerous_species_in_dune_universe_by_feather4100_dgzh6vh-250t.jpg
Downloaded: alien_images\colony_ship_by_wez69_dgyuw1j-250t.jpg
Downloaded: alien_images\invasion_eclipse____saucers_hovering_in_cosmic_sha_by_xfileaddict_dgsh5w0-350t.jpg
Downloaded: alien_images\valentine-ai.jpg
Downloaded: alien_images\4063354053_by_futurerender_dgrig7g-250t.jpg
Downloaded: alien_images\cyberpunkish_ninja_by_eclipsepheonix_dgzlv3n-250t.jpg
Downloaded: alien_images\hackers_by_inkvision17_dgxgcd5-300w.jpg
Downloaded: alien_images\cell_dweller_by_turbaturtle_dh0kqba-350t.jpg
Downloaded: alien_images\sleepy_alien_bed_by_lambdogcomics_dgv2szc-400t.jpg
Downloaded: alien_images\asymoney.jpg
Downloaded: alien_images\ai_astronaut_on_the_moon__by_gazithecreator_dgwrkbr-250t.jpg
Downloaded: alien_images\skywardpanda.jpg
Downloaded: alien_images\untitled_by_anoty07_dgq4vnk-300w.jpg
Downloaded: alien_images\skchaturesh.jpg
Downloaded: alien_images\space_station_codename__angel_three_b

Downloaded: alien_images\queencreative.jpg
Downloaded: alien_images\cx24_by_minellium_dgr5qnr-300w.jpg
Downloaded: alien_images\mudflats_02_by_drman_dgut545-250t.jpg
Downloaded: alien_images\the_elegy_of_the_replicant_4_by_msmaldesignsai_dgwelus-250t.jpg
Downloaded: alien_images\dreamshaper_v6_concept_art_fucxking_hostile_1_by_unmelody_dgrvjyh-414w.jpg
Downloaded: alien_images\alien_landscape_by_doffdog_dgy515o-250t.jpg
Downloaded: alien_images\ka-pow96.jpg
Downloaded: alien_images\beebeerockz69.jpg
Downloaded: alien_images\ema-king.jpg
Downloaded: alien_images\space_race_by_alien236_dgyb6zm-250t.jpg
Downloaded: alien_images\futurerender.jpg
Downloaded: alien_images\sol_art_by_solorionbrando_dgqrurw-250t.jpg
Downloaded: alien_images\starbound_protector_by_heartyjessman_dgrzam8-350t.jpg
Downloaded: alien_images\xenomorpho_by_keehunter_dgu6d6m-250t.jpg
Downloaded: alien_images\sith_by_thauri_dgxq9oy-250t.jpg
Downloaded: alien_images\inkimagine.jpg
Downloaded: alien_images\order_and_chaos

In [16]:
# import urllib.parse
# import csv
# from bs4 import BeautifulSoup
# import requests
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.support import expected_conditions as EC

# # Website URL (replace this with the actual URL of the website)
# base_url = 'https://www.deviantart.com/?topic=character-design&cursor=MTQwYWI2MjA9MSY1OTBhY2FkMD0wJmQxNzRiNmJjPU4lMkZB&page='
# start_page = 9
# end_page =  10   # Adjust this to the desired end page

# image_urls = set()

# # Set up the WebDriver (assuming ChromeDriver is in your system's PATH)
# driver = webdriver.Chrome()

# def fetch_data_from_page(page_number):
#     url = f"{base_url}{page_number}"
#     driver.get(url)
#     WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'img')))
    
#     print(f"Scraping {url}")
#     soup = BeautifulSoup(driver.page_source, 'html.parser')
#     image_tags = soup.find_all('img')
    
#     # Extract image URLs
#     for img in image_tags:
#         img_url = img.get('src')
#         if img_url and not img_url.startswith('data:image'):
#             img_url = urllib.parse.urljoin(base_url, img_url)
#             image_urls.add(img_url)

# # Loop through the desired number of pages
# for i in range(start_page, end_page + 1):
#     fetch_data_from_page(i)

# driver.quit()

# # Save unique image URLs to a CSV file
# with open('images_urls.csv', 'w', newline='') as file:
#     writer = csv.writer(file)
#     writer.writerow(['Image URL'])
#     for url in image_urls:
#         writer.writerow([url])

# print("Unique Image URLs saved to images_urls.csv")


Scraping https://www.deviantart.com/?topic=character-design&cursor=MTQwYWI2MjA9MSY1OTBhY2FkMD0wJmQxNzRiNmJjPU4lMkZB&page=9
Scraping https://www.deviantart.com/?topic=character-design&cursor=MTQwYWI2MjA9MSY1OTBhY2FkMD0wJmQxNzRiNmJjPU4lMkZB&page=10
Unique Image URLs saved to image_urls.csv
