In [None]:
import csv
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Root folder that receives everything this script writes.
base_directory = r"/content/Untitled Folder"  # Update this to your desired path

# Category being scraped; it is also used as the prefix of the CSV file name.
category = 'picks'

# The CSV is stored directly in the base directory; create it when missing.
csv_directory = base_directory
os.makedirs(csv_directory, exist_ok=True)

# Full path of the CSV file that the scraped rows are appended to.
csv_file_name = os.path.join(csv_directory, f'{category}_post_urls.csv')

# HTTP headers sent with every request (a desktop Chrome User-Agent string).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/121.0.0.0 Safari/537.36'
    ),
}

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole scrape.
REQUEST_TIMEOUT = 30


def _get_soup(url):
    """Download *url* and return it parsed as HTML, or None on any failure.

    A failure is either a network-level error raised by ``requests`` or a
    response whose status code is not 200.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request error for {url}: {exc}")
        return None
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.text, 'html.parser')


def _stat_value(stats_div, label):
    """Return the text of the <dd> that follows the <dt> reading *label*.

    Returns None when the stats container, the <dt> or the <dd> is missing.
    """
    if stats_div is None:
        return None
    dt_tag = stats_div.find('dt', string=label)
    if dt_tag is None:
        return None
    dd_tag = dt_tag.find_next('dd')
    return dd_tag.get_text(strip=True) if dd_tag is not None else None


def _online_since(post_soup):
    """Return the text that follows "Online since" on a post page, or None.

    The whole document is searched for a text node containing the marker,
    rather than only the first class-less <div>, and the value is taken from
    that node's parent element.
    NOTE(review): the real page layout is not visible here - confirm that the
    date lives in the same element as the "Online since" label.
    """
    marker = 'Online since'
    node = post_soup.find(string=lambda text: text is not None and marker in text)
    if node is None:
        return None
    if node.parent is not None:
        container_text = node.parent.get_text(' ', strip=True)
    else:
        container_text = str(node)
    # Cut at the marker instead of the last ':' so that values which contain
    # a colon themselves (e.g. a time of day) are kept whole.
    value = container_text.split(marker, 1)[1].lstrip(' :').strip()
    return value or None


def fetch_data_from_page(url):
    """Scrape one listing page and append its posts to the CSV file.

    For every link found inside a ``div`` with class
    ``other other--has-3-columns`` a row ``[post_url, image_url]`` is appended
    to ``csv_file_name``. The post page is then fetched and, when that
    succeeds, a second row
    ``[view_count, download_count, like_count, online_since]`` is appended.
    Values that cannot be found are written as empty cells.

    Failed requests (network error, timeout or non-200 status) are reported
    on stdout and skipped; they do not abort the scrape.

    Args:
        url: Address of the listing page to scrape.
    """
    soup = _get_soup(url)
    if soup is None:
        print(f"Failed to fetch page: {url}")
        return

    # Find all divs with class "other other--has-3-columns"
    div_tags = soup.find_all('div', class_='other other--has-3-columns')

    for div_tag in div_tags:
        # Find all a tags within the div
        a_tags = div_tag.find_all('a', href=True)

        with open(csv_file_name, mode='a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)

            for a_tag in a_tags:
                # Resolve against the page URL so relative links also work;
                # absolute links pass through unchanged.
                post_url = urljoin(url, a_tag['href'])

                # The image URL is carried by a <link itemprop="image"> tag
                # nested inside the anchor.
                link_tag = a_tag.find('link', itemprop='image')
                image_url = link_tag.get('href') if link_tag else None

                # Write post URL and image URL to CSV
                writer.writerow([post_url, image_url])
                print(f"Post URL: {post_url}, Image URL: {image_url} written to CSV.")

                # Extract metadata from the post page
                post_soup = _get_soup(post_url)
                if post_soup is None:
                    print(f"Failed to fetch post URL: {post_url}")
                    continue

                # Extract view count, download count, like count
                stats_div = post_soup.find('div', class_='stats')
                view_count = _stat_value(stats_div, '# of views')
                download_count = _stat_value(stats_div, '# of downloads')
                like_count = _stat_value(stats_div, '# of likes')

                # Extract "Online since" information
                online_since = _online_since(post_soup)

                # Write metadata to CSV
                writer.writerow([view_count, download_count, like_count, online_since])
                print(f"View count: {view_count}, Download count: {download_count}, Like count: {like_count}, Online since: {online_since}")

# Listing endpoint to scrape, and how many of its result pages to visit.
base_url = "https://skitterphoto.com/photos/picks"
num_pages = 1  # Number of pages to scrape

# Visit pages 1 through num_pages (inclusive), one request per listing page.
for page_num in range(1, num_pages + 1):
    page_url = base_url + "?page=" + str(page_num)
    print(f"Constructed URL: {page_url}")
    print("Scraping page " + str(page_num) + "...")
    fetch_data_from_page(page_url)

print("URLs download complete.")


Constructed URL: https://skitterphoto.com/photos/picks?page=1
Scraping page 1...
Post URL: https://skitterphoto.com/photos/12254, Image URL: https://skitterphoto.com/photos/skitterphoto-12254-default.jpg written to CSV.
View count: 106, Download count: 5, Like count: 1, Online since: None
Post URL: https://skitterphoto.com/photos/12172/old-couple, Image URL: https://skitterphoto.com/photos/skitterphoto-12172-default.jpg written to CSV.
View count: 224, Download count: 3, Like count: 1, Online since: None
Post URL: https://skitterphoto.com/photos/12169/distels, Image URL: https://skitterphoto.com/photos/skitterphoto-12169-default.jpg written to CSV.
View count: 190, Download count: 5, Like count: 1, Online since: None
Post URL: https://skitterphoto.com/photos/12166/tree, Image URL: https://skitterphoto.com/photos/skitterphoto-12166-default.jpg written to CSV.
View count: 216, Download count: 9, Like count: 1, Online since: None
Post URL: https://skitterphoto.com/photos/12142/basking-in-

KeyboardInterrupt: 