Import libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import os
import json

Request the Unsplash home page and check the response status

In [2]:
# URL and request headers for the page to scrape
url = 'https://unsplash.com/'
Headers = {
    # Browser-style User-Agent and language preference sent with the request
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.',
    'Accept-Language': 'en-US,en;q=0.5'
}
# requests applies no timeout by default; without one a stalled connection
# would block this cell indefinitely.
response = requests.get(url, headers=Headers, timeout=30)
# Show the HTTP status so a failed fetch is visible before parsing
print(response.status_code)

200


In [3]:
# Build the parse tree from the fetched page, then collect every <img> element
soup = BeautifulSoup(markup=response.text, features='html.parser')
img_tags = soup.find_all(name='img')


In [4]:
# Extract one URL per <img> tag, preferring srcset over src.
# A srcset value is a comma-separated list of "URL descriptor" candidates;
# only the URL of the first candidate is kept.
image_links = []
for img in img_tags:
    # .split() (no argument) tolerates leading/repeated whitespace and yields
    # an empty list for a blank srcset instead of an empty-string "URL".
    first_candidate = img.get('srcset', '').split(',')[0].split()
    if first_candidate:
        image_links.append(first_candidate[0])
    elif img.get('src'):
        # Fall back to src when srcset is missing or blank; empty src values are skipped
        image_links.append(img['src'])

In [5]:
# Sanity check: how many links were collected, plus a preview of the first five
print(f'Found {len(image_links)} image links.')
print(image_links[0:5])
# Make sure the output folder exists (no error if it is already there)
os.makedirs(name='unsplash_images', exist_ok=True)
# Starts empty; presumably meant to collect per-image metadata — not used in the visible cells
metadata_list = list()

Found 191 image links.
['https://plus.unsplash.com/premium_vector-1689096833880-42980c252802?dpr=1&h=304&q=80&w=190&auto=format&fit=crop&ixlib=rb-4.0.3', 'https://plus.unsplash.com/premium_vector-1689096833880-42980c252802?dpr=1&h=266&q=80&w=532&auto=format&fit=crop&ixlib=rb-4.0.3', 'https://plus.unsplash.com/premium_vector-1707445732171-ec2950176fe4?dpr=1&h=152&q=80&w=95&auto=format&fit=crop&ixlib=rb-4.0.3', 'https://plus.unsplash.com/premium_vector-1707445732171-ec2950176fe4?dpr=1&h=130&q=80&w=130&auto=format&fit=crop&ixlib=rb-4.0.3', 'https://plus.unsplash.com/premium_vector-1707445732035-ac1253c16a60?dpr=1&h=152&q=80&w=95&auto=format&fit=crop&ixlib=rb-4.0.3']


In [6]:
# Root folder that will hold one sub-folder per downloaded image
main_image_dir = 'unsplash_images'
os.makedirs(name=main_image_dir, exist_ok=True)

In [7]:
# Step 1: Download images and create empty JSON files
# Layout: <main_image_dir>/image_<idx>/image.jpg plus an empty metadata.json
for idx, link in enumerate(image_links):
    try:
        # Use a dedicated name so the page-level `response` parsed earlier is
        # not overwritten, and a timeout so one stalled download cannot hang
        # the whole loop.
        img_response = requests.get(link, timeout=30)
        if img_response.status_code == 200:
            img_dir = os.path.join(main_image_dir, f'image_{idx}')
            os.makedirs(img_dir, exist_ok=True)

            img_name = 'image.jpg'
            img_path = os.path.join(img_dir, img_name)
            with open(img_path, 'wb') as f:
                f.write(img_response.content)
            print(f'Downloaded {img_name} in {img_dir}')

            # Create empty JSON file (placeholder to be filled with metadata later)
            json_path = os.path.join(img_dir, 'metadata.json')
            with open(json_path, 'w') as json_file:
                json.dump({}, json_file, indent=4)
            print(f"Created empty JSON file for {img_name} in {img_dir}")

        else:
            print(f"Failed to download {link}: Status code {img_response.status_code}")
    except Exception as e:
        # Best effort: report the failure and continue with the next link
        print(f"Error occurred while downloading {link}: {e}")


Downloaded image.jpg in unsplash_images\image_0
Created empty JSON file for image.jpg in unsplash_images\image_0
Downloaded image.jpg in unsplash_images\image_1
Created empty JSON file for image.jpg in unsplash_images\image_1


KeyboardInterrupt: 

In [None]:
# PhotoTag.ai configuration
main_image_dir = 'unsplash_images'
# Read the API key from the environment rather than committing it to the
# notebook. Set PHOTO_TAG_API_KEY before starting the kernel; when it is
# unset the key is an empty string and the API will reject requests.
PHOTO_TAG_API_KEY = os.environ.get('PHOTO_TAG_API_KEY', '')
PHOTO_TAG_API_URL = 'https://server.phototag.ai/api/keywords'

In [None]:
# Step 1: Download images and create empty JSON files
# NOTE(review): this cell repeats the earlier download loop verbatim; consider keeping only one.
# Layout: <main_image_dir>/image_<idx>/image.jpg plus an empty metadata.json
for idx, link in enumerate(image_links):
    try:
        # Use a dedicated name so the page-level `response` parsed earlier is
        # not overwritten, and a timeout so one stalled download cannot hang
        # the whole loop.
        img_response = requests.get(link, timeout=30)
        if img_response.status_code == 200:
            img_dir = os.path.join(main_image_dir, f'image_{idx}')
            os.makedirs(img_dir, exist_ok=True)

            img_name = 'image.jpg'
            img_path = os.path.join(img_dir, img_name)
            with open(img_path, 'wb') as f:
                f.write(img_response.content)
            print(f'Downloaded {img_name} in {img_dir}')

            # Create empty JSON file (placeholder to be filled with metadata later)
            json_path = os.path.join(img_dir, 'metadata.json')
            with open(json_path, 'w') as json_file:
                json.dump({}, json_file, indent=4)
            print(f"Created empty JSON file for {img_name} in {img_dir}")

        else:
            print(f"Failed to download {link}: Status code {img_response.status_code}")
    except Exception as e:
        # Best effort: report the failure and continue with the next link
        print(f"Error occurred while downloading {link}: {e}")


Downloaded image.jpg in unsplash_images\image_0
Created empty JSON file for image.jpg in unsplash_images\image_0


KeyboardInterrupt: 

In [None]:
def get_tags_from_phototag(image_path):
    """Send image to PhotoTag.ai and get metadata.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. On any
        other status, or on any exception (including a missing file), the
        problem is printed and ``{'tags': []}`` is returned instead.
    """
    payload = {
        "language": "en",
        "maxKeywords": 5,
        "requiredKeywords": "beach,sky",
        "customContext": "vacation photo"
    }
    try:
        # The context manager closes the upload handle even when the request
        # fails; a bare open() here would leak one descriptor per call.
        with open(image_path, 'rb') as image_file:
            response = requests.post(
                PHOTO_TAG_API_URL,
                # Use the notebook-level key instead of a second hardcoded copy
                headers={"Authorization": f'Bearer {PHOTO_TAG_API_KEY}'},
                data=payload,
                files=[('file', image_file)],
                # Without a timeout a stalled upload would block forever
                timeout=60,
            )

        if response.status_code == 200:
            return response.json()

        print(f"Failed to get tags for {image_path}: Status code {response.status_code}, Response: {response.text}")
        return {'tags': []}
    except Exception as e:
        print(f"Error occurred while getting tags for {image_path}: {e}")
        return {'tags': []}

def update_json_with_metadata(img_dir, image_url):
    """Tag the image stored in ``img_dir`` and write the result to its metadata.json.

    Expects ``image.jpg`` and ``metadata.json`` to already exist inside
    ``img_dir``; when either is missing a message is printed and nothing is
    written. ``image_url`` is recorded in the saved metadata.
    """
    img_path = os.path.join(img_dir, 'image.jpg')
    json_path = os.path.join(img_dir, 'metadata.json')

    # Guard clause: nothing to do unless both files are present
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print(f"Image or JSON file not found in {img_dir}")
        return

    # Tags from PhotoTag.ai, extended with bookkeeping fields about the image
    metadata = get_tags_from_phototag(img_path)
    metadata.update(
        image_name='image.jpg',
        image_path=img_path,
        image_url=image_url,
    )

    # Overwrite the placeholder JSON with the collected metadata
    with open(json_path, 'w') as handle:
        json.dump(metadata, handle, indent=4)
    print(f"Metadata for {img_path} saved in {json_path}")

# Example usage: tag three sample images that were downloaded beforehand
main_image_dir = 'unsplash_images'

image_links = [
    'https://images.unsplash.com/photo-1491553895911-0055eca6402d',
    'https://images.unsplash.com/photo-1506748686214-e9df14d4d9d0',
    'https://images.unsplash.com/photo-1511203466129-824e631920d4',
    # Add more image URLs as needed
]

# The list position selects the matching image_<idx> folder
for idx, link in enumerate(image_links):
    img_dir = os.path.join(main_image_dir, 'image_' + str(idx))
    update_json_with_metadata(img_dir, link)

Metadata for unsplash_images\image_0\image.jpg saved in unsplash_images\image_0\metadata.json
Metadata for unsplash_images\image_1\image.jpg saved in unsplash_images\image_1\metadata.json
Metadata for unsplash_images\image_2\image.jpg saved in unsplash_images\image_2\metadata.json


In [None]:
from queue import Queue
import threading

In [None]:
# Function to get tags from PhotoTag.ai
def get_tags_from_phototag(image_path):
    """Upload an image to PhotoTag.ai and return the metadata it reports.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. On any
        other status, or on any exception (including a missing file), the
        problem is printed and ``{'tags': []}`` is returned instead.
    """
    payload = {
        "language": "en",
        "maxKeywords": 5,
        "requiredKeywords": "beach,sky",
        "customContext": "vacation photo"
    }
    try:
        # The context manager closes the upload handle even when the request
        # fails; a bare open() here would leak one descriptor per call.
        with open(image_path, 'rb') as image_file:
            response = requests.post(
                PHOTO_TAG_API_URL,
                # Use the notebook-level key instead of a second hardcoded copy
                headers={"Authorization": f'Bearer {PHOTO_TAG_API_KEY}'},
                data=payload,
                files=[('file', image_file)],
                # Without a timeout a stalled upload would block forever
                timeout=60,
            )

        if response.status_code == 200:
            return response.json()

        print(f"Failed to get tags for {image_path}: Status code {response.status_code}, Response: {response.text}")
        return {'tags': []}
    except Exception as e:
        print(f"Error occurred while getting tags for {image_path}: {e}")
        return {'tags': []}

# Function to update JSON file with metadata
def update_json_with_metadata(img_dir, image_url):
    """Write PhotoTag.ai metadata for ``img_dir``'s image into its metadata.json.

    Both ``image.jpg`` and ``metadata.json`` must already exist in
    ``img_dir``; otherwise a message is printed and nothing is written.
    ``image_url`` is stored alongside the tags.
    """
    img_path = os.path.join(img_dir, 'image.jpg')
    json_path = os.path.join(img_dir, 'metadata.json')

    # Bail out early when either expected file is absent
    if not (os.path.exists(img_path) and os.path.exists(json_path)):
        print(f"Image or JSON file not found in {img_dir}")
        return

    metadata = get_tags_from_phototag(img_path)
    # Add bookkeeping fields next to whatever the tagging call returned
    metadata.update(
        image_name='image.jpg',
        image_path=img_path,
        image_url=image_url,
    )

    with open(json_path, 'w') as handle:
        json.dump(metadata, handle, indent=4)
    print(f"Metadata for {img_path} saved in {json_path}")


In [None]:
# Function to download images, create JSON files, and update metadata using multithreading
def process_images_multithread(image_links, main_image_dir, max_workers=8):
    """Download, stub and tag every image using a bounded pool of threads.

    For each URL the worker creates ``<main_image_dir>/image_<idx>/``, saves
    the download as ``image.jpg``, writes an empty ``metadata.json`` and then
    calls ``update_json_with_metadata`` to fill it in.

    Args:
        image_links: Iterable of image URLs; each URL's position picks its
            ``image_<idx>`` folder.
        main_image_dir: Directory under which the per-image folders are created.
        max_workers: Upper bound on simultaneously running worker threads
            (values below 1 are treated as 1).

    Returns:
        None. Progress and failures are printed.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Serialises this function's own print calls so lines are not interleaved
    lock = threading.Lock()

    def worker(idx, link):
        """Process a single URL; every failure is reported instead of raised."""
        try:
            img_dir = os.path.join(main_image_dir, f'image_{idx}')
            os.makedirs(img_dir, exist_ok=True)

            # Download image; the timeout keeps one stalled transfer from
            # holding a pool slot forever.
            response = requests.get(link, timeout=30)
            if response.status_code != 200:
                with lock:
                    print(f"Failed to download {link}: Status code {response.status_code}")
                return

            img_path = os.path.join(img_dir, 'image.jpg')
            with open(img_path, 'wb') as f:
                f.write(response.content)
            with lock:
                print(f'Downloaded {img_path}')

            # Create empty JSON file, and only then announce it
            json_path = os.path.join(img_dir, 'metadata.json')
            with open(json_path, 'w') as json_file:
                json.dump({}, json_file, indent=4)
            with lock:
                print(f"Created empty JSON file for {img_path}")

            # Update JSON file with metadata
            update_json_with_metadata(img_dir, link)
        except Exception as e:
            with lock:
                print(f"Error occurred: {e}")

    jobs = list(enumerate(image_links))
    if not jobs:
        return

    # A bounded pool replaces one-thread-per-link, which would start as many
    # threads as there are URLs. Leaving the `with` block waits for all
    # submitted jobs to finish.
    pool_size = max(1, min(max_workers, len(jobs)))
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        for idx, link in jobs:
            pool.submit(worker, idx, link)


In [None]:
import time
import matplotlib.pyplot as plt

In [None]:
if __name__ == "__main__":
    # Destination folder for the per-image sub-directories
    main_image_dir = 'unsplash_images'

    # Example image links
    image_links = [
        'https://images.unsplash.com/photo-1491553895911-0055eca6402d',
        'https://images.unsplash.com/photo-1506748686214-e9df14d4d9d0',
        'https://images.unsplash.com/photo-1511203466129-824e631920d4',
        # Add more image URLs as needed
    ]

    # Download, stub and tag all images concurrently
    process_images_multithread(image_links, main_image_dir)

    
   

Failed to download https://images.unsplash.com/photo-1511203466129-824e631920d4: Status code 404
Downloaded unsplash_images\image_1\image.jpg
Created empty JSON file for unsplash_images\image_1\image.jpg
Downloaded unsplash_images\image_0\image.jpg
Created empty JSON file for unsplash_images\image_0\image.jpg
Failed to get tags for unsplash_images\image_1\image.jpg: Status code 403, Response: {"error":"There are not enough upload credits","data":null}
Metadata for unsplash_images\image_1\image.jpg saved in unsplash_images\image_1\metadata.json
Failed to get tags for unsplash_images\image_0\image.jpg: Status code 403, Response: {"error":"There are not enough upload credits","data":null}
Metadata for unsplash_images\image_0\image.jpg saved in unsplash_images\image_0\metadata.json
