## Import Libraries

In [1]:
import logging
import os
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup


## Configure Logging

In [2]:
# Route the scraper's log records into a file on disk.
log_file = "image_scraper.log"

# Each record is written as: timestamp - severity - message.
logging_options = {
    "filename": log_file,
    "level": logging.INFO,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
}
logging.basicConfig(**logging_options)


## Create Folder for Images

In [32]:
# Ask where this run's downloads should go. Surrounding whitespace is dropped
# so an accidental leading/trailing space does not end up in the directory name.
folder_name = input("Write Folder Name: ").strip()
save_dir = "images/" + folder_name

# Remember whether the directory was already present so the log line is only
# written when this cell actually creates it.
dir_already_present = os.path.isdir(save_dir)

# exist_ok=True avoids the check-then-create race; makedirs still raises
# FileExistsError if save_dir exists but is a regular file, which surfaces the
# problem here instead of later when the images are written.
os.makedirs(save_dir, exist_ok=True)
if not dir_already_present:
    logging.info(f"Created directory: {save_dir}")


## Enter Search Query & Define Headers

In [33]:
# Search term typed by the user; later cells also use it to build file names
# and the MongoDB collection name.
query = input("Enter Image Name: ")

# Browser-like User-Agent header sent along with the search request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
}

# Percent-encode the query so spaces and reserved characters (&, #, +, ...)
# stay inside the `q` parameter instead of corrupting the URL.
url = f"https://www.google.com/search?tbm=isch&q={quote_plus(query)}"

## Fetch and Parse the Page

In [34]:
# Start from an empty result so later cells always see a defined (and never
# stale) `images_tag`, even when the request below fails.
images_tag = []

try:
    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Treat HTTP error responses (4xx/5xx) as failures instead of parsing them.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    # Skip the first <img> (Google logo); slicing is safe on an empty result.
    images_tag = soup.find_all("img")[1:]
    print(f"Found {len(images_tag)} images.")
except Exception as e:
    logging.critical(f"Failed to fetch or parse page: {e}")
    print("Error occurred while loading the page.")


Found 20 images.


## Download and Save Images

In [35]:
# In-memory record of every image downloaded by this cell.
img_data_mongo = []

for idx, img_tag in enumerate(images_tag):
    try:
        # Tags without a src, or with a non-HTTP src (e.g. inline data: URIs),
        # cannot be fetched with requests, so skip them explicitly.
        image_url = img_tag.get('src')
        if not image_url or not image_url.startswith(("http://", "https://")):
            logging.warning(f"Skipped image {idx}: no downloadable src")
            continue

        # Bound the wait per image and refuse HTTP error responses so an error
        # page is never written to disk as if it were an image.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        image_data = response.content

        # Note: the "index" key holds the source URL, not the loop position.
        mydict = {"index": image_url, "image": image_data}
        img_data_mongo.append(mydict)

        file_path = os.path.join(save_dir, f"{query}_{idx}.jpg")
        with open(file_path, "wb") as f:
            f.write(image_data)

        logging.info(f"Saved image {idx} from {image_url}")
    except Exception as e:
        # Best effort: one failed image must not stop the remaining downloads.
        logging.error(f"Failed to download image {idx}: {e}")


## Import and Connect to MongoDB

In [28]:
from getpass import getpass

from pymongo import MongoClient
from urllib.parse import quote_plus

# Credentials come from the environment, falling back to an interactive prompt,
# so no secret is stored in the notebook. Both parts are percent-escaped because
# characters such as '@' or ':' would otherwise break the connection URI.
# NOTE(review): a password was previously committed in this cell in plain text;
# it should be rotated.
username = quote_plus(os.environ.get("MONGODB_USERNAME") or input("MongoDB username: "))
password = quote_plus(os.environ.get("MONGODB_PASSWORD") or getpass("MongoDB password: "))

# MongoDB URI
uri = f"mongodb+srv://{username}:{password}@images.m7rbfla.mongodb.net/?retryWrites=true&w=majority&appName=Images"

# Connect
client = MongoClient(uri)

# Ping the server to confirm the connection and credentials work.
try:
    client.admin.command('ping')
    print("✅ Connected to MongoDB Atlas!")
except Exception as e:
    # Record the failure in the log file as well as showing it in the notebook.
    logging.critical(f"MongoDB connection failed: {e}")
    print("❌ Connection failed:", e)


✅ Connected to MongoDB Atlas!


## Create Database and Collection

In [36]:
# Each search query gets its own collection inside the shared database.
collection_name = f"{query}_image_data"

db = client["ImageScraperDB"]
collection = db[collection_name]

## Insert Image Data While Scraping

In [37]:
# Reset the in-memory list; this loop stores images in MongoDB and on disk and
# does not append to it.
img_data_mongo = []

for idx, img_tag in enumerate(images_tag):
    try:
        # Tags without a src, or with a non-HTTP src (e.g. inline data: URIs),
        # cannot be fetched with requests, so skip them explicitly.
        image_url = img_tag.get('src')
        if not image_url or not image_url.startswith(("http://", "https://")):
            logging.warning(f"Skipped image {idx}: no downloadable src")
            continue

        # Bound the wait per image and refuse HTTP error responses so an error
        # page is never stored as if it were an image.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        image_data = response.content

        # Save to file
        file_path = os.path.join(save_dir, f"{query}_{idx}.jpg")
        with open(file_path, "wb") as f:
            f.write(image_data)

        # Save to MongoDB
        doc = {
            "index": idx,
            "query": query,
            "image_url": image_url,
            "image_data": image_data  # binary image
        }
        # Upsert keyed on (query, index) so re-running this cell replaces the
        # existing document instead of inserting a duplicate.
        collection.replace_one({"query": query, "index": idx}, doc, upsert=True)

        logging.info(f"Saved image {idx} to MongoDB and disk")
    except Exception as e:
        # Best effort: one failed image must not stop the remaining ones.
        logging.error(f"Error at image {idx}: {e}")


## View MongoDB Data

In [31]:
# Fetch one stored document to confirm the inserts worked.
sample = collection.find_one()

# find_one() returns None when the collection is empty; guard against that
# instead of failing with a TypeError on the subscript.
if sample is None:
    print("No documents found in the collection.")
else:
    print(sample["image_url"])

https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT0w0pCk-HyQT278B7u6lPPeUg4L2T2hD-QiD34uNLxi1MsH7aOWT3WxrUOgRY&s
