In [1]:
import os, pymongo, pprint
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pymongo import MongoClient
from app.ml.utils.mongo_vectorstore import MongoVectorStore
from app.ml.utils.generate_trend_vectors import summaries_by_gender
from dotenv import load_dotenv
from db import global_init

In [2]:
load_dotenv(dotenv_path="../.env")
global_init()

Connecting to MongoDB at mongodb+srv://mikeguyon98:RmWPuse65PYolM81@cluster0.6db0wou.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0


In [3]:
import requests
from bs4 import BeautifulSoup
import boto3
from dotenv import load_dotenv
import os
import datetime
from pymongo import MongoClient
from PIL import Image
import io

client = MongoClient(os.getenv("MONGO_URI"))
db = client["test"]

# Convert any image to JPEG
def convert_to_jpeg(image):
    if image.format != 'JPEG':
        image = image.convert('RGB')
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='JPEG')
    return img_byte_arr

# Resize image if larger than 20MB
def resize_image(image, max_size_mb=20):
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='JPEG')
    img_byte_arr.seek(0)

    if img_byte_arr.getbuffer().nbytes > max_size_mb * 1024 * 1024:  # Convert MB to bytes
        factor = (max_size_mb * 1024 * 1024 / img_byte_arr.getbuffer().nbytes)**0.5
        new_size = (int(image.size[0] * factor), int(image.size[1] * factor))
        image = image.resize(new_size, Image.ANTIALIAS)
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format='JPEG')

    return img_byte_arr


def search_images_bing(api_key, term, count=10):
    print(api_key)
    print(count)
    url = "https://api.bing.microsoft.com/v7.0/images/search"
    headers = {"Ocp-Apim-Subscription-Key": api_key}
    params = {"q": term, "imageType": "photo", "count": count}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    search_results = response.json()
    image_urls = [img["contentUrl"] for img in search_results["value"]]
    return image_urls


def upload_image_to_s3(img_byte_arr, bucket_name, file_name):
    session = boto3.Session(
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )
    s3 = session.client("s3")
    
    try:
        s3.upload_fileobj(io.BytesIO(img_byte_arr.getvalue()), bucket_name, file_name)
        print(f"Successfully uploaded {file_name} to S3 bucket {bucket_name}")
        # Get the public URL for the uploaded image
        public_url = f"https://{bucket_name}.s3.amazonaws.com/{file_name}"
        return public_url
    except Exception as e:
        print(f"Failed to upload {file_name}: {e}")
        return None

def process_and_upload_image(image_url, bucket_name):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
    response = requests.get(image_url, stream=True, headers=headers)
    try:
        response.raise_for_status()
        image = Image.open(response.raw)
        img_byte_arr = convert_to_jpeg(image)
        img_byte_arr = resize_image(Image.open(img_byte_arr))
        file_name = f"{image_url.split('/')[-1].split('.')[0]}.jpg"
        return upload_image_to_s3(img_byte_arr, bucket_name, file_name)
    except Exception as e:
        print(f"Error processing image {image_url}: {e}")
        return None



def get_images(references : dict) -> None:
    for man in references["male"]:
        process_image(man, "male")
    for female in references["female"]:
        process_image(female, "female")
    print("Images uploaded successfully")

def process_image(person: str, gender: str) -> None:
    keyword = f"{person} outfits {datetime.datetime.now().year}"
    total_images = 4
    bucket_name = "modemixer-images"
    image_urls = search_images_bing(os.getenv("BING_SEARCH_API_KEY"), keyword, count=total_images)
    for image_url in image_urls:
        s3_url = process_and_upload_image(image_url, bucket_name)
        if s3_url:
            document = {
                "url": s3_url,
                "gender": gender,
                "created_at": datetime.datetime.now(),
            }
            db.FashionReference.insert_one(document)
            if db.FashionReference.find_one(document):
                print("Document inserted successfully")
            else:
                print("Failed to insert document")
        else:
            print(f"Failed to process image {image_url}")


In [4]:
print(search_images_bing(os.getenv("BING_SEARCH_API_KEY"), "Kylie Jenner outfits"))

e0a93276ad3a4757bdb2184a1d582287
10
['https://media.glamour.com/photos/5e12384f297e2e00083c5df0/master/w_1600%2Cc_limit/GettyImages-1074669474.jpg', 'http://cos.h-cdn.co/assets/16/18/1462488249-gettyimages-527501278.jpg', 'https://www.hawtcelebs.com/wp-content/uploads/2019/02/kylie-jenner-at-pre-grammy-gala-and-grammy-salute-in-beverly-hills-02-09-2019-2.jpg', 'https://celebmafia.com/wp-content/uploads/2020/04/kylie-jenner-street-style-beverly-hills-04-24-2020-1.jpg', 'https://celebmafia.com/wp-content/uploads/2019/12/kylie-jenner-night-out-style-december-2019-6.jpg', 'https://celebmafia.com/wp-content/uploads/2016/02/kylie-jenner-street-style-out-in-new-york-city-february-2016-3.jpg', 'https://celebmafia.com/wp-content/uploads/2016/02/kylie-jenner-street-style-out-in-new-york-city-february-2016-1.jpg', 'https://celebmafia.com/wp-content/uploads/2017/02/kylie-jenner-heads-to-a-store-in-beverly-hills-2-27-2017-1.jpg', 'https://celebmafia.com/wp-content/uploads/2015/03/kylie-jenner-style

In [5]:
references = {
    "male" : [
        "David Beckham",
        "Timothée Chalamet",
        "Donald Glover",
        "Lil Nas X",
        "Troye Sivan",
        "Pharrell Williams",
        "Brad Pitt",
        "ASAP Rocky",
        "Justin Bieber",
        "Harry Styles",
    ],
    "female" : [
        "Rihanna",
        "Zendaya",
        "Bella Hadid",
        "Ariana Grande",
        "Billie Eilish",
        "Dua Lipa",
        "Selena Gomez",
        "Kylie Jenner",
        "Taylor Swift",
        "Beyonce",
    ]
}

In [6]:
get_images(references)

e0a93276ad3a4757bdb2184a1d582287
4
Successfully uploaded 20200430-David-02.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded beckham-outfits-8.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded David-Beckham-formal-style.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded 20200430-David-07.jpg to S3 bucket modemixer-images
Document inserted successfully
e0a93276ad3a4757bdb2184a1d582287
4
Successfully uploaded gettyimages-1178964377_p.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded GettyImages-1388058563.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded c1c834fc6935ebc6213020da9525c724.jpg to S3 bucket modemixer-images
Document inserted successfully
Successfully uploaded timothee-chalamet-golden-globes-2024-06.jpg to S3 bucket modemixer-images
Document inserted successfully
e0a93276ad3a4757bdb2184a1d582287