In [None]:
# Install packages
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib pymongo pillow requests scikit-learn rembg onnxruntime tqdm

In [None]:
from google.colab import auth
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials

# Authenticate with Google Drive
from google.colab import auth
# Authenticate the current Colab user; this must run before default() below.
auth.authenticate_user()

from google.auth import default
# default() returns a pair; only the credentials object is used here and the
# second value is discarded.
creds, _ = default()

from googleapiclient.discovery import build
# Drive API v3 client; the module-level `service` is what the listing and
# download helpers later in this notebook call.
service = build('drive', 'v3', credentials=creds)


In [None]:
# MongoDB connection
import os
from getpass import getpass

import pymongo

# The connection string embeds a username and password, so it must never be
# stored in the notebook. Read it from the MONGO_URI environment variable and
# fall back to an interactive, non-echoing prompt when it is not set.
# SECURITY: any credential that was ever saved in a copy of this notebook
# should be treated as leaked and rotated.
MONGO_URI = os.environ.get("MONGO_URI") or getpass("MongoDB connection URI: ")

# TLS certificate and hostname verification are deliberately left at their
# secure defaults: disabling them (tlsAllowInvalidCertificates /
# tlsAllowInvalidHostnames) would let a man-in-the-middle capture the
# credentials and the data.
client = pymongo.MongoClient(
    MONGO_URI,
    retryWrites=True,
    tls=True,
    serverSelectionTimeoutMS=60000,  # wait up to 60 s for a reachable server
)
db = client['color_analysis']
collection = db['photos']

In [None]:
# Connectivity check: a "ping" admin command forces a round trip to the server.
try:
    client.admin.command("ping")
except Exception as ping_error:
    print("MongoDB error:", ping_error)
else:
    print("MongoDB connected!")

MongoDB connected!


In [None]:
from rembg import remove
from PIL import Image
import io
import requests

def remove_background(image_path, timeout=30):
    """Download an image over HTTP and strip its background with rembg.

    Args:
        image_path: URL of the image to fetch. Despite the name, the value is
            passed to ``requests.get`` rather than opened as a local file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The image opened from rembg's output bytes, or ``None`` if the
        download, the background removal, or the decoding failed (the error
        is printed).
    """
    try:
        # The context manager releases the HTTP connection once the body has
        # been read, including when raise_for_status() raises.
        with requests.get(image_path, timeout=timeout) as response:
            response.raise_for_status()
            input_img = response.content
        output_img = remove(input_img)
        return Image.open(io.BytesIO(output_img))
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"Error removing background: {e}")
        return None


In [None]:
from sklearn.cluster import KMeans
import numpy as np

def extract_colors_with_percentage(img_without_bg, k=4):
    """Cluster the visible pixels of an image into dominant colours.

    The image is converted to RGBA, resized to 200x200, and every pixel whose
    alpha is zero is discarded. The remaining RGB values are clustered with
    KMeans, and each cluster centre is reported with the share of pixels
    assigned to it.

    Args:
        img_without_bg: Image object providing ``convert`` and ``resize``
            (a PIL image in this notebook), or ``None``.
        k: Requested number of colour clusters. When fewer than ``k`` visible
            pixels exist, the number of clusters is reduced to match, so the
            result may contain fewer than ``k`` entries.

    Returns:
        ``None`` when ``img_without_bg`` is ``None``; otherwise a dict keyed by
        rank as a string (``"1"`` is the largest cluster) whose values are
        ``{"color": "#rrggbb", "percentage": float}``, with percentages
        rounded to two decimals.

    Raises:
        ValueError: If the image has no pixel with non-zero alpha.
    """
    if img_without_bg is None:
        return None

    img = img_without_bg.convert("RGBA")
    img = img.resize((200, 200))
    arr = np.array(img)

    # Drop fully transparent pixels (removed background) and keep RGB only.
    mask = arr[:, :, 3] > 0
    pixels = arr[mask][:, :3]

    if len(pixels) == 0:
        # KMeans would fail here with an opaque "0 sample(s)" error; raise the
        # same exception type with a message that names the real cause.
        raise ValueError("Image has no visible (non-transparent) pixels to analyze")

    # KMeans needs at least as many samples as clusters.
    n_clusters = min(k, len(pixels))

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(pixels)
    colors = kmeans.cluster_centers_.astype(int)

    # Pixels per cluster; minlength keeps an entry for any empty cluster.
    counts = np.bincount(labels, minlength=n_clusters)
    total_pixels = len(labels)

    # Pair each cluster's hex colour with its rounded percentage.
    combined = []
    for i in range(n_clusters):
        r, g, b = colors[i]
        hexcode = "#{:02x}{:02x}{:02x}".format(int(r), int(g), int(b))
        percent = round(float(counts[i] / total_pixels * 100), 2)
        combined.append((hexcode, percent))

    # Largest share first; sorted() is stable, so ties keep cluster order.
    combined.sort(key=lambda item: item[1], reverse=True)

    # Numbered dict {"1": {...}, "2": {...}} ordered by rank.
    return {
        str(rank): {"color": hexcode, "percentage": percent}
        for rank, (hexcode, percent) in enumerate(combined, start=1)
    }


In [None]:
# Persist one analysed photo as a MongoDB document
def save_photo_data(photo_url, colors_sorted, is_available=True, gender="female"):
    """Insert a single photo record through the module-level `collection`.

    Args:
        photo_url: Value stored under "photo_url".
        colors_sorted: Value stored under "colors_sorted".
        is_available: Value stored under "is_available".
        gender: Value stored under "gender".

    Returns:
        The `inserted_id` reported by `collection.insert_one`.
    """
    insert_outcome = collection.insert_one(
        dict(
            photo_url=photo_url,
            colors_sorted=colors_sorted,
            is_available=is_available,
            gender=gender,
        )
    )
    return insert_outcome.inserted_id


In [None]:
# Function to list files in a folder
def list_files_in_folder(folder_id):
    """Return every non-trashed image file directly inside a Drive folder.

    Args:
        folder_id: ID of the Drive folder whose children are listed.

    Returns:
        A list of file resources (dicts) restricted to the requested fields:
        id, name, mimeType and webContentLink. All result pages are followed,
        so the list is complete.
    """
    # `trashed = false` matters: files.list also returns items sitting in the
    # trash unless the query excludes them.
    query = (
        f"'{folder_id}' in parents "
        "and mimeType contains 'image/' "
        "and trashed = false"
    )

    files = []
    page_token = None
    while True:
        response = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name, mimeType, webContentLink)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))
        # The absence of nextPageToken marks the last page.
        page_token = response.get('nextPageToken')
        if page_token is None:
            break
    return files

In [None]:
from googleapiclient.http import MediaIoBaseDownload
import io

def download_drive_image(file_id):
    """Fetch a Drive file's content into memory.

    Args:
        file_id: ID passed to `files().get_media` on the module-level
            `service`.

    Returns:
        The downloaded content as bytes, or None if any step raised (the
        error is printed).
    """
    try:
        buffer = io.BytesIO()
        media_request = service.files().get_media(fileId=file_id)
        downloader = MediaIoBaseDownload(buffer, media_request)

        # Keep pulling chunks until the downloader reports completion.
        finished = False
        while not finished:
            _progress, finished = downloader.next_chunk()

        # Entire buffer contents: the raw image bytes.
        return buffer.getvalue()

    except Exception as e:
        print(f"Error downloading image: {e}")
        return None


In [None]:
from tqdm import tqdm

# Main function
def process_drive_folder(folder_id, gender):
    """Analyse every image in a Drive folder and store the results.

    For each image listed by `list_files_in_folder`, the file is downloaded,
    its background is removed, its dominant colours are extracted, and a
    record is written with `save_photo_data`. A failure on one file is
    printed and does not stop the remaining files.

    Args:
        folder_id: ID of the Drive folder to process.
        gender: Label stored with every record and shown in the progress bar.
    """
    # Local import: only `remove` is imported at the notebook level.
    from rembg import new_session

    files = list_files_in_folder(folder_id)

    # Build the background-removal session once and reuse it for every image;
    # calling remove() without a session sets one up again on each call.
    # "u2net" is the model remove() uses by default.
    session = new_session("u2net") if files else None

    for file in tqdm(files, desc=f"Processing {gender} images"):
        try:
            # Download image
            url = file['webContentLink']
            file_id = file['id']
            raw_bytes = download_drive_image(file_id)

            if raw_bytes is None:
                print(f"Skipping {file['name']} — download failed")
                continue

            # remove background
            output_img = remove(raw_bytes, session=session)
            image_without_background = Image.open(io.BytesIO(output_img))

            # Analyze colors
            color_json = extract_colors_with_percentage(image_without_background)
            if not color_json:
                # Do not store a record that carries no colour data.
                print(f"Skipping {file['name']} — no colors extracted")
                continue

            # Save to database
            save_photo_data(
                photo_url=url,
                colors_sorted=color_json,
                is_available=True,
                gender=gender
            )
        except Exception as e:
            # Keep going: one bad file must not abort the whole folder.
            print(f"Error processing {file['name']}: {str(e)}")


In [None]:
def list_folders_in_folder(parent_folder_id):
    """Return every non-trashed sub-folder directly inside a Drive folder.

    Args:
        parent_folder_id: ID of the Drive folder whose child folders are
            listed.

    Returns:
        A list of dicts with the `id` and `name` of each sub-folder. All
        result pages are followed, so the list is complete.
    """
    # Exclude trashed folders explicitly; files.list returns them otherwise.
    query = (
        f"'{parent_folder_id}' in parents "
        "and mimeType='application/vnd.google-apps.folder' "
        "and trashed = false"
    )

    folders = []
    page_token = None
    while True:
        results = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        folders.extend(results.get('files', []))
        # The absence of nextPageToken marks the last page.
        page_token = results.get('nextPageToken')
        if page_token is None:
            break
    return folders

# Print the id and name of each sub-folder under the given parent Drive folder.
print(list_folders_in_folder("14p2_Q4uNSa-_0xK4FnlBligY_VdfDr6M"))




[{'id': '1eXrlk63mqgvydGRv0NQnSgI5O0BKesKf', 'name': 'female'}, {'id': '1U8slwm-8ucUSn8BsjJhqorj620byYW4h', 'name': 'male'}]


In [12]:
# Drive folder IDs, one per gender (replace with your actual folder IDs)
FEMALE_FOLDER_ID = "1eXrlk63mqgvydGRv0NQnSgI5O0BKesKf"
MALE_FOLDER_ID = "1U8slwm-8ucUSn8BsjJhqorj620byYW4h"

# Run the pipeline for each folder in turn: female first, then male
for gender_label, drive_folder_id in (
    ("female", FEMALE_FOLDER_ID),
    ("male", MALE_FOLDER_ID),
):
    process_drive_folder(drive_folder_id, gender_label)

print("Done.")

Processing female images:   0%|          | 0/638 [00:00<?, ?it/s]Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.

  0%|                                               | 0.00/176M [00:00<?, ?B/s][A
  4%|█▍                                    | 6.57M/176M [00:00<00:02, 65.7MB/s][A
  9%|███▎                                  | 15.2M/176M [00:00<00:02, 78.1MB/s][A
 14%|█████▏                                | 24.0M/176M [00:00<00:01, 82.5MB/s][A
 19%|███████                               | 32.8M/176M [00:00<00:01, 84.4MB/s][A
 23%|████████▉                             | 41.2M/176M [00:00<00:01, 77.0MB/s][A
 28%|██████████▋                           | 49.5M/176M [00:00<00:01, 79.0MB/s][A
 33%|████████████▌                         | 58.0M/176M [00:00<00:01, 80.9MB/s][A
 38%|██████████████▍                       | 66.7M/176M [00:00<00:01, 82.6MB/s][A
 43%|████████████████▎                     | 75.4M/176

Done.



