In [None]:
! pip install open_clip_torch
! pip install ftfy
! pip install cohere

In [2]:
from transformers import AutoModel, AutoProcessor
import torch
from sklearn.metrics.pairwise import cosine_similarity
import cohere
import pandas as pd
from tqdm import tqdm
import numpy as np

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
df = pd.read_csv("/content/sampled_data.csv")

In [6]:
df = df.drop_duplicates()

# List of accessory-related columns to remove
accessory_columns = [
    "image_url_1", "gender",
    "accessory_type", "bag_type", "belt_type", "heels_width", "heels_height",
    "boots-shoe_type", "shoe_material", "bracelet_type", "glasses_type",
    "headwear_type", "necklet_type", "neckwear_type", "sandals-shoe_type",
    "shorts_type", "slippers-shoe_type", "sneakers_type", "sportswear_type",
    "sweatshirt_type", "swimwear_type", "tank_type"
]

# Drop the columns from the DataFrame
df = df.drop(columns=accessory_columns, errors='ignore')


In [7]:
# Define the function for text description
def create_text_description(row):
    """
    Generates a formatted descriptive text while omitting empty values.
    """
    def get_value(col_name):
        """Return the value if it's not null, otherwise return an empty string."""
        return str(row[col_name]).strip() if pd.notnull(row[col_name]) else ""

    # Structured description format
    description_parts = []

    # Main clothing attributes
    category = get_value("category")
    occasion = get_value("occasion")
    style = get_value("style")
    material = get_value("material")
    colors = get_value("colors")
    pattern = get_value("pattern")
    more_attributes = get_value("more_attributes")

    tops_fit = get_value("tops_fit")
    tops_length = get_value("tops_length")
    sleeve_type = get_value("sleeve_type")
    sleeve_length = get_value("sleeve_length")
    neckline_type = get_value("neckline_type")
    blazer_neckline_type = get_value("blazer_neckline_type")

    overclothes_type = get_value("overclothes_type")
    overclothes_neckline_type = get_value("overclothes_neckline_type")
    overclothes_closure = get_value("overclothes_closure")
    overclothes_sleeveless_type = get_value("overclothes-sleeveless_type")

    bottoms_fit = get_value("bottoms_fit")
    bottoms_length = get_value("bottoms_length")
    skirt_type = get_value("skirt_type")
    skirt_length = get_value("skirt_length")
    jumpsuit_length = get_value("jumpsuit_length")

    waist_type = get_value("waist_type")
    poncho_type = get_value("poncho_type")
    brand = get_value("brand")

    # Build description dynamically
    if category:
        description_parts.append(f"This is a {category} designed for {occasion}." if occasion else f"This is a {category}.")
    if style:
        description_parts.append(f"It belongs to the {style} style category.")
    if material:
        description_parts.append(f"It is made from {material}.")
    if colors:
        description_parts.append(f"The item is available in {colors}.")
    if pattern:
        description_parts.append(f"It features a {pattern} pattern.")
    if more_attributes:
        description_parts.append(f"Additional design elements include {more_attributes}.")

    # Tops description
    tops_description = []
    if tops_fit:
        tops_description.append(f"a {tops_fit} fit")
    if tops_length:
        tops_description.append(f"a {tops_length} length")
    if sleeve_type:
        tops_description.append(f"a {sleeve_type} sleeve type")
    if sleeve_length:
        tops_description.append(f"a {sleeve_length} sleeve length")
    if neckline_type:
        tops_description.append(f"a {neckline_type} neckline")

    if tops_description:
        description_parts.append("For tops, it has " + ", ".join(tops_description) + ".")

    if blazer_neckline_type:
        description_parts.append(f"If it's a blazer, the neckline follows the {blazer_neckline_type} style.")

    # Overclothes description
    overclothes_description = []
    if overclothes_type:
        overclothes_description.append(f"a {overclothes_type} type")
    if overclothes_neckline_type:
        overclothes_description.append(f"a {overclothes_neckline_type} neckline")
    if overclothes_closure:
        overclothes_description.append(f"a {overclothes_closure} closure")

    if overclothes_description:
        description_parts.append("If it's an overclothes item, it has " + ", ".join(overclothes_description) + ".")

    if overclothes_sleeveless_type:
        description_parts.append(f"If sleeveless, it belongs to the {overclothes_sleeveless_type} category.")

    # Bottoms description
    bottoms_description = []
    if bottoms_fit:
        bottoms_description.append(f"a {bottoms_fit} fit")
    if bottoms_length:
        bottoms_description.append(f"a {bottoms_length} length")

    if bottoms_description:
        description_parts.append("For bottoms, it features " + ", ".join(bottoms_description) + ".")

    if skirt_type and skirt_length:
        description_parts.append(f"If it's a skirt, it falls under the {skirt_type} category with a {skirt_length} length.")
    elif skirt_type:
        description_parts.append(f"If it's a skirt, it falls under the {skirt_type} category.")
    elif skirt_length:
        description_parts.append(f"If it's a skirt, it has a {skirt_length} length.")

    if jumpsuit_length:
        description_parts.append(f"If it's a jumpsuit, it has a {jumpsuit_length} length.")

    if waist_type:
        description_parts.append(f"It has a {waist_type} waist design.")

    if poncho_type:
        description_parts.append(f"If it’s a poncho, it follows the {poncho_type} type.")

    # Brand information
    if brand:
        description_parts.append(f"The item is produced by the brand {brand}.")

    return " ".join(description_parts)

# Apply function to DataFrame
df["text_description"] = df.apply(create_text_description, axis=1)


In [8]:
df['text_description'] = df['text_description'].astype(str)

In [9]:
# Load the model and processor for embedding
model = AutoModel.from_pretrained('Marqo/marqo-fashionSigLIP', trust_remote_code=True).to(device)
processor = AutoProcessor.from_pretrained('Marqo/marqo-fashionSigLIP', trust_remote_code=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/302 [00:00<?, ?B/s]

marqo_fashionSigLIP.py:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Marqo/marqo-fashionSigLIP:
- marqo_fashionSigLIP.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
You are using a model of type siglip to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


model.safetensors:   0%|          | 0.00/813M [00:00<?, ?B/s]

open_clip_model.safetensors:   0%|          | 0.00/813M [00:00<?, ?B/s]

open_clip_config.json:   0%|          | 0.00/881 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/516 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.53k [00:00<?, ?B/s]

In [10]:
# Define batch size for GPU
batch_size = 64

# Initialize an empty list for embeddings
embeddings = []

# Generate embeddings using GPU
for i in tqdm(range(0, len(df), batch_size), desc="Generating Embeddings with GPU"):
    batch_texts = df['text_description'].iloc[i:i+batch_size].tolist()


    processed = processor(
        text=batch_texts,
        padding=True,
        return_tensors="pt",
        truncation=True
    )

    # Move data to GPU
    input_ids = processed['input_ids'].to(device)

    # Generate embeddings
    with torch.no_grad():
        text_features = model.get_text_features(input_ids, normalize=True)

    # Move embeddings back to CPU for storage
    batch_embeddings = text_features.cpu().numpy()
    embeddings.extend(batch_embeddings)

# Store embeddings in the DataFrame
df['embedding'] = embeddings

# Convert list of embeddings into a numpy array for similarity calculations
embedding_matrix = np.vstack(df['embedding'].to_numpy())


Generating Embeddings with GPU: 100%|██████████| 8/8 [02:04<00:00, 15.54s/it]


In [11]:
# Convert embeddings to string for saving in CSV
df['embedding'] = df['embedding'].apply(lambda x: ','.join(map(str, x)))

# Save the DataFrame as a CSV file
output_path = '/content/final_fashion_recommendations.csv'
df.to_csv(output_path, index=False)

print(f"DataFrame saved to {output_path}")


DataFrame saved to /content/final_fashion_recommendations.csv


In [13]:
# Function to generate an embedding for a given query using GPU
def generate_embedding(text):
    processed = processor(text=[text], padding=True, return_tensors="pt", truncation=True)
    input_ids = processed['input_ids'].to(device)

    with torch.no_grad():
        embedding = model.get_text_features(input_ids, normalize=True)

    return embedding[0].cpu().numpy()  # Move the result back to CPU


# Function to find similar fashion items
def find_similar_fashion_items(query_text, top_k=10):
    query_embedding = generate_embedding(query_text).reshape(1, -1)
    similarities = cosine_similarity(query_embedding, embedding_matrix)[0]
    top_indices = similarities.argsort()[-top_k:][::-1]
    recommended_items = df.iloc[top_indices][['barcode']]
    return recommended_items

In [14]:
API_KEY = '3GuDMsWlQRGnwC8crhZUqmoh39hqrLSlpgPQVQTS'
co = cohere.Client(API_KEY)

In [15]:
# Function to generate fashion advice based on body type
def analyze_body_type(body_type):
    # Custom prompt
    prompt = f"""
    You are a professional female fashion consultant specializing in personalized styling based on body types.

    Please provide a concise list of characteristics for a {body_type} body type based on the following features:

    - **Tops Fit:** [Short answer]
    - **Sleeve Type:** [Short answer]
    - **Neckline Type:** [Short answer]
    - **Sleeve Length:** [Short answer]
    - **Waist Type:** [Short answer]
    - **Bottoms Length:** [Short answer]
    - **Skirt Type:** [Short answer]
    - **Bottoms Fit:** [Short answer]
    - **Bottoms Type:** [Short answer]

    Additionally, provide recommendations for:
    - **Style:** [Short answer]
    - **Colors:** [Short answer]
    - **Patterns:** [Short answer]

    If you don't have any ideas for a specific feature, skip it without explanation.

    **Format:** Follow the bullet-point format exactly as shown above without additional explanations or paragraphs.

    Write in a concise, insightful, and professional tone.
    """


    # Generate response using Cohere
    response = co.generate(
        model='command-xlarge-nightly',
        prompt=prompt,
        max_tokens=1500,
        temperature=0.7,
        frequency_penalty=0.5,
        # stop_sequences=["## Final Thoughts", "## Conclusion", "\n\n"]
    )

    # Check the full response object
    print("Full Response Object:", response)

    # Extract the generated text
    result_text = response.generations[0].text.strip()

    # Print the output in a clean format


    # Return the result (optional)
    return result_text

In [18]:


fashion_advice = analyze_body_type("Hourglass")
print("+++++++++++++++++++++++++++")
print(fashion_advice)
recommended_items = find_similar_fashion_items(fashion_advice)

# Display results
print(recommended_items)

+++++++++++++++++++++++++++
- **Tops Fit:** Fitted or slightly tailored to define the waist.
- **Sleeve Type:** Fitted sleeves, cap sleeves, or flutter sleeves.
- **Neckline Type:** V-necks, sweetheart necklines, and off-the-shoulder styles.
- **Sleeve Length:** Short to three-quarter length sleeves.
- **Waist Type:** Well-defined, accentuated waist.
- **Bottoms Length:** Mid-rise or high-rise bottoms that sit at the smallest part of the waist.
- **Skirt Type:** Pencil, A-line, or circle skirts.
- **Bottoms Fit:** Fitted through the hips and thighs.
- **Bottoms Type:** Flared or wide-leg pants.

- **Style:** Emphasize the waist-to-hip ratio with belted dresses, peplum tops, and wrap styles.
- **Colors:** Monochromatic outfits or color blocking to enhance curves.
- **Patterns:** Opt for vertical stripes, diagonal patterns, or strategically placed prints to create a longer, slimmer silhouette.
            barcode
463  50062301040101
193  50061601040101
188  45671204040101
327  5056590104

In [19]:
fashion_advice = analyze_body_type("Rectangle")
print(fashion_advice)
recommended_items = find_similar_fashion_items(fashion_advice)

# Display results
print(recommended_items)


- **Tops Fit:** Fitted or slightly loose, but avoid boxy shapes.
- **Sleeve Type:** Flared or bell sleeves.
- **Neckline Type:** V-necks, scoop necks.
- **Sleeve Length:** Short or three-quarter length sleeves.
- **Waist Type:** Belted or wrap styles to create the illusion of a waist.
- **Bottoms Length:** Mid-rise or high-rise bottoms.
- **Skirt Type:** A-line, pleated, or flared skirts.
- **Bottoms Fit:** Fitted or tailored.
- **Bottoms Type:** Wide-leg pants, bootcut jeans.

- **Style Recommendations:**
  - Wrap dresses
  - Structured jackets
  - Peplum tops
  - Belted outfits

- **Colors:** Monochromatic looks, or try color blocking to create visual interest.
- **Patterns:** Vertical stripes, color blocking, or subtle prints.
            barcode
442  49716501030101
188  45671204040101
417  48037701040101
297  46313701050101
328  49086901040101
287  48812601050101
353  47835101030101
99   48530401010101
470  49772501050101
439  50144901040101


In [20]:
fashion_advice = analyze_body_type("Apple")
print(fashion_advice)
recommended_items = find_similar_fashion_items(fashion_advice)

# Display results
print(recommended_items)

- **Tops Fit:** Fitted or slightly loose tops that define the waist.
- **Sleeve Type:** Cap sleeves, flutter sleeves.
- **Neckline Type:** V-necks, sweetheart necklines.
- **Sleeve Length:** Short or elbow-length sleeves.
- **Waist Type:** Belted or wrapped styles.
- **Bottoms Length:** Longer hemlines, mid-length skirts and dresses.
- **Skirt Type:** A-line, flared, or circle skirts.
- **Bottoms Fit:** Fitted at the hips with a slight flare.
- **Bottoms Type:** Wide-leg pants, bootcut jeans.
- **Style:** Emphasize creating curves and adding volume to the lower body.
- **Colors:** Earth tones, rich jewel tones, pastels.
- **Patterns:** Vertical stripes, color blocking, and patterns with smaller prints.
            barcode
286  48337601030101
478  48312701020101
463  50062301040101
59   48429802030101
327  50565901040101
442  49716501030101
313  49968401020101
95   48529001080101
491  50499001030101
15   49925001060101
