# Importing 

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import torch

In [None]:
# Load the raw article catalogue from the CSV file in the working directory.
dataset= pd.read_csv("articles.csv")

## Cleaning Dataset

#### Only choosing columns with word descriptions

In [None]:
# Restrict the working frame to the object-typed (textual) columns of the catalogue.
df = dataset.select_dtypes(include="object")


#### Examining which columns we could need and which not

In [None]:
# Preview the first rows to judge which text columns are useful.
df.head()

In [None]:
# Preview the last rows as a second look at the column contents.
df.tail()

#### dropping unwanted columns

In [None]:
# Discard the text columns that were judged unnecessary after inspecting the frame.
df = df.drop(
    labels=[
        "index_code",
        "index_group_name",
        "prod_name",
        "perceived_colour_value_name",
        "perceived_colour_master_name",
    ],
    axis="columns",
)

In [None]:
# Show two randomly chosen rows of the reduced frame.
df.sample(n=2)

#### Sorting required and non required Columns

##### Sections and department columns

In [None]:
# Count every (department, index, section, garment group) combination and keep 15 of them.
# NOTE(review): .sample(15) draws 15 combinations at random, so despite the name these are
# not necessarily the most frequent ones; .head(15) would give those - confirm the intent.
top_combos = df[['department_name','index_name','section_name','garment_group_name']].value_counts().sample(15)

In [None]:
top_combos
# The recommendations only need the garment type, not whether an item is for ladies
# or men, so the department/index/section columns can be removed.

In [None]:
# Look at 10 randomly chosen (product group, garment group) pairs and their counts
# to see how the two columns relate.
df[["product_group_name","garment_group_name"]].value_counts().sample(10)

In [None]:
# Remove the department/index/section columns; only the garment type is kept.
df = df.drop(
    labels=["department_name", "index_name", "section_name"],
    axis="columns",
)

In [None]:
# Check the remaining columns after the drop.
df.head(2)

#### Combining columns

In [None]:
# product_group_name and garment_group_name carry largely overlapping information, so only
# one of them is kept. garment_group_name is kept because it says more about the article,
# but a few of its categories (e.g. "Special Offers") are uninformative. Where those occur
# the value is taken from product_group_name instead, so product_group_name can then be dropped.
df["garment_group_name"].unique()


In [None]:
# List the product-group labels to spot the uninformative ones.
df["product_group_name"].unique()

In [None]:
# Labels in each column that carry no usable garment information.
unwanted_values_garment = ['Special Offers', 'Unknown']
unwanted_values_product = ['Items', 'Unknown', 'Fun']

# Blank out the uninformative labels in both columns, then fill the gaps in the
# garment group with the (cleaned) product group of the same row.
garment_cleaned = df['garment_group_name'].replace(unwanted_values_garment, np.nan)
product_cleaned = df['product_group_name'].replace(unwanted_values_product, np.nan)
df['garment_group_name'] = garment_cleaned.fillna(product_cleaned)


In [None]:
# product_group_name has been merged into garment_group_name, so it is no longer needed.
df=df.drop(columns=["product_group_name"])

In [None]:
# Check the frame after dropping product_group_name.
df.head(2)

#### Renaming columns

In [None]:
# Old column name -> shorter, more descriptive name used in the rest of the notebook.
new_names_columns = dict(
    product_type_name="product_name",
    graphical_appearance_name="material_pattern",
    detail_desc="clothing_description",
    colour_group_name="colour",
)
df = df.rename(new_names_columns, axis="columns")

In [None]:
# Confirm the new column names.
df.head(2)

#### Dropping Duplicates

In [None]:
# Display the frame before de-duplication (presumably to compare the row count afterwards).
df

In [None]:
# Remove rows that are exact duplicates across all remaining columns, then display the result.
df=df.drop_duplicates()
df

#### Finding Missing values

In [None]:
# Rows in which every column is missing.
df[df.isna().all(axis=1)]

In [None]:
# Rows in which at least one column is missing.
df[df.isna().any(axis=1)]

In [None]:
# Replace missing descriptions with a fixed placeholder; the same placeholder text is
# checked for in get_value_from_description.
df["clothing_description"]=df["clothing_description"].fillna("no description given") 

In [None]:
# Display the frame after filling the missing descriptions.
df

In [None]:
def get_value_from_description(row, garments_set):
    """Fill a missing ``garment_group_name`` using the row's description.

    Rows that already have a garment group are returned untouched. Otherwise the
    description is lower-cased and split into words (surrounding punctuation
    removed), and the first entry of ``garments_set`` in alphabetical order that
    appears among those words is used. If nothing matches, or there is no usable
    description, the placeholder ``"no garment type provided"`` is written.

    Args:
        row: A row (``pd.Series``) with ``garment_group_name`` and
            ``clothing_description`` entries.
        garments_set: Iterable of lower-case garment names to look for.

    Returns:
        The same row object, with ``garment_group_name`` filled in if it was missing.
    """
    if not pd.isna(row["garment_group_name"]):
        return row

    placeholder = "no garment type provided"
    description = row["clothing_description"]
    # A missing description must not be turned into the literal text "nan".
    desc = "" if pd.isna(description) else str(description)
    if not desc or desc == "no description given":
        row["garment_group_name"] = placeholder
        return row

    # Strip punctuation around each token so that "knitwear," or "shoes." still
    # match the plain garment names.
    words = {token.strip(".,;:!?()[]\"'") for token in desc.lower().split()}
    words.discard("")

    # min() makes the choice deterministic when several garments match; picking
    # the first element of a set would depend on hash ordering.
    hit = min((g for g in garments_set if g in words), default=None)
    row["garment_group_name"] = placeholder if hit is None else hit
    return row

# Known garment groups keyed by their lower-case form: the keys are matched against
# the lower-cased description words, the values keep the dataset's own spelling.
canonical_garments = {
    name.lower(): name
    for name in df["garment_group_name"].dropna().astype(str).unique()
}
garments_set = set(canonical_garments)

df = df.apply(get_value_from_description, axis=1, garments_set=garments_set)

# The helper writes the lower-case match. Map it back to the original spelling so
# filled rows land in the same category as rows that already had the value
# (otherwise e.g. "swimwear" and "Swimwear" would be treated as different groups).
df["garment_group_name"] = df["garment_group_name"].map(
    lambda value: canonical_garments.get(value, value)
)


In [None]:
# Display the frame after filling the missing garment groups.
df

In [None]:
# TODO: make sure category values use consistent casing (capitalised vs. lower-case) so that equal categories compare equal


#### Removing unwanted categories

In [None]:
# List the columns that are still present.
df.columns

In [None]:
# List all product types to decide which ones to exclude.
df["product_name"].unique()

In [None]:
# Product types excluded from the data set.
# Every entry must be followed by a comma: adjacent string literals are silently joined
# by Python (e.g. 'Baby Bib' 'Mobile case' becomes 'Baby BibMobile case'), which would
# keep both products in the data.
unwanted_categories = [
    'Hair clip', 'Umbrella', 'Hair string', 'Sleep Bag', 'Swimwear bottom',
    'Underwear bottom', 'Swimsuit', 'Kids Underwear top',
    'Alice band', 'Straw hat', 'Giftbox', 'Sleeping sack', 'Wallet', 'Swimwear set',
    'Swimwear top', 'Waterbottle', 'Fine cosmetics',
    'Nipple covers', 'Chem. cosmetics', 'Soft Toys', 'Hair ties', 'Bra extender',
    'Blanket', 'Hairband', 'Side table', 'Keychain', 'Dog Wear', 'Washing bag',
    'Sewing kit', 'Towel', 'Wood balls', 'Bumbag', 'Dog wear',
    'Wireless earphone case', 'Stain remover spray', 'Clothing mist', 'Baby Bib',
    'Mobile case', 'Pre-walkers', 'Toy',
]

# Keep only the rows whose product type is not on the exclusion list, then display them.
is_unwanted_product = df["product_name"].isin(unwanted_categories)
df = df.loc[~is_unwanted_product]
df

In [None]:
# List the remaining material/pattern labels.
df["material_pattern"].unique()

In [None]:
# List the remaining garment groups to decide which ones to exclude.
df["garment_group_name"].unique()

In [None]:
# Garment groups excluded from the data set; rows belonging to them are removed.
unwanted_garments = ['Swimwear', 'Woven/Jersey/Knitted mix Baby', 'Stationery']
is_unwanted_garment = df["garment_group_name"].isin(unwanted_garments)
df = df.loc[~is_unwanted_garment]

In [None]:
# Check which product types remain after both filters.
df["product_name"].unique()

## Creating Designer DNA

In [None]:
# Designer name -> free-text style profile (aesthetic, materials, patterns, garment
# categories, target audience). Both the name and the profile are embedded and compared
# with the garment descriptions, and the name is shown to the user, so names are spelled
# as the designers/houses spell them (McQueen, Demeulemeester, Vandevorst, Blanck;
# the Hermes collections were designed by Martin Margiela).
designer_dna={
    "Ann Demeulemeester" :  """Poetic avant-garde minimalism rooted in romantic darkness. Dominated 
    by black, off-white, and muted neutrals, with elongated, draped, and layered silhouettes. 
    Common materials include washed cotton, silk, wool, linen, leather, and shearling, often left 
    raw or softly distressed. Patterns are minimal or absent, occasionally subtle stripes or textural 
    contrasts rather than prints. Core categories include tailoring, fluid coats, blazers, shirts, 
    trousers, boots, and scarves. Appeals to an introspective, artistic audience drawn to emotional 
    expression, literary references, and understated rebellion rather than overt trends.""" ,

    "Rick Owens" : """Brutalist avant-garde with exaggerated proportions and architectural 
                silhouettes. Heavy use of leather, calfskin, lambskin, thick jersey, wool, denim, 
                and cashmere blends. Predominantly monochrome palettes—black, dust, bone, grey—with 
                minimal prints and focus on texture and structure. Signature categories include oversized
                outerwear, elongated tops, drop-crotch trousers, boots, platform footwear, and statement 
                leather jackets. Targets a subcultural, fashion-forward audience interested in radical 
                self-expression, dystopian aesthetics, and sculptural clothing.""" ,

    
    "Dirk Bikkembergs": """Performance-driven masculinity combining sportswear and tailoring. 
    Materials emphasize technical fabrics, leather, neoprene, cotton blends, and structured knits. 
    Patterns are minimal, often graphic stripes or logo elements inspired by football culture. 
    Categories focus on menswear staples: tailored suits, athletic outerwear, boots, sneakers, and 
    body-conscious silhouettes. Designed for a confident, physically expressive audience that values 
    strength, discipline, and movement.""" ,

    "Dries Van Noten" : """Intellectual maximalism blending refined tailoring with rich surface design. 
    Luxurious fabrics such as silk, brocade, jacquard, velvet, wool, and embroidered textiles are 
    central. Known for complex patterns, florals, abstract prints, and layered textures. Garment 
    categories include tailored jackets, coats, blouses, trousers, skirts, and eveningwear. Appeals 
    to culturally curious wearers who value craftsmanship, color mastery, and expressive elegance.""",

    "Carol Christian Poell" : """Radical experimental fashion focused on material research and 
    anatomical construction. Uses hand-treated leather, horse leather, rubberized cotton, 
    resin-coated fabrics, and garment-dyed textiles. Patterns are absent; emphasis is on texture, 
    seams, scars, and construction marks. Categories include leather jackets, boots, trousers, and 
    tightly engineered garments. Attracts a niche audience of avant-garde collectors who value 
    obsessive craftsmanship and conceptual depth over wearability.""",

    "Boris Bidjan Saberi" : """Dark utilitarian avant-garde with nomadic and industrial 
    undertones. Materials include treated cotton, linen, leather, technical blends, and 
    hand-dyed fabrics. Patterns are subtle or absent, relying on layered construction and 
    surface treatment. Categories focus on jackets, hooded outerwear, trousers, boots, and 
    functional layering pieces. Designed for an urban, experimental audience drawn to ritualistic 
    aesthetics and artisanal streetwear.""",

    "Isaac Sellam Experience": """Experimental luxury leatherwear emphasizing innovation and precision. 
    Primary materials are high-grade leather, bonded leather, metal hardware, and technical textiles.
    Patterns are minimal; detailing comes from cuts, panels, and closures. Core categories include leather 
    jackets, coats, vests, and modular garments. Appeals to a design-conscious audience seeking futuristic,
    engineered luxury with tactile depth.""",

    "Roberto Cavalli": """Glamorous maximalism centered on sensuality and visual impact.
    Materials include silk, velvet, leather, satin, and embellished textiles. 
    Signature patterns include animal prints, baroque motifs, and bold graphic designs. 
    Categories range from body-hugging dresses and eveningwear to statement outerwear and denim.
    Targets a confident, extroverted audience drawn to luxury, drama, and overt sexuality.""",

    "Yohji Yamamoto" : """Philosophical avant-garde defined by volume, asymmetry, and monochrome 
    palettes. Materials include wool gabardine, cotton, linen, and fluid synthetics. Prints are rare,
    with occasional abstract or calligraphic motifs. Categories focus on oversized coats, trousers, 
    shirts, and layered silhouettes. Appeals to intellectual wearers who appreciate conceptual design 
    and nonconformity.""",

    "Balenciaga": """Conceptual contemporary fashion blending irony and exaggeration. Materials span 
    denim, technical synthetics, leather, and jersey. Patterns often reference logos, graphics, or 
    distorted classics. Categories include oversized outerwear, streetwear, footwear, and reimagined
    basics. Designed for trend-aware, culturally engaged audiences who value provocation and modern 
    commentary.""", 

    "Vivienne Westwood": """Punk rebellion mixed with historical tailoring. Materials include tartan 
    wool, corsetry fabrics, tweed, and structured cottons. Patterns feature plaids, slogans, and 
    historical references. Categories include corsets, tailoring, dresses, and statement outerwear. 
    Appeals to politically aware, expressive individuals who embrace fashion as protest.""",

    "Marc le Bihan": """Subtle avant-garde with refined craftsmanship. 
    Uses wool, silk, cotton, and muted textured fabrics. Patterns are minimal or tonal. 
    Categories include delicate tailoring, dresses, and understated outerwear. Appeals to a 
    quiet, design-literate audience valuing restraint and nuance.""",

    "Yves Saint Laurent": """Timeless Parisian elegance with sensual edge. Materials include fine 
    wool, silk, leather, and velvet. Patterns are classic or minimal. Categories include tuxedos, 
    tailoring, eveningwear, and leather jackets. Appeals to confident wearers seeking sophistication 
    with attitude.""",

    "Schiaparelli": """Surrealist couture emphasizing sculptural artistry. 
    Uses embellished fabrics, metallics, embroidery, and couture techniques. 
    Patterns are symbolic and artistic. Categories include couture gowns and 
    statement pieces. Appeals to collectors and art-driven audiences.""",

    "Dior by John Galliano": """Theatrical couture blending history and fantasy. 
    Luxurious silks, embroidery, corsetry, and layered textiles dominate. 
    Patterns are ornate and narrative-driven. Categories include gowns, tailored suits, 
    and couture pieces. Appeals to romantic, expressive luxury consumers.""",

    "Dior by Hedi Slimane": """Lean, rock-inspired tailoring. Materials include fine wool, leather, 
    and slim-cut textiles. Patterns are minimal. Categories include suits, jackets, and slim 
    silhouettes. Appeals to youth-driven, minimalist audiences.""",

    "Gucci by Tom Ford": """Polished sensual minimalism. Uses velvet, silk, leather, and 
    metallic finishes. Patterns are sleek or animal-inspired. Categories include tailoring, 
    eveningwear, and statement accessories. Appeals to confident, glamorous consumers.""",

    "Hermes by Martin Margiela":"""Intellectual quiet luxury. Materials include fine leather, cashmere,
    silk, and wool. Patterns are minimal. Categories include timeless tailoring and accessories. 
    Appeals to discerning, understated luxury buyers.""",

    "A.F. Vandevorst": """Conceptual minimalism with military influence. Materials include structured 
    cotton, wool, and leather. Patterns are restrained. Categories include utilitarian tailoring and 
    outerwear. Appeals to design-focused minimalists.""",

    "Leon Emanuel Blanck": """Extreme experimental tailoring. Uses heavy cotton, leather, and 
    technical blends. Patterns absent; focus on construction. Categories include engineered 
    outerwear and trousers. Appeals to avant-garde purists.""",

    "Maison Margiela": """Deconstruction and conceptual experimentation. Materials include repurposed 
    textiles, leather, and unconventional fabrics. Patterns are secondary to construction. Categories 
    include tailored garments and conceptual pieces. Appeals to intellectual fashion audiences.""",

    "Alexander McQueen": """Dark romantic tailoring with theatrical intensity. Materials include structured 
    wool, silk, and embellished textiles. Patterns include dramatic motifs. Categories include sharp 
    tailoring and couture-inspired garments. Appeals to emotionally expressive wearers.""",

    "Balmain": """Power-driven glamour. Materials include structured wool, leather, and embellishments. 
    Patterns emphasize symmetry and bold detailing. Categories include sharp tailoring and statement 
    pieces. Appeals to confident luxury consumers.""",

    "Mugler":"""Futuristic sensuality with sculptural silhouettes. Materials include latex, 
    synthetics, and structured fabrics. Patterns are bold and graphic. Categories include 
    body-conscious dresses and statement pieces. Appeals to performance-driven, bold audiences.""" } 
                       

In [None]:
# One row per designer: the designer name is the index, the style profile is the
# single "Description" column. The frame is displayed afterwards.
designer_df = pd.Series(designer_dna, name="Description").to_frame()
designer_df

## Extracting Clothing descriptions from DF

In [None]:
# Load the sentence-embedding model used for both the garment texts and the designer profiles.
model_small = SentenceTransformer('all-MiniLM-L6-v2')

#### Preparing the corpus for the dataset and the designer dna


In [None]:

# One descriptive sentence per garment, in the row order of df, so that position i in
# the corpus corresponds to the i-th row of df.
# f-strings are used so that a non-string cell (e.g. a missing value) is rendered as
# text instead of raising a TypeError during concatenation.
corpus_of_clothes = [
    f"Product Name is {product_name}, the material is a {material_pattern}"
    f" of the colour {colour} and belongs to the garment group {garment_group_name}."
    f" The garment can be described as follows: {clothing_description}"
    for product_name, material_pattern, colour, garment_group_name, clothing_description in zip(
        df["product_name"],
        df["material_pattern"],
        df["colour"],
        df["garment_group_name"],
        df["clothing_description"],
    )
]

# One "<designer>: <profile>" text per designer, in the row order of designer_df.
corpus_of_designers = [
    f"{designer_name}: {designer_description}"
    for designer_name, designer_description in zip(designer_df.index, designer_df["Description"])
]


## Making the embeddings for corpus of clothes and designers

In [None]:
# Embed every garment sentence and every designer profile with the same model, in batches
# of 32, returning tensors for the similarity step.
corpus_of_clothes_embedding=model_small.encode_document(corpus_of_clothes, batch_size=32, convert_to_tensor=True)
corpus_of_designers_embedding=model_small.encode_document(corpus_of_designers, batch_size=32, convert_to_tensor=True)

## Generating the similarity_scores for both embedding groups

In [None]:
# Score every garment embedding against every designer embedding using the model's
# similarity function; the result is indexed below as [garment, designer].
similarity_scores = model_small.similarity(corpus_of_clothes_embedding, corpus_of_designers_embedding)

## Adding the designers along with their similarity scores as new column to the original dataframe

In [None]:
# For every garment, rank all designers from most to least similar and store the
# ranking as a list of (designer_name, score) tuples in "Compatible_designers".
designer_names = designer_df.index.tolist()

# Sort each row of the garment x designer score matrix once, instead of looping
# over individual tensor elements in Python.
sorted_scores, sorted_designer_idx = torch.sort(similarity_scores, dim=1, descending=True)
ranked_designers = [
    [(designer_names[j], score) for j, score in zip(idx_row, score_row)]
    for idx_row, score_row in zip(sorted_designer_idx.tolist(), sorted_scores.tolist())
]

# The rows of similarity_scores follow the row ORDER of df, not its index labels
# (the index has gaps after de-duplication and filtering), so the rankings are
# attached positionally through df.index. A length mismatch raises a ValueError
# rather than silently writing to the wrong rows. assign() returns a new frame,
# which also avoids writing into a filtered view of an earlier frame.
df = df.assign(
    Compatible_designers=pd.Series(ranked_designers, index=df.index, dtype=object)
)


## Comparing using similarity to user query

In [None]:
# Free-text request from the user. It is split on "." into stripped, non-empty
# sentences; each sentence is later embedded as a separate query.
# NOTE(review): with the placeholder " " the resulting list is empty.
user_query=" "
corpus_of_user_query= [s.strip() for s in user_query.split(".") if s.strip()]

## Choosing designer based on similarity

In [None]:
# For each sentence of the user query, find the garments most similar to it and
# collect the best-matching designers of those garments.
top_k = min(5, len(corpus_of_clothes))

# Flat list of (designer_name, score) pairs gathered over all query sentences.
top_designers_for_query = []

for query in corpus_of_user_query:
    query_embedding = model_small.encode_query(query, convert_to_tensor=True)

    # Kept in its own variable so the garment x designer matrix in
    # similarity_scores is not overwritten.
    query_scores = model_small.similarity(query_embedding, corpus_of_clothes_embedding)[0]
    scores, indices = torch.topk(query_scores, k=top_k)

    # Positions in the corpus correspond to row positions in df, hence iloc;
    # .tolist() turns the index tensor into plain ints.
    for index in indices.tolist():
        # Up to six highest-ranked designers of this garment.
        top_designers_for_query.extend(df["Compatible_designers"].iloc[index][:6])

for designer, score in top_designers_for_query:
    # float() accepts both plain numbers and single-element tensors.
    print(f" Your top designers are {designer} with a score of {float(score):.4f}")
    

## Making Pinterest API call

## Presenting to user