In [305]:
import pandas as pd
# Fashion houses with fewer rows than this are excluded from the analysis.
MIN_ROWS_PER_HOUSE = 20

df = pd.read_parquet("data/vogue_data.parquet")

# Keep only fashion houses that have at least MIN_ROWS_PER_HOUSE rows.
# .copy() makes the filtered frame independent of the frame read from disk, so
# later in-place writes (e.g. df.loc[mask, "designer_name"] = ...) do not
# trigger SettingWithCopyWarning.
rows_per_house = df.groupby("fashion_house")["fashion_house"].transform("count")
df = df[rows_per_house >= MIN_ROWS_PER_HOUSE].copy()

# Rows whose designer_name value has length 0.
problem_rows = df[df["designer_name"].apply(lambda x: len(x) == 0)]

In [306]:
# Number of rows whose designer_name is empty (problem_rows is selected with len(x) == 0).
len(problem_rows)

80

In [289]:
# Rows whose designer_name holds two or more entries.
many_rows = df.loc[df["designer_name"].map(lambda names: len(names) > 1)]

# Distinct fashion houses among those rows.
many_rows["fashion_house"].unique()

array(['Apiece Apart', 'Au Jour Le Jour', 'Babyghost', 'Badgley Mischka',
       'Baja East', 'Balenciaga', 'Botter', 'Calvin Klein Collection',
       'Clements Ribeiro', 'Coperni', 'Costello Tagliapietra',
       'Dolce Gabbana', 'Eckhaus Latta', 'Fendi', 'Hyke', 'Interior',
       'Kwaidan Editions', 'Libertine', 'Marchesa', 'Marchesa Notte',
       'Monse', 'Rag Bone', 'Saint Laurent', 'Talbot Runhof',
       'United Bamboo', 'Valentino', 'Vaquera', 'Vena Cava',
       'Veronica Beard'], dtype=object)

In [300]:
import numpy as np


def _sorted_unique_designers(name_lists):
    """Return the sorted distinct non-None names found across ``name_lists``."""
    unique_names = set()
    for names in name_lists:
        for name in list(names):
            if name is not None:
                unique_names.add(name)
    return sorted(unique_names)


# One sorted list of distinct designer names per fashion house.
designer_per_house = df.groupby("fashion_house")["designer_name"].apply(
    _sorted_unique_designers
)



In [301]:
from collections import defaultdict

def fashion_house_designer_periods(df):
    """Summarise, per fashion house, the span of years each designer appears in.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``fashion_house``, ``designer_name`` (a
        list-like of names per row) and ``year``. The frame is not modified.

    Returns
    -------
    dict
        ``{fashion_house: [{"designer": name, "start_year": int,
        "end_year": int}, ...]}`` where the years are the smallest and largest
        ``year`` found for that (house, designer) pair. Pairs are emitted in
        the (sorted) order produced by ``groupby``.
    """
    # One row per individual designer; empty list-likes explode to NaN.
    exploded = df.explode("designer_name")

    # Drop missing / empty-string designer names. Rows without a year are
    # dropped too: a pair with no valid year at all would otherwise aggregate
    # to NaN and make int() raise below.
    names = exploded["designer_name"]
    usable = names.notna() & (names != "") & exploded["year"].notna()
    exploded = exploded[usable]

    # Earliest and latest year per (house, designer) pair.
    spans = (
        exploded.groupby(["fashion_house", "designer_name"])["year"]
        .agg(["min", "max"])
        .reset_index()
    )

    # Iterate column-wise instead of iterrows(), which builds a Series per row.
    periods = {}
    for house, designer, first_year, last_year in zip(
        spans["fashion_house"], spans["designer_name"], spans["min"], spans["max"]
    ):
        periods.setdefault(house, []).append({
            "designer": designer,
            "start_year": int(first_year),
            "end_year": int(last_year),
        })
    return periods

In [302]:
# One row per individual designer, with missing and empty-string names dropped.
df_exp = df.explode("designer_name").loc[
    lambda frame: frame["designer_name"].notna() & frame["designer_name"].ne("")
]

# Number of distinct designers seen for each fashion house.
designer_counts = df_exp.groupby("fashion_house")["designer_name"].nunique()

# Names of the fashion houses that have more than one distinct designer.
fh_multiple_designers = designer_counts.loc[designer_counts.gt(1)].index.tolist()

# Rows of the original (un-exploded) frame that belong to those houses.
df_multiple_designers = df.loc[df["fashion_house"].isin(fh_multiple_designers)]

In [303]:
# Distinct fashion houses kept in df_multiple_designers.
df_multiple_designers.fashion_house.unique()

array(['3 1 Phillip Lim', 'A F Vandevorst', 'Akris', 'Alexander Mcqueen',
       'Ambush', 'Ann Demeulemeester', 'Anna Sui', 'Apiece Apart',
       'Ashish', 'Au Jour Le Jour', 'Babyghost', 'Badgley Mischka',
       'Baja East', 'Balenciaga', 'Banana Republic', 'Berluti', 'Bevza',
       'Bottega Veneta', 'Botter', 'Boudicca', 'By Malene Birger',
       'Cacharel', 'Calvin Klein Collection', 'Canali', 'Celine',
       'Chanel', 'Chloe', 'Christian Dior', 'Christian Lacroix',
       'Clements Ribeiro', 'Coperni', 'Costume National', 'Damir Doma',
       'Deveaux', 'Diesel Black Gold', 'Dkny', 'Dolce Gabbana', 'Dondup',
       'Dries Van Noten', 'Duckie Brown', 'Dunhill', 'Eckhaus Latta',
       'Elie Tahari', 'Ellery', 'Emanuel Ungaro', 'Emporio Armani',
       'Ermanno Scervino', 'Escada', 'Etro', 'Eytys', 'Facetasm',
       'Faith Connexion', 'Fendi', 'Fumito Ganryu', 'Givenchy', 'Gucci',
       'Hermes', 'Hyke', 'Iceberg', 'Interior', 'Issey Miyake', 'J Brand',
       'J Mendel', 'Ji

In [304]:
# Per-house list of {"designer", "start_year", "end_year"} entries, restricted to
# the houses in df_multiple_designers.
periods_dict = fashion_house_designer_periods(df_multiple_designers)
periods_dict

{'3 1 Phillip Lim': [{'designer': 'Lim', 'start_year': 2013, 'end_year': 2020},
  {'designer': 'Phillip Lim', 'start_year': 2007, 'end_year': 2024},
  {'designer': 'Tim Blanks', 'start_year': 2008, 'end_year': 2012}],
 'A F Vandevorst': [{'designer': 'An Vandevorst',
   'start_year': 2002,
   'end_year': 2018},
  {'designer': 'An Vandevorst and Filip Arickx',
   'start_year': 2003,
   'end_year': 2019}],
 'Akris': [{'designer': 'Albert Kriemler',
   'start_year': 2005,
   'end_year': 2021},
  {'designer': 'Madame Kreimier', 'start_year': 2004, 'end_year': 2025}],
 'Alexander Mcqueen': [{'designer': 'Alexander McQueen',
   'start_year': 1995,
   'end_year': 2025},
  {'designer': 'McQueen', 'start_year': 2003, 'end_year': 2007},
  {'designer': 'Tim Blanks', 'start_year': 2010, 'end_year': 2012}],
 'Ambush': [{'designer': 'Verbal', 'start_year': 2018, 'end_year': 2025},
  {'designer': 'Yoon Ahn', 'start_year': 2018, 'end_year': 2025}],
 'Ann Demeulemeester': [{'designer': 'Ann Demeulemees

In [None]:
def assign_designer_to_fashion_house(df, fashion_house, designer_name):
    """Overwrite ``designer_name`` for every row of one fashion house.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``fashion_house`` and ``designer_name`` columns. It is
        modified in place.
    fashion_house
        Value compared for equality against the ``fashion_house`` column.
    designer_name
        Value to store in each matching row (the callers in this notebook pass
        a list of names). Each row receives its own shallow copy.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in. If no row matches, a
        message is printed and ``df`` is returned unchanged.
    """
    import copy

    mask = df["fashion_house"] == fashion_house
    if not mask.any():
        print(f"No rows found for fashion house: {fashion_house}")
        return df

    matching_index = df.index[mask]
    # ``[designer_name] * n`` would put one shared list object into every row
    # (and share it with the caller's argument), so mutating a single cell
    # would silently change all of them. Copy per row instead.
    per_row_values = [copy.copy(designer_name) for _ in range(len(matching_index))]
    df.loc[mask, "designer_name"] = pd.Series(per_row_values, index=matching_index)
    return df

# Hard-coded (fashion house, designers) overrides, applied in the order listed.
manual_designer_assignments = [
    ("Aquascutum", ["John Emary"]),
    ("Area", ["Beckett Fogg and Piotrek Panszczyk"]),
    ("Nehera", ["Samuel Drira"]),
    ("Matthew Williamson", ["Matthew Williamson"]),
    ("Limi Feu", ["Limi Yamamoto"]),
    ("Lazoschmidl", ["Johannes Schmidl"]),
    ("Kolor", ["Junichi Abe"]),
    ("Kiton", ["Ciro Paone"]),
    ("Audra", ["Audra Noyes"]),
    ("Au Jour Le Jour", ["Diego Marquez", "Mirko Fontana"]),
    ("Babyghost", ["Qiaoran Huang", "Joshua Hupper"]),
    ("Badgley Mischka", ["James Mischka", "Mark Badgley"]),
    ("Baja East", ["John Targon", "Scott Studenberg"]),
    ("Bally", ["Rhuigi Villaseñor"]),
    ("Blumarine", ["Anna Molinari"]),
    ("Boglioli", ["Davide Marello"]),
    ("Brian Reyes", ["Brian Reyes"]),
]

for house, designers in manual_designer_assignments:
    df = assign_designer_to_fashion_house(df, house, designers)


In [293]:
# Rows of "Au Jour Le Jour" whose designer_name contains "Jeremy Scott".
# list(x) turns each value into a plain list before the membership test.
df[
    (df["fashion_house"] == "Au Jour Le Jour") &
    (df["designer_name"].apply(lambda x: "Jeremy Scott" in list(x)))
]

Unnamed: 0,fashion_house,show,URL,image_urls,location,season,year,category,image_urls_sample,description,editor,publish_date,designer_names,designer_name


In [294]:
# lines=True: the file is parsed as one JSON object per line.
df_extracted = pd.read_json("data/extracted_KG/extracted_KG_fmd_fashion_houses.json", lines=True)

In [296]:
# Inspect the first KG value. NOTE(review): the displayed output is quoted, so KG
# appears to be stored as the string form of a dict rather than a dict — confirm.
df_extracted.KG.iloc[0]

"{'founded_by': [['Tuomas Merikoski', '2015']], 'designer_employed': []}"

In [297]:
# Rows of df whose fashion_house is exactly "Charles Anastase".
df[df["fashion_house"]=="Charles Anastase"]

Unnamed: 0,fashion_house,show,URL,image_urls,location,season,year,category,image_urls_sample,description,editor,publish_date,designer_names,designer_name


In [None]:
# NOTE(review): is_close_match, extract_names_from_KG and clean_and_merge_names
# are not defined in the visible part of this notebook; they must be defined or
# imported before this cell runs.

# Distinct, non-null fashion house names present in df.
unique_houses = df['fashion_house'].dropna().unique()

# Keep only the extracted brands for which is_close_match(brand_name, unique_houses) is truthy.
mask = df_extracted['brand_name'].apply(lambda x: is_close_match(x, unique_houses))

# .copy() so the column assignments below write to an independent frame instead
# of a slice of df_extracted (avoids SettingWithCopyWarning).
df_extracted_filtered = df_extracted[mask].copy()
df_extracted_filtered['names_in_KG'] = df_extracted_filtered['KG'].apply(extract_names_from_KG)
df_extracted_filtered['founder'] = df_extracted_filtered['KG'].apply(extract_names_from_KG, properties=["founded_by"])

# Flatten the per-brand founder lists into one set, then pass it through
# clean_and_merge_names with threshold=90.
founders = {name for sublist in df_extracted_filtered["founder"] for name in sublist}
founders = clean_and_merge_names(founders, threshold=90)

Unnamed: 0,brand_name,URL,KG,model,founder
0,Aalto,https://www.fashionmodeldirectory.com/brands/a...,"{'founded_by': [['Tuomas Merikoski', '2015']],...",gemma2,[Tuomas Merikoski]
1,Acne Studios,https://www.fashionmodeldirectory.com/brands/a...,"{'founded_by': [['Jonny Johansson', '1996']], ...",gemma2,[Jonny Johansson]
2,Adam Kimmel,https://www.fashionmodeldirectory.com/brands/a...,"{'founded_by': [['Adam Kimmel', '2002']], 'des...",gemma2,[Adam Kimmel]
3,Adam Lippes,https://www.fashionmodeldirectory.com/brands/a...,"{'founded_by': [['Adam Lippes', '2004']], 'des...",gemma2,[Adam Lippes]
4,Adam Selman,https://www.fashionmodeldirectory.com/brands/a...,"{'founded_by': [['Adam Selman', '2011']], 'des...",gemma2,[Adam Selman]
...,...,...,...,...,...
722,Stella Mccartney,https://www.fashionmodeldirectory.com/brands/s...,"{'founded_by': [['Stella McCartney', None]], '...",gemma2,[Stella McCartney]
723,Viktor Rolf,https://www.fashionmodeldirectory.com/brands/v...,{'founded_by': [['Viktor Horsting & Rolf Snoer...,gemma2,[Viktor Horsting & Rolf Snoeren]
724,Vpl,https://www.fashionmodeldirectory.com/brands/vpl/,"{'founded_by': [['Victoria Bartlett', '2003']]...",gemma2,[Victoria Bartlett]
725,Chadwick Bell,https://www.fashionmodeldirectory.com/brands/c...,"{'founded_by': [], 'designer_employed': []}",gemma2,[]
