## AI-Powered Size Chart Generator for Apparel Sellers
### After data cleaning

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

# Load the data
data = pd.read_csv('refined_cloth.csv')

# Data preprocessing
# Convert categorical variables into numerical ones
label_encoders = {}
for column in ['cup_size', 'hips', 'bra_size', 'category', 'length', 'fit', 'shoe_size', 'shoe_width']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column].astype(str))
    label_encoders[column] = le

# Fill missing values with the most frequent value
imputer = SimpleImputer(strategy='most_frequent')
data_filled = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Group data by category
category_groups = data_filled.groupby('category')

# Initialize the dictionary to store size charts
size_charts = {}

# Iterate over each category and create size charts
for category, group in category_groups:
    size_chart = {}

    for size in range(39):  # For sizes 0 to 38
        size_group = group[group['size'] == size]

        if size_group.empty:
            continue

        # Determine the number of clusters based on the number of samples
        n_clusters = min(5, len(size_group))

        if n_clusters < 1:
            continue

        # Clustering based on height and other features
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        features = size_group[['height', 'cup_size', 'hips', 'bra_size', 'shoe_size', 'shoe_width']].values
        kmeans.fit(features)
        size_group['cluster'] = kmeans.labels_

        # For each cluster, calculate the average measurements and fit
        clusters = size_group.groupby('cluster').mean()
        confidence_scores = 1 - pairwise_distances_argmin_min(kmeans.cluster_centers_, features)[1] / len(features)

        # Build the size chart for this size
        size_chart[size] = {
            'height': clusters['height'].mean(),
            'cup_size': clusters['cup_size'].mean(),
            'hips': clusters['hips'].mean(),
            'bra_size': clusters['bra_size'].mean(),
            'shoe_size': clusters['shoe_size'].mean(),
            'shoe_width': clusters['shoe_width'].mean(),
            'confidence_score': np.mean(confidence_scores)
        }

    size_charts[category] = size_chart

# Function to adjust sizes based on fit and length feedback
def adjust_sizes_based_on_fit(group):
    for index, row in group.iterrows():
        if row['fit'] != 0 or row['length'] != 0:  # 0 indicates 'fit' and 'just right'
            if row['fit'] == 1 or row['length'] == 1:  # 1 indicates 'large' or 'slightly long'
                group.at[index, 'size'] = min(row['size'] + 1, 38)
            elif row['fit'] == 2 or row['length'] == 2:  # 2 indicates 'small' or 'slightly short'
                group.at[index, 'size'] = max(row['size'] - 1, 0)
    return group

# Adjust sizes based on fit
data_adjusted = category_groups.apply(adjust_sizes_based_on_fit)

# Output the size charts
for category, chart in size_charts.items():
    print(f"Size chart for {category}:")
    for size, measurements in chart.items():
        print(f"Size {size}: {measurements}")
    print("\n")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  size_group['cluster'] = kmeans.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

Size chart for 0.0:
Size 0: {'height': 154.94, 'cup_size': 2.5, 'hips': 2.5, 'bra_size': 4.0, 'shoe_size': 10.25, 'shoe_width': 0.75, 'confidence_score': 1.0}
Size 1: {'height': 161.87159843003687, 'cup_size': 1.9262607761377297, 'hips': 3.4298075918095874, 'bra_size': 4.8937724065093535, 'shoe_size': 9.964305046466668, 'shoe_width': 0.8529057839400374, 'confidence_score': 0.9987438849630816}
Size 2: {'height': 167.92717327586206, 'cup_size': 2.5223563218390805, 'hips': 2.462528735632184, 'bra_size': 4.212025862068965, 'shoe_size': 11.841551724137933, 'shoe_width': 0.34485632183908044, 'confidence_score': 0.9853814185049725}
Size 3: {'height': 147.32, 'cup_size': 4.0, 'hips': 2.0, 'bra_size': 2.0, 'shoe_size': 14.0, 'shoe_width': 0.0, 'confidence_score': 1.0}
Size 4: {'height': 164.21153354476183, 'cup_size': 2.8060617872272635, 'hips': 2.206317013463454, 'bra_size': 4.579011995549079, 'shoe_size': 11.904899275619703, 'shoe_width': 0.3224418328466664, 'confidence_score': 0.999409586734

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  data_adjusted = category_groups.apply(adjust_sizes_based_on_fit)


In [2]:
for category, chart in size_charts.items():
    # Create an empty DataFrame
    df = pd.DataFrame(columns=['size', 'height', 'cup_size', 'hips', 'bra_size', 'shoe_size', 'shoe_width', 'confidence_score'])
    
    for size, measurements in chart.items():
        # Append measurements to the DataFrame
        df = df.append({
            'size': size,
            'height': measurements['height'],
            'cup_size': measurements['cup_size'],
            'hips': measurements['hips'],
            'bra_size': measurements['bra_size'],
            'shoe_size': measurements['shoe_size'],
            'shoe_width': measurements['shoe_width'],
            'confidence_score': measurements['confidence_score']
        }, ignore_index=True)
    
    # Save the DataFrame as a CSV file
    df.to_csv(f'size_chart__{category}.csv', index=False)

    print(f"\nSaved size chart for {category} as {category}_size_chart.csv")


Saved size chart for 0.0 as 0.0_size_chart.csv

Saved size chart for 1.0 as 1.0_size_chart.csv

Saved size chart for 2.0 as 2.0_size_chart.csv

Saved size chart for 3.0 as 3.0_size_chart.csv

Saved size chart for 4.0 as 4.0_size_chart.csv

Saved size chart for 5.0 as 5.0_size_chart.csv

Saved size chart for 6.0 as 6.0_size_chart.csv


  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.append({
  df = df.ap