###### Step 1: Get the Style code, color, size, quantity, brand and category from the main sheet into a new sheet


In [1]:
import pandas as pd

input_file = '../data/product_data/main_data.csv'
df = pd.read_csv(input_file)

# Rename specific columns
df = df.rename(columns={
    'STYLE NO.': 'Code',
    'SIZE_US': 'Size',
    'COLOR': 'Color',
    'QUANTITY': 'Quantity',
    'BRAND NAME': 'Brand',
    # 'CATEGORIES': 'Category',
})

# Define validation criteria for each column


def validate_row(row):
    # Check for missing or blank values
    if pd.isnull(row['Brand']) or pd.isnull(row['Code']) or pd.isnull(row['Color']) or pd.isnull(row['Quantity']) or pd.isnull(row['Size']):
        return False
    if row['Brand'].strip() == '' or row['Code'].strip() == '' or row['Color'].strip() == '':
        return False

    # Check for correct data types
    if not isinstance(row['Brand'], str) or not isinstance(row['Code'], str) or not isinstance(row['Color'], str):
        return False
    try:
        float(row['Quantity'])
        float(row['Size'])
    except ValueError:
        return False

    return True


# Apply validation to each row
valid_rows = df.apply(validate_row, axis=1)

# Filter the DataFrame to retain only valid rows
filtered_df = df[valid_rows]

# Select the specified columns based on the new names
# selected_columns = ['Code', 'Brand', 'Color', 'Quantity', 'Size', 'Category']
selected_columns = ['Code', 'Brand', 'Color', 'Quantity', 'Size']
new_df = filtered_df[selected_columns]

# Write the filtered data to a new CSV file
# output_file = '../../scripts/catalog/filtered_data.csv'
output_file = '../data/main/filtered_data.csv'
new_df.to_csv(output_file, index=False)

# Print number of unique Codes
print("Number of unique Codes in the original DataFrame: ",new_df['Code'].nunique())
print(f"Selected columns have been written to {output_file}")


# Identify rows that were dropped
invalid_rows = df[~valid_rows]

output_file_invalid = '../data/main/invalid_data.csv'
invalid_rows.to_csv(output_file_invalid, index=False)

# Print number of unique Codes
print(invalid_rows['Code'].nunique())
print(f"Invalid rows have been written to {output_file_invalid}")


Number of unique Codes in the original DataFrame:  476
Selected columns have been written to ../data/main/filtered_data.csv
177
Invalid rows have been written to ../data/main/invalid_data.csv


###### Step 2: Group this sheet by style code, color, and size and sort all these by asc order, sum of quantity by group to get the total quantity


In [2]:
import pandas as pd

# Load the original CSV file
df = pd.read_csv("../data/main/filtered_data.csv")

# Ensure Quantity is numeric, coercing errors to NaN
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')

# Fill NaN values in Quantity with 0 (or handle as needed)
df['Quantity'] = df['Quantity'].fillna(0)

# First aggregation: Sum quantities based on Code, Color, and Size
aggregated_df = df.groupby(['Code', 'Color', 'Size'], as_index=False).agg({
    'Brand': 'first',  # Assuming you want to keep the first Brand name found
    'Quantity': 'sum',
    # 'Category': 'first'
})

# Sort the aggregated data by Code, Color, and Size in ascending order
aggregated_df = aggregated_df.sort_values(by=['Code', 'Color', 'Size'], ascending=[True, True, True])


# Print the number of unique codes after aggregation
print(f"Number of unique Codes in the original DataFrame: {df['Code'].nunique()}")
print(f"Number of unique Codes after aggregation: {aggregated_df['Code'].nunique()}")

# Write the aggregated data to a new CSV file
aggregated_df.to_csv('../data/main/step_2_output.csv', index=False)

Number of unique Codes in the original DataFrame: 476
Number of unique Codes after aggregation: 476


###### Step 3: Get the prices and Description of available product codes


In [3]:
import pandas as pd

# Load file A and file B
original_file = "../data/main/step_2_output.csv"
data_file = "../data/product_data/product_data.csv"
df_original_file = pd.read_csv(original_file)
df_data_file = pd.read_csv(data_file)

# Merge file A with file B on the "Code" column, keeping all rows from file A
merged_df = pd.merge(df_original_file, df_data_file[['Code', 'Description', 'Price', 'ai_description','Category','Collection','Type']], on='Code', how='left')

print(merged_df['Code'].nunique())
# # Drop rows where Price is NaN or 0
# merged_df = merged_df.dropna(subset=['Price'])
# merged_df = merged_df[merged_df['Price'] != 0]

# Drop rows where Price is NaN
merged_df = merged_df.dropna(subset=['Price'])


# print(merged_df['Code'].nunique())
# merged_df = merged_df[merged_df['Price'] >= 150]


print(merged_df['Code'].nunique())
# Save the result to a new CSV file
output_file = "../data/main/step_3_output.csv"
merged_df.to_csv(output_file, index=False)

print(f"The updated file with Price and Description added: {output_file}")


476
353
The updated file with Price and Description added: ../data/main/step_3_output.csv


###### Step 5: Combine image URLs with the product code and add additional URLs


In [4]:
import pandas as pd
import re

# Load the CSV files
file1_path = '../data/main/step_3_output.csv'
file2_path = '../data/product_data/shopify_urls.csv'  # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Group URLs by 'Code' and create a sorted list of URLs for each 'Code' based on 'file_name'
def sort_file_names(file_name_list, url_list):
    # Extract color and number suffix for sorting
    def get_sort_key(file_name):
        match = re.search(r'_([A-Za-z]+)_(\d+)', file_name)
        if match:
            color = match.group(1)
            number = int(match.group(2))
            return color, number
        return "", float('inf')  # Default sort key if no match is found

    # Sort both file names and URLs based on the file_name sort key
    sorted_files_with_urls = sorted(zip(file_name_list, url_list), key=lambda x: get_sort_key(x[0]))
    sorted_urls = [url for _, url in sorted_files_with_urls]
    return ','.join(sorted_urls)

# Group and sort URLs for each Code based on 'file_name'
url_grouped = (
    file2.groupby('Code')
    .apply(lambda x: sort_file_names(x['file_name'].tolist(), x['url'].tolist()))
    .reset_index(name='Urls')
)

# Merge the grouped and sorted URLs back to the original DataFrame
final_df = pd.merge(file1, url_grouped, on='Code', how='left')

# Discard rows where 'Urls' is NaN (no URL available)
final_df = final_df.dropna(subset=['Urls'])

# Save the final DataFrame to a new CSV file
output_file_path = '../data/main/step_5_output.csv'
final_df.to_csv(output_file_path, index=False)
print(f"Final CSV file with sorted URLs has been saved to {output_file_path}")
print(f"Number of unique handles: {final_df['Code'].nunique()}")

Final CSV file with sorted URLs has been saved to ../data/main/step_5_output.csv
Number of unique handles: 307


  .apply(lambda x: sort_file_names(x['file_name'].tolist(), x['url'].tolist()))


###### Step 6: Creating shopify csv file to upload


In [2]:
import pandas as pd
import re
from datetime import datetime

# Load the CSV file
file1_path = '../data/main/step_5_output.csv'
file1 = pd.read_csv(file1_path)

# Shopify CSV structure
shopify_columns = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published',
    'Option1 Name', 'Option1 Value', 'Option2 Name', 'Option2 Value',
    'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker',
    'Variant Inventory Qty', 'Variant Inventory Policy',
    'Variant Fulfillment Service', 'Variant Price',
    'Variant Compare at Price', 'Variant Requires Shipping', 'Variant Taxable',
    'Variant Barcode', 'Image Src', 'Image Position', 'Image Alt Text',
    'Variant Image'  # New column for variant-specific image
]

# Initialize an empty DataFrame
shopify_df = pd.DataFrame(columns=shopify_columns)
all_entries = []
current_code = None
other_images = []
count = 0

# Iterate over rows in the CSV
for index, row in file1.iterrows():

    # Function to handle NaN values safely
    def safe_str(value):
        return str(value) if pd.notna(value) else ""

    def replace_commas(*args):
        if all(not arg for arg in args):
            return ""
        return '; '.join(f'"{arg.replace(",", ";")}"' for arg in args)

    def combine_tags(brand="", collection="", category="", dress_length=""):
        def safe_str(value):
            # Convert to string and strip whitespace; return empty string if None or empty
            return str(value).strip() if value and str(value).strip() else ""

        # Create a list of values, conditionally adding "Wedding" based on brand
        values = [
            "Wedding" if safe_str(brand) == "ANDY ANAND COUTURE" else "",
            safe_str(collection),
            safe_str(category),
            safe_str(dress_length)
        ]

        # Filter out empty values and remove duplicates while preserving order
        seen = set()
        unique_values = [v for v in values if v and v not in seen and not seen.add(v)]

        # Join unique values with commas
        result = ", ".join(unique_values)
        return result


    # Check if we have encountered a new code
    if current_code is not None and current_code != row['Code']:
        # Append additional images for the previous code
        for idx, url in enumerate(other_images):
            image_entry = {
                'Handle': current_code.lower(),
                'Image Src': url,
                'Image Position': idx + 2,
            }
            all_entries.append(pd.DataFrame([image_entry]))
        other_images = []  # Reset for new code

    current_code = safe_str(row['Code'])  # Set the current code safely
    # if current_code.lower() != 'e1264':
    #     continue

    # Prepare product description

    def generate_body_html(ai_description, description):
        def safe_str(value):
            return str(value).strip() if value and str(value).strip() else ""

        # Check conditions for generating body_html
        if safe_str(ai_description):  # Use ai_description if it is not empty
            body_html = f"<p>{safe_str(ai_description)}</p>"
        elif safe_str(description):  # Use Description if ai_description is empty
            body_html = safe_str(description)
        else:  # Return an empty string if both are empty
            body_html = ""

        return body_html

    body_html = generate_body_html(safe_str(row['ai_description']), safe_str(row['Description']))

    def limit_seo_description(text, limit=300):
        return text[:limit] if len(text) > limit else text

    # Extract and filter image URLs based on the color
    if isinstance(row['Urls'], str):
        all_images = row['Urls'].split(",")
        # Filter images for the variant's color
        color = safe_str(row['Color']).upper()
        color_images = [
            img for img in all_images if color in img.upper()
        ]

        # Sort images by the number suffix (front, back, additional)
        color_images.sort(key=lambda x: int(re.search(r'_(\d+)', x).group(1)) if re.search(r'_(\d+)', x) else 99)

        # Assign images based on the suffix meaning
        first_image = color_images[0] if color_images else all_images[0]  # Fallback to the first URL in all_images if color_images is empty
        other_images = color_images[1:] if len(color_images) > 1 else []

        # Set the 'Variant Image' as the primary color image (suffix `_1`)
        variant_image = next((img for img in color_images if "_1" in img), first_image)
    else:
        first_image = ""
        variant_image = ""



    # Create a new product entry
    product_entry = {
        'Handle': current_code.lower(),
        'Title': f"{safe_str(row['Brand'])} {current_code}",
        'Body (HTML)': body_html,
        'Vendor': "Nova Vogue",
        'Type': safe_str(row['Type']) + " Dress",
        'Tags': combine_tags(row['Brand'],row['Collection'],row['Category'], row['Type']),
        'Published': True,
        'Product Category': "Apparel & Accessories > Clothing > Dresses",
        'Option1 Name': 'Color',
        'Option1 Value': safe_str(row['Color']),
        'Option2 Name': 'Size',
        'Option2 Value': safe_str(row['Size']),
        'Variant SKU': f"{current_code}_{safe_str(row['Color']).upper()}_{safe_str(row['Size'])}",
        'Variant Grams': '',
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Qty': safe_str(row['Quantity']),
        'Variant Inventory Policy': 'deny',
        'Variant Fulfillment Service': 'manual',
        'Variant Price': safe_str(row['Price']),
        'Variant Compare at Price': '',
        'Variant Requires Shipping': 'TRUE',
        'Variant Taxable': 'TRUE',
        'Variant Barcode': '',
        'Image Src': first_image,
        'Image Position': 1,
        'Image Alt Text': f"Nova Vogue CODE: {current_code}_{safe_str(row['Color']).upper()}_{safe_str(row['Size'])}",
        'Variant Image': variant_image,  # Add Variant Image field
        # SEO
        'SEO Title': f"{safe_str(row['Brand'])} {current_code}",
        'SEO Description': limit_seo_description(body_html),
        'Google Shopping / Google Product Category': "Apparel & Accessories > Clothing > Dresses",
        'Google Shopping / Gender': 'Female',
        'Google Shopping / Age Group': 'Adult',
        'Google Shopping / AdWords Grouping': 'Women Dresses',
        'Google Shopping / AdWords Labels': combine_tags(row['Brand'],row['Collection'],row['Category'], row['Type']),
        'Google Shopping / Condition': 'new',
        'Google Shopping / Custom Product': False,
        'Google Shopping / Custom Label 0': safe_str(row['Brand']),
        'Google Shopping / Custom Label 1': safe_str(row['Collection']),
        'Google Shopping / Custom Label 2': safe_str(row['Category']),
        'Google Shopping / Custom Label 3': safe_str(row['Type']),
        'Google Shopping / Custom Label 4': safe_str(row['Color']).upper() + ' ' +safe_str(row['Size']),
        # metafields
        'product.metafields.custom.color': safe_str(row['Color']),
        'product.metafields.custom.size': safe_str(row['Size']),
        'product.metafields.custom.dress_occasion': replace_commas(safe_str(row['Category']), safe_str(row['Collection']), safe_str(row['Type'])),
        'product.metafields.custom.dress_style': replace_commas(safe_str(row['Category']), safe_str(row['Collection']), safe_str(row['Type'])),
        'product.metafields.custom.skirt_length_type': safe_str(row['Type']),
        'product.metafields.custom.sleeve_length_type': safe_str(row['Category']),
    }

    # Add the main product entry to the list
    all_entries.append(pd.DataFrame([product_entry]))

    # Append additional images for the last code
    if other_images:
        for idx, url in enumerate(other_images):
            image_entry = {
                'Handle': current_code.lower(),
                'Image Src': url,
                'Image Position': idx + 2,
            }
            all_entries.append(pd.DataFrame([image_entry]))

# Concatenate all entries into a single DataFrame
shopify_df = pd.concat(all_entries, ignore_index=True)

# Save the final DataFrame to a CSV file
output_file_path = f'../data/shopify_sheets/shopify_final_sheet_{datetime.now().strftime("%m-%d-%Y_%H-%M")}.csv'

shopify_df.to_csv(output_file_path, index=False, sep=",")

# Print the number of unique handles
print(shopify_df['Handle'].nunique())
print(shopify_df['Option1 Value'].unique())

print(f"Shopify CSV file has been saved to {output_file_path}")


307
['IVORY' nan 'WHITE' 'WHITE-IVORY' 'APPLE-RED' 'BURGUNDY' 'LAVENDER'
 'WHITE-BURGUNDY' 'WHITE-NAVY' 'WHITE-RED' 'LILAC' 'PINK' 'SKY-BLUE'
 'BLACK' 'IVORY-BLACK' 'IVORY-NAVY' 'NAVY-IVORY' 'TRUFFLE' 'BROWN'
 'CLARET' 'POOL' 'PURPLE' 'ROYAL' 'SAGE' 'SANGRIA' 'SILVER' 'CHAMPAGNE'
 'CORAL' 'LIGHT-GREEN' 'RED' 'WINE' 'FUCHSIA' 'SUNSHINE' 'SUNRISE'
 'PISTACHIO' 'PEACH-FIZZ' 'NAVY' 'TEAL' 'VICTORIA-LILAC' 'GOLD' 'JADE'
 'ORANGE' 'BLUE' 'COFFEE' 'GREEN' 'PEACH' 'DARK-LILAC' 'CAFE' 'WATERMELON'
 'WHITE-BLACK' 'YELLOW' 'PLATINUM' 'APPLERED-WHITE' 'TRUFFLE-WHITE'
 'PEACOCK' 'PLUM' 'SEA-BLACK' 'GREY' 'VICTORIA' 'EMERALD' 'PURPLE-BLACK'
 'NUDE' 'WHITE-BLUE' 'BURNT-ORANGE' 'BLACK-NUDE' 'BLUE-PINK' 'BANANA'
 'MINT' 'PRINT' 'CHARCOAL' 'BLACK-SILVER' 'POWDER-BLUE' 'BISCOTTI'
 'PURPLE-GOLD' 'GUNMETAL' 'MIDNIGHT' 'POOL-WHITE' 'ROYAL-BLUE' 'TIFFANY'
 'TURQUOISE' 'JET-COBALT' 'BERRY' 'BLACK-GUNMETAL' 'JET' 'ROSE-QUARTZ'
 'ROSE' 'BLUE-BLACK' 'BLACK-BURGUNDY' 'FLORAL-PRINT' 'APRICOT' 'COBALT'
 'DIRTY-ROSE

In [None]:
# # Fixing 570 for top 100 variants

import pandas as pd

file_path = '../data/shopify_sheets/shopify_final_sheet_12-10-2024_23-42.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)
df = pd.read_csv(file_path)

# Function to filter and prioritize the variants
def get_100_variants(df):
    # Sort by inventory quantity (descending) and size (ascending)
    df_sorted = df.sort_values(
        by=['Variant Inventory Qty', 'Option2 Value'],  # Sort by quantity first, then size
        ascending=[False, True]
    )

    # Filter only the unique handle (F570)
    df_filtered = df_sorted[df_sorted['Handle'] == 'F570']

    # Initialize result list
    result = []

    # Get all unique colors
    unique_colors = df_filtered['Option1 Value'].unique()

    # Add at least one size for each color
    for color in unique_colors:
        color_group = df_filtered[df_filtered['Option1 Value'] == color]

        # Add the size with the highest quantity for this color
        top_size = color_group.iloc[0]
        result.append(top_size.to_dict())

        # Add additional sizes for the color if space allows
        additional_sizes = color_group.iloc[1:]  # Exclude the first (already added)
        for _, row in additional_sizes.iterrows():
            result.append(row.to_dict())

    # If we still have less than 100 rows, keep adding rows to reach 100
    result_df = pd.DataFrame(result)
    while len(result_df) < 100:
        for color in unique_colors:
            color_group = df_filtered[df_filtered['Option1 Value'] == color]
            for _, row in color_group.iterrows():
                if len(result_df) < 100:
                    result_df = result_df.append(row, ignore_index=True)
                else:
                    break

    # Ensure only the top 100 rows are returned
    return result_df.head(100)

# Apply the function
filtered_products = get_100_variants(df)

# Save or display the results
output_file = 'exactly_100_variants.csv'
filtered_products.to_csv(output_file, index=False)
print(f"Filtered products saved to {output_file}")

In [11]:
import pandas as pd

# Load the CSV file into a pandas DataFrame
file_path = "../data/shopify_sheets/570.csv"  # Update this with the actual file path
df = pd.read_csv(file_path)

# Check if the required columns exist
if "Option1 Value" not in df.columns or "Option2 Value" not in df.columns:
    raise ValueError("CSV must contain 'color' and 'size' columns")

# Group by color
groups = df.groupby("Option1 Value")

# Initialize variables
result = []
colors = list(groups.groups.keys())
color_indices = {color: 0 for color in colors}  # Track index for each color

# Loop until we have 100 rows
while len(result) < 100:
    for color in colors:
        group = groups.get_group(color)  # Get the group for the color

        # Check if there are remaining rows for this color
        if color_indices[color] < len(group):
            row = group.iloc[color_indices[color]]  # Get the next row for the color
            result.append(row)
            color_indices[color] += 1  # Move to the next row for this color

            if len(result) >= 100:
                break

# Convert the result to a DataFrame
final_df = pd.DataFrame(result)

# Save the result to a new CSV file
output_file = "grouped_dresses.csv"
final_df.to_csv(output_file, index=False)
print(f"Final 100 rows saved to {output_file}")


Final 100 rows saved to grouped_dresses.csv


In [21]:
# Read the CSV file
# file_path = '../data/shopify_sheets/570.csv'  # Replace with the actual file path
# df = pd.read_csv(file_path)
file_path_1 = '570-uploads.csv'  # Replace with the actual file path
df = pd.read_csv(file_path_1)


# Check if the required columns exist
if "Color" not in df.columns or "Size" not in df.columns or "Code" not in df.columns or "Quantity" not in df.columns:
    raise ValueError("CSV must contain 'Code', 'Color', 'Size', and 'Quantity' columns")

# Group by Color, Size, and Code
grouped = df.groupby(["Color", "Size", "Code"])

# Convert the grouped data into a new DataFrame
result = grouped.size().reset_index(name='Count')

# Save the grouped data to a new CSV file
output_file = "F570_color_size_Uploads.csv"
result.to_csv(output_file, index=False)
print(f"Grouped data saved to {output_file}")


Grouped data saved to F570_color_size_Uploads.csv
