###### Step 1: Get the style code (Handle), brand, color, quantity, and size from the main sheet into a new sheet


In [22]:
import pandas as pd

# Location of the main product sheet, relative to this notebook.
input_file = '../../data/product_data/main_sheet.csv'

# Raw sheet header -> short column name used by the rest of this notebook.
column_aliases = {
    'Handle': 'Handle',
    'SIZE_US': 'Size',
    'COLOR': 'Color',
    'QUANTITY': 'Quantity',
    'BRAND NAME': 'Brand',
}

# Read the sheet and apply the aliases in one step; headers that are not
# listed in `column_aliases` keep their original names.
df = pd.read_csv(input_file).rename(columns=column_aliases)

# Define validation criteria for each column


def validate_row(row):
    """Return True when a product row is complete and well-typed.

    A row is accepted only if:
      * none of Brand, Handle, Color, Quantity, Size is null,
      * Brand, Handle and Color are non-blank strings,
      * Quantity and Size can be converted with ``float()``.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must support ``row[name]`` for the five column names above.

    Returns
    -------
    bool
        True if the row passes every check, False otherwise.
    """
    text_fields = ('Brand', 'Handle', 'Color')
    numeric_fields = ('Quantity', 'Size')

    # Reject rows with any missing value first.
    if any(pd.isnull(row[name]) for name in text_fields + numeric_fields):
        return False

    # The type check has to come before .strip(): a non-null, non-string
    # value (for example a purely numeric handle parsed as an int) has no
    # .strip() and would otherwise raise AttributeError instead of being
    # rejected.
    for name in text_fields:
        value = row[name]
        if not isinstance(value, str) or value.strip() == '':
            return False

    # float() raises ValueError for unparseable text and TypeError for
    # unsupported types; both mean the value is not usable as a number.
    try:
        for name in numeric_fields:
            float(row[name])
    except (TypeError, ValueError):
        return False

    return True


# Columns carried forward to the next step, in output order.
selected_columns = ['Handle', 'Brand', 'Color', 'Quantity', 'Size']

# Destination for the validated subset.
# (Earlier location, kept for reference: '../../scripts/catalog/filtered_data.csv')
output_file = '../../data/Homecoming/filtered_data.csv'

# Row-wise validation produces one boolean per row of `df`.
valid_rows = df.apply(validate_row, axis=1)

# Keep the rows that passed, then narrow to the columns of interest.
filtered_df = df[valid_rows]
new_df = filtered_df[selected_columns]

# Persist without the pandas index so the CSV only holds the data columns.
new_df.to_csv(output_file, index=False)

# Report how many distinct handles survived validation.
unique_handle_count = new_df['Handle'].nunique()
print(unique_handle_count)
print(f"Selected columns have been written to {output_file}")

559
Selected columns have been written to ../../data/Homecoming/filtered_data.csv


###### Step 2: Group this sheet by style code, color, and size and sum of quantity by group to get the total quantity


In [23]:
import pandas as pd

# Load the validated rows written by Step 1.
df = pd.read_csv("../../data/Homecoming/filtered_data.csv")

# Make Quantity numeric; anything unparseable becomes NaN and is then
# treated as 0 so it does not affect the per-group sum.
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce').fillna(0)

# One output row per (Handle, Color, Size): quantities are summed and the
# first Brand seen in the group is kept.
aggregated_df = df.groupby(['Handle', 'Color', 'Size'], as_index=False).agg({
    'Brand': 'first',
    'Quantity': 'sum'
})

# Sanity check: aggregation should not change the number of distinct handles.
# Each replacement field is kept on a single line: splitting an expression
# across lines inside a single-quoted f-string is a SyntaxError before
# Python 3.12.
print(f"Number of unique Codes in the original DataFrame: {df['Handle'].nunique()}")
print(f"Number of unique Codes after aggregation: {aggregated_df['Handle'].nunique()}")

# Write the aggregated data for Step 3.
aggregated_df.to_csv('../../data/Homecoming/step_2_output.csv', index=False)

Number of unique Codes in the original DataFrame: 559
Number of unique Codes after aggregation: 559


###### Step 3: Get the prices and Description of available product codes


In [24]:
import pandas as pd

# Input and output locations for this step.
homecoming_file_path = '../../data/product_data/Homecoming.csv'
step2_file_path = '../../data/Homecoming/step_2_output.csv'
output_file_path = '../../data/Homecoming/step_3_output.csv'

# Homecoming sheet, reduced to one row per Handle (first occurrence wins) so
# the merge below cannot multiply the Step 2 rows.
homecoming_df = pd.read_csv(homecoming_file_path).drop_duplicates(
    subset=['Handle'], keep='first'
)

# Aggregated stock rows produced by Step 2.
step2_df = pd.read_csv(step2_file_path)

# Left-join Price and Description onto every stock row by Handle.
lookup_columns = ['Handle', 'Price', 'Description']
merged_df = step2_df.merge(homecoming_df[lookup_columns], on='Handle', how='left')

# Rows lacking either a Price or a Description are dropped.
filtered_df = merged_df.dropna(subset=['Price', 'Description'])

# Save the result for the next step.
filtered_df.to_csv(output_file_path, index=False)

# Report where the file went and how many distinct handles remain.
print(f"Filtered file has been saved to {output_file_path}")
print(f"Number of unique handles: {filtered_df['Handle'].nunique()}")


Filtered file has been saved to ../../data/Homecoming/step_3_output.csv
Number of unique handles: 59


###### Step 5: Combine image URLs with the product code and add additional URLs


In [25]:
import pandas as pd

# Inputs: Step 3 output (stock rows with Price/Description) and the sheet
# listing image URLs per Handle (columns used here: 'Handle', 'url').
file1_path = '../../data/Homecoming/step_3_output.csv'
file2_path = '../../data/product_data/s3_urls.csv'  # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Number of distinct handles that have at least one row in the URL sheet.
print(file2['Handle'].nunique())

# Row-level merge (one output row per stock row x URL). It is not used by the
# rest of this cell; it is kept only because it rebinds the notebook-global
# `merged_df` - NOTE(review): remove if no later cell reads it.
merged_df = pd.merge(file1, file2[['Handle', 'url']], on='Handle', how='left')

# Collapse the URL sheet to one row per Handle, with all of that handle's
# URLs joined into a single comma-separated string.
# Rows with a missing url are dropped first: ','.join() raises TypeError on
# NaN, so a single blank cell in the sheet would otherwise abort the step.
url_grouped = (
    file2.dropna(subset=['url'])
    .groupby('Handle')['url']
    .agg(','.join)
    .reset_index()
)

# Attach the joined URL string to every stock row of the matching Handle.
final_df = pd.merge(file1, url_grouped, on='Handle', how='left')

# Rename the 'url' column to 'Urls'
final_df = final_df.rename(columns={'url': 'Urls'})

# Discard rows where 'Urls' is NaN (no URL available)
final_df = final_df.dropna(subset=['Urls'])

# Save the final DataFrame to a new CSV file
output_file_path = '../../data/Homecoming/step_5_output.csv'
final_df.to_csv(output_file_path, index=False)

print(f"Final CSV file with appended URLs has been saved to {output_file_path}")
print(f"Number of unique handles: {final_df['Handle'].nunique()}")


342
Final CSV file with appended URLs has been saved to ../../data/Homecoming/step_5_output.csv
Number of unique handles: 53


###### Step 6: Create the Shopify CSV file to upload


In [26]:

import pandas as pd
import re


# Load the Step 5 output: one row per (Handle, Color, Size) variant, with
# 'Urls' holding that handle's image URLs as one comma-separated string.
file1_path = '../../data/Homecoming/step_5_output.csv'
file1 = pd.read_csv(file1_path)

# Core Shopify product-CSV headers. They are only used as the header row when
# there is nothing to export; otherwise the output columns (and their order)
# come from the keys of `product_entry` below.
shopify_columns = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published',
    'Option1 Name', 'Option1 Value', 'Option2 Name', 'Option2 Value',
    'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker',
    'Variant Inventory Qty', 'Variant Inventory Policy',
    'Variant Fulfillment Service', 'Variant Price',
    'Variant Compare at Price', 'Variant Requires Shipping',
    'Variant Taxable', 'Variant Barcode', 'Image Src', 'Image Position',
    'Image Alt Text',
]


def _extra_image_rows(handle, image_urls):
    """Build the image-only rows for one product.

    Each URL becomes a row carrying just the lower-cased handle, the image
    URL and its position. Positions start at 2 because position 1 is the
    image placed on the variant rows.
    """
    return [
        {'Handle': handle.lower(), 'Image Src': url, 'Image Position': position}
        for position, url in enumerate(image_urls, start=2)
    ]


# Records are collected as plain dicts and turned into a DataFrame once at
# the end, instead of building and concatenating one single-row DataFrame
# per record.
all_entries = []

# Handle of the product currently being emitted, and its images after the first.
current_code = None
other_images = []

# Rows of the same Handle are expected to be adjacent in the input; a change
# of Handle marks the end of a product.
for _, row in file1.iterrows():
    # str() so a handle that pandas parsed as a number still supports
    # .lower()/.upper() and string concatenation.
    handle = str(row['Handle'])

    # Moving on to a new product: emit the previous product's extra images.
    if current_code is not None and current_code != handle:
        all_entries.extend(_extra_image_rows(current_code, other_images))
        other_images = []

    current_code = handle

    urls = row['Urls']
    if not isinstance(urls, str):
        # No image URLs for this row. Skip it rather than fall through with
        # `first_image` / `other_images` left over from the previous product
        # (or undefined on the very first row). Step 5 already drops rows
        # without URLs, so this is a safety net.
        continue
    first_image, *other_images = urls.split(",")

    # Double quotes inside the replacement field: reusing the outer quote
    # type inside an f-string is a SyntaxError before Python 3.12.
    body_html = f'<p>{row["Description"]}</p>'

    # One row per variant (Color x Size) of the product.
    product_entry = {
        'Handle': handle.lower(),
        'Title': "Nova Vogue " + handle,
        'Body (HTML)': body_html,
        'Vendor': "Nova Vogue",  # fixed vendor; the sheet's 'Brand' column is not used
        'Type': 'Short',  # You can customize this
        'Tags': "Homecoming, Homecoming 2024, Short Dress",
        'Published': 'TRUE',
        "Product Category": "Apparel & Accessories > Clothing > Dresses",
        'Option1 Name': 'Color',
        'Option1 Value': row['Color'],
        'Option2 Name': 'Size',
        'Option2 Value': row['Size'],
        'Variant SKU': f"{handle}_{row['Color'].upper()}_{row['Size']}",
        'Variant Grams': '',  # Add weight if available
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Qty': row['Quantity'],
        'Variant Inventory Policy': 'deny',
        'Variant Fulfillment Service': 'manual',
        'Variant Price': row['Price'],
        'Variant Compare at Price': '',
        'Variant Requires Shipping': 'TRUE',
        'Variant Taxable': 'TRUE',
        'Variant Barcode': '',
        'Image Src': first_image,  # First image URL
        'Image Position': 1,
        'Image Alt Text': "Nova Vogue CODE: {} Color: {} Size: {}".format(
            handle.lower(), str(row['Color']).lower(), str(row['Size']).lower()),
        'metafield.custom.dress_style': "Short Dress " + handle.upper(),
        'metafield.custom.skirt_dress_length_type': "Short",
    }
    all_entries.append(product_entry)

# Emit the extra images of the last product (nothing to do for empty input).
if other_images:
    all_entries.extend(_extra_image_rows(current_code, other_images))

# Build the final frame in one go. With no records, fall back to a
# header-only frame so the CSV is still written.
if all_entries:
    shopify_df = pd.DataFrame(all_entries)
else:
    shopify_df = pd.DataFrame(columns=shopify_columns)

print(shopify_df['Handle'].nunique())
output_file_path = '../../data/shopify_sheets/Homecoming_shopify_output_demo.csv'
shopify_df.to_csv(output_file_path, index=False)
print(f"Shopify CSV file has been saved to {output_file_path}")

53
Shopify CSV file has been saved to ../../data/shopify_sheets/Homecoming_shopify_output_demo.csv


###### Some verifications for understanding
