###### Step 1: Get the Style code, color, quantity, and size from the main sheet into a new sheet

In [35]:
import pandas as pd

# Load the original CSV file
input_file = '../../data/sheets/main_sheet.csv'  # Replace with your input file path
df = pd.read_csv(input_file)

# Rename specific columns
df = df.rename(columns={
    'STYLE NO.': 'Code',
    'SIZE_US': 'Size',
    'COLOR': 'Color',
    'QUANTITY': 'Quantity',
    'BRAND NAME': 'Brand'
})

# Select the specified columns based on the new names
selected_columns = ['Brand', 'Code', 'Color', 'Quantity', 'Size']
new_df = df[selected_columns]

# Save the selected columns to a new CSV file
output_file = '../../data/sheets/step_1_output.csv'  # Replace with your desired output file path
new_df.to_csv(output_file, index=False)

print(f"Selected columns have been written to {output_file}")

Selected columns have been written to ../../data/sheets/step_1_output.csv


###### Step 2: Group this sheet by style code, color, and size and sum of quantity by group to get the total quantity

In [36]:
import pandas as pd

# Load the original CSV file
df = pd.read_csv("../../data/sheets/step_1_output.csv")

# Ensure QUANTITY is numeric, coercing errors to NaN
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')

# Fill NaN values in QUANTITY with 0 (or handle as needed)
df['Quantity'] = df['Quantity'].fillna(0)

# First aggregation: Sum quantities based on STYLE NO. and COLOR
aggregated_df = df.groupby(['Code', 'Color', 'Size'], as_index=False).agg({
    'Brand': 'first',
    'Quantity': 'sum'
})

# Second aggregation: Sum quantities based on STYLE NO. to get total quantity for each style
style_quantity_df = aggregated_df.groupby('Code', as_index=False).agg({
    'Quantity': 'sum'
})

# print(style_quantity_df['STYLE NO.'].unique())
# print(style_quantity_df['STYLE NO.'].nunique())

aggregated_df.to_csv('../../data/sheets/step_2_output.csv', index=False)

###### Step 3: Get the prices of available product codes

In [37]:
import pandas as pd

# Load the first CSV file
file1_path = '../../data/transformed_products.csv'  # Replace with your actual file path
file1 = pd.read_csv(file1_path)

# Load the second CSV file
file2_path = '../../data/sheets/step_2_output.csv'  # Replace with your actual file path
file2 = pd.read_csv(file2_path)

# Merge the two DataFrames on the 'Code' column to get the 'Price' column from file1
merged_df = pd.merge(file2, file1[['Code', 'Price']], on='Code', how='left')

# Update the 'Price' column in file2 with the values from the merged DataFrame
file2['Price'] = merged_df['Price']

# Save the updated file2 DataFrame to a new CSV file
output_file_path = '../../data/sheets/step_3_output.csv'  # Replace with your desired output file path
file2.to_csv(output_file_path, index=False)

print(f"Updated file has been saved to {output_file_path}")


Updated file has been saved to ../../data/sheets/step_3_output.csv


###### Step 4: Combine title, description, keywords, and tags with this new sheet based on product code

In [50]:
import pandas as pd

# Load the first CSV file (file_1)
file1_path = '../../data/sheets/product_desc.csv'  # Replace with the actual file path
file1 = pd.read_csv(file1_path)
# removing copies
file1 = file1.drop_duplicates(subset=['Code'], keep='first')
# Load the second CSV file (file_2)
file2_path = '../../data/sheets/step_3_output.csv'  # Replace with the actual file path
file2 = pd.read_csv(file2_path)

# Remove 'Color' column from file_1 if it exists
if 'Color' in file1.columns:
    file1 = file1.drop(columns=['Color'])

# Merge the two DataFrames on the 'Code' column, keeping details from file_1
merged_df = pd.merge(file2, file1, on='Code', how='inner')

# Define the columns from file_1 to copy to file_2
columns_to_copy = ['Long Description', 'Short Description', 'Keywords', 'Tags']

# For each column to copy, fill NaN values in merged_df with the corresponding values from file_1
for column in columns_to_copy:
    merged_df[column] = merged_df.groupby('Code')[column].transform('first')

# Save the merged DataFrame to a new CSV file
output_file_path = '../../data/sheets/step_4_output.csv'  # Replace with your desired output file path
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file has been saved to {output_file_path}")

Merged file has been saved to ../../data/sheets/step_4_output.csv


###### Step 5: Combine image URLs with the product code and add additional URLs

In [81]:
import pandas as pd

# Load the CSV files
file1_path = '../../data/sheets/step_4_output.csv'  # Replace with the actual file path
file2_path = '../../data/s3_url_images.csv'  # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Rename columns in file2 to match the columns in file1 for merging
file2 = file2.rename(columns={'Product Code': 'Code'})

# Merge the two DataFrames on the 'Code' column
merged_df = pd.merge(file1, file2[['Code', 'url']], on='Code', how='left')

# Group URLs by 'Code' and create a list of URLs for each 'Code'
url_grouped = file2.groupby('Code')['url'].apply(lambda x: ','.join(x)).reset_index()

# Merge the grouped URLs back to the original DataFrame
final_df = pd.merge(file1, url_grouped, on='Code', how='left')
final_df = final_df.rename(columns={'url': 'Urls'})

# Save the final DataFrame to a new CSV file
output_file_path = '../../data/sheets/step_5_output.csv'  # Replace with your desired output file path
final_df.to_csv(output_file_path, index=False)

print(f"Final CSV file with appended URLs has been saved to {output_file_path}")


Final CSV file with appended URLs has been saved to ../../data/sheets/step_5_output.csv


###### Step 6: Creating shopify csv file to upload 

In [121]:
import pandas as pd

# Load the CSV file
file1_path = '../../data/sheets/step_5_output.csv'  # Replace with the actual file path
file1 = pd.read_csv(file1_path)

# Create Shopify CSV structure
shopify_columns = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags',
    'Published', 'Option1 Name', 'Option1 Value', 'Option2 Name', 'Option2 Value', 'Variant SKU',
    'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
    'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
    'Variant Compare at Price', 'Variant Requires Shipping',
    'Variant Taxable', 'Variant Barcode', 'Image Src', 'Image Position',
    'Image Alt Text'
]

# Initialize an empty DataFrame
shopify_df = pd.DataFrame(columns=shopify_columns)

# Populate Shopify DataFrame with data from file1
all_entries = []

# Initialize variables to track the current code and its associated images
current_code = None
other_images = []

# Iterate over rows using iterrows()
for index, row in file1.iterrows():
    # Check if we have encountered a new code
    if current_code is not None and current_code != row['Code']:
        # Append additional images for the previous code
        for idx, url in enumerate(other_images):
            image_entry = {
                'Handle': current_code.lower(),
                'Image Src': url,
                'Image Position': idx + 2,  # Start from position 2
            }
            all_entries.append(pd.DataFrame([image_entry]))

        # Reset the other_images list for the new code
        other_images = []

    # Set the current code
    current_code = row['Code']

    # Generate handle from product name
    keywords = str(row['Keywords']) if pd.notna(row['Keywords']) else ''
    if keywords != '':
        keywords = keywords.split(", ")
    description = str(row['Long Description']) if pd.notna(row['Long Description']) else ''

    body_html = f'<p>{description}</p>\n<ul>'
    for i in range(1, len(keywords) - 1):
        body_html += f'\n<li>{keywords[i]}</li>'
    body_html += '\n</ul>'
    product_type = "Dress"

    if isinstance(row['Urls'], str):
        all_images = row['Urls'].split(",")
        first_image, *other_images = all_images

    else:
        continue

    # Create a new row for the main product entry
    product_entry = {
        'Handle': row['Code'].lower(),
        'Title': row['Brand'] + " " + row['Code'],
        'Body (HTML)': body_html,
        'Vendor': row['Brand'],
        'Type': product_type,  # You can customize this
        'Tags': row['Tags'],
        'Published': 'TRUE',
        "Product Category": "Apparel & Accessories > Clothing > Dresses",
        'Option1 Name': 'Size',
        'Option1 Value': row['Size'],
        'Option2 Name': 'Color',
        'Option2 Value': row['Color'],
        'Variant SKU': f"{row['Code'].lower()}_{row['Color'].lower()}_{row['Size']}",
        'Variant Grams': '',  # Add weight if available
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Qty': row['Quantity'],
        'Variant Inventory Policy': 'deny',
        'Variant Fulfillment Service': 'manual',
        'Variant Price': row['Price'],
        'Variant Compare at Price': '',
        'Variant Requires Shipping': 'TRUE',
        'Variant Taxable': 'TRUE',
        'Variant Barcode': '',
        'Image Src': first_image,  # First image URL
        'Image Position': 1,
        'Image Alt Text': row['Brand'] + " " + row['Code'].lower() + " " + row['Filename']
    }

    # Add the main product entry to the list
    all_entries.append(pd.DataFrame([product_entry]))

# Append additional images for the last code
if other_images:
    for idx, url in enumerate(other_images):
        image_entry = {
            'Handle': current_code.lower(),
            'Image Src': url,
            'Image Position': idx + 2,  # Start from position 2
        }
        all_entries.append(pd.DataFrame([image_entry]))

# Concatenate all entries into a single DataFrame
shopify_df = pd.concat(all_entries, ignore_index=True)

print(shopify_df['Handle'].nunique())
print(shopify_df['Handle'].unique())

# Save the Shopify DataFrame to a new CSV file
output_file_path = '../../data/sheets/shopify_output_demo.csv'  # Replace with your desired output file path
shopify_df.to_csv(output_file_path, index=False)

print(f"Shopify CSV file has been saved to {output_file_path}")


228
['aa219' 'aa221' 'aa222' 'aa226' 'aa229' 'aa233' 'aa9254' 'aa9256'
 'aa9257' 'aa9270' 'aa9298' 'aa9301' 'aa9304' 'aa9306' 'aa9308' 'aa9309'
 'aa9310' 'aa9312' 'aa9313' 'aa9314' 'aa9315' 'aa9316' 'aa9317' 'aa9318'
 'aa9319' 'aa9320' 'aa9321' 'aa9322' 'aa9326' 'aa9327' 'aa9329' 'aa9330'
 'aa9331' 'aa9332' 'aa9335' 'aa9336' 'e1342' 'e1344' 'e1347' 'e1348'
 'e1354' 'e1356' 'e1378' 'e1379' 'e1381' 'e1396' 'e1420' 'e1421' 'e1422'
 'e1424' 'e1428' 'e1438' 'e1448' 'e1452' 'e1463' 'e1472' 'e1502' 'e1508'
 'e1510' 'e1520' 'e1531' 'e1544' 'e1546' 'e1573' 'e1579' 'e1581' 'e1582'
 'e1637' 'e1657' 'e1663' 'e1694' 'e1696' 'e1715' 'e1729' 'e1730' 'e1767'
 'e1774' 'e1778' 'e1796' 'e1799' 'e1822' 'e1824' 'e1827' 'e1833' 'e1838'
 'e1844' 'e1852' 'e1868' 'e1882' 'e1890' 'e1907' 'e1923' 'e1931' 'e1937'
 'e1940' 'e1965' 'e1967' 'e1974' 'e1978' 'e1979' 'e1981' 'e1982' 'e1983'
 'e1986' 'e1988' 'e2016' 'e2019' 'e2020' 'e2021' 'e2023' 'e2025' 'e2027'
 'e2028' 'e2030' 'e2031' 'e2032' 'e2034' 'e2035' 'e2036' 

###### Some verificatiosn for understanding

In [122]:
import pandas as pd

# Load the CSV files
file1_path = '../../data/sheets/shopify_output_demo.csv'  # Replace with the actual file path
file2_path = '../../data/s3_url_images.csv'  # Replace with the actual file path
file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Get unique values from the 'Code' and 'Product Code' columns
codes_file1 = set(file1['Handle'].unique())
codes_file2 = set(file2['Product Code'].unique())

# Find the intersection of the two sets
common_codes = codes_file1.intersection(codes_file2)

# Print the number of unique values and the common values
print(f"Unique codes in file1: {len(codes_file1)}")
print(f"Unique product codes in file2: {len(codes_file2)}")
print(f"Common codes: {len(common_codes)}")
print(f"Common codes list: {common_codes}")


# Group by 'Code' and count unique combinations of 'Size' and 'Color'
variant_counts = file1.groupby('Handle').apply(lambda x: x[['Size', 'Color']].drop_duplicates().shape[0]).reset_index(name='Variant Count')

# Print the variant counts
print(variant_counts)


# Calculate the sum of the variant counts
total_variants = variant_counts['Variant Count'].sum()

# Print the total number of variants
print(f"Total number of variants: {total_variants}")


Unique codes in file1: 228
Unique product codes in file2: 299
Common codes: 0
Common codes list: set()


KeyError: "None of [Index(['Size', 'Color'], dtype='object')] are in the [columns]"

In [119]:
import pandas as pd

# Load the CSV files
file1_path = '../../data/sheets/step_5_output.csv'  # Replace with the actual file path
file2_path = '../../data/s3_url_images.csv'  # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Identify common codes between file1 and file2
common_codes = set(file1['Code']).intersection(set(file2['Product Code']))

# Filter file1 to include only rows with common codes
filtered_file1 = file1[file1['Code'].isin(common_codes)]

# Group by 'Code' and count unique combinations of 'Size' and 'Color'
variant_counts = filtered_file1.groupby('Code').apply(lambda x: x[['Size', 'Color']].drop_duplicates().shape[0]).reset_index(name='Variant Count')

# Print the variant counts
# print(variant_counts)

# Calculate the sum of the variant counts
total_variants = variant_counts['Variant Count'].sum()

# Print the total number of variants
print(f"Total number of variants: {total_variants}")


Total number of variants: 1335


  variant_counts = filtered_file1.groupby('Code').apply(lambda x: x[['Size', 'Color']].drop_duplicates().shape[0]).reset_index(name='Variant Count')
