###### Step 1: Copy the brand, style code, color, quantity, and size columns from the main sheet into a new sheet

In [35]:
import pandas as pd

# Where the raw sheet lives and where the trimmed copy is written.
input_file = '../../data/sheets/main_sheet.csv'  # Replace with your input file path
output_file = '../../data/sheets/step_1_output.csv'  # Replace with your desired output file path

# Raw sheet header -> short column name used by the later steps.
column_aliases = {
    'STYLE NO.': 'Code',
    'SIZE_US': 'Size',
    'COLOR': 'Color',
    'QUANTITY': 'Quantity',
    'BRAND NAME': 'Brand',
}

# Read the sheet and apply the aliases; headers not listed above keep their names.
df = pd.read_csv(input_file).rename(columns=column_aliases)

# Keep only the five renamed columns, in this fixed order.
# A KeyError here means one of the expected headers was absent from the sheet.
selected_columns = ['Brand', 'Code', 'Color', 'Quantity', 'Size']
new_df = df.loc[:, selected_columns]

# Write the trimmed sheet without the pandas index column.
new_df.to_csv(output_file, index=False)

print(f"Selected columns have been written to {output_file}")

Selected columns have been written to ../../data/sheets/step_1_output.csv


###### Step 2: Group this sheet by style code, color, and size, and sum the quantity within each group to get the total quantity per variant

In [36]:
import pandas as pd

# Load the trimmed sheet written by step 1.
df = pd.read_csv("../../data/sheets/step_1_output.csv")

# Coerce Quantity to a number; anything unparseable becomes NaN and is then counted as 0.
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce').fillna(0)

# Per-variant totals: one output row for every (Code, Color, Size) combination,
# carrying the first non-null Brand of the group and the summed Quantity.
# Note: with groupby's default dropna=True, rows whose Code, Color or Size is
# missing do not appear in the result.
variant_keys = ['Code', 'Color', 'Size']
aggregated_df = (
    df.groupby(variant_keys, as_index=False)
      .agg(Brand=('Brand', 'first'), Quantity=('Quantity', 'sum'))
)

# Per-style totals: the variant quantities summed again by Code alone.
# This frame is not written to disk by this cell.
style_quantity_df = aggregated_df.groupby('Code', as_index=False)['Quantity'].sum()

# Only the per-variant table is handed on to step 3.
aggregated_df.to_csv('../../data/sheets/step_2_output.csv', index=False)

###### Step 3: Get the prices of available product codes

In [37]:
import pandas as pd

# Load the product catalogue that carries the prices
file1_path = '../../data/transformed_products.csv'  # Replace with your actual file path
file1 = pd.read_csv(file1_path)

# Load the per-variant sheet produced by step 2
file2_path = '../../data/sheets/step_2_output.csv'  # Replace with your actual file path
file2 = pd.read_csv(file2_path)

# Reduce the catalogue to one price per Code (first occurrence wins).
# Without this, a Code that is listed more than once in the catalogue makes the
# left merge return more rows than file2 has, and copying the Price column back
# would then hand rows the price of a different product.
price_lookup = file1[['Code', 'Price']].drop_duplicates(subset='Code', keep='first')

# Left merge keeps every row of file2, in its original order; codes without a
# catalogue entry get a NaN price. validate= makes pandas raise if the lookup
# ever stops being unique per Code.
merged_df = pd.merge(file2, price_lookup, on='Code', how='left', validate='many_to_one')

# merged_df now has exactly one row per file2 row, so the prices can be copied
# across positionally.
file2['Price'] = merged_df['Price'].to_numpy()

# Save the updated file2 DataFrame to a new CSV file
output_file_path = '../../data/sheets/step_3_output.csv'  # Replace with your desired output file path
file2.to_csv(output_file_path, index=False)

print(f"Updated file has been saved to {output_file_path}")


Updated file has been saved to ../../data/sheets/step_3_output.csv


###### Step 4: Combine title, description, keywords, and tags with this new sheet based on product code

In [50]:
# import pandas as pd

# # Load the first CSV file
# file1_path = '../../data/sheets/step_3_output.csv'  # Replace with your actual file path
# file1 = pd.read_csv(file1_path)

# # Load the second CSV file
# file2_path = '../../data/sheets/product_desc.csv'  # Replace with your actual file path
# file2 = pd.read_csv(file2_path)

# # Merge the two DataFrames on the 'Code' column
# merged_df = pd.merge(file1, file2, on='Code', how='outer')

# # Save the merged DataFrame to a new CSV file
# output_file_path = '../../data/sheets/step_4_output.csv'  # Replace with your desired output file path
# merged_df.to_csv(output_file_path, index=False)

# print(f"Merged file has been saved to {output_file_path}")

# import pandas as pd

# # Load the first CSV file (file_1)
# file1_path = '../../data/sheets/product_desc.csv'  # Replace with the actual file path
# file1 = pd.read_csv(file1_path)

# # print(file1['Code'].unique())
# print(file1['Code'].nunique())
# print(len(file1))

# df_no_duplicates = file1.drop_duplicates(subset=['Code'], keep='first')
# print(df_no_duplicates['Code'].nunique())
# print(len(df_no_duplicates))

# # # Load the second CSV file (file_2)
# file2_path = '../../data/sheets/step_3_output.csv'  # Replace with the actual file path
# file2 = pd.read_csv(file2_path)

# # print(file2['Code'].unique())
# print(file2['Code'].nunique())





import pandas as pd

# Input locations: the description sheet (file_1) and the priced variant sheet (file_2).
file1_path = '../../data/sheets/product_desc.csv'  # Replace with the actual file path
file2_path = '../../data/sheets/step_3_output.csv'  # Replace with the actual file path

# Load the descriptions and keep a single row per Code (the first one encountered).
file1 = pd.read_csv(file1_path).drop_duplicates(subset=['Code'], keep='first')
file2 = pd.read_csv(file2_path)

# file_2 already provides a Color column; drop file_1's copy (if it has one)
# so the merge does not produce two Color columns.
file1 = file1.drop(columns=['Color'], errors='ignore')

# Inner merge on Code: variants whose Code has no description row are left out.
merged_df = file2.merge(file1, on='Code', how='inner')

# Text columns that came from file_1.
columns_to_copy = ['Long Description', 'Short Description', 'Keywords', 'Tags']

# Give every row of a Code that Code's first non-null value in each text column.
merged_df[columns_to_copy] = merged_df.groupby('Code')[columns_to_copy].transform('first')

# Save the merged DataFrame to a new CSV file
output_file_path = '../../data/sheets/step_4_output.csv'  # Replace with your desired output file path
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file has been saved to {output_file_path}")

Merged file has been saved to ../../data/sheets/step_4_output.csv


###### Step 5: Combine image URLs with the product code and add additional URLs

In [None]:
import pandas as pd

# Load the CSV files
file1_path = '../../data/sheets/step_4_output.csv'  # Replace with the actual file path
file2_path = '../../data/s3_url_images.csv'  # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Rename columns in file2 to match the columns in file1 for merging
file2 = file2.rename(columns={'Product Code': 'Code'})

# Merge the two DataFrames on the 'Code' column.
# Every row of file1 is paired with every file2 row that shares its Code.
merged_df = pd.merge(file1, file2, on='Code', how='inner')

# Create Shopify CSV structure
shopify_columns = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags',
    'Published', 'Option1 Name', 'Option1 Value', 'Option2 Name', 'Option2 Value', 'Variant SKU',
    'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
    'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
    'Variant Compare at Price', 'Variant Requires Shipping',
    'Variant Taxable', 'Variant Barcode', 'Image Src', 'Image Position',
    'Image Alt Text'
]


def _keyword_items(raw):
    """Return the individual keywords held in one raw ``Keywords`` cell.

    A missing cell gives an empty list. The text is split on ", "; whitespace
    and quote characters around each piece are trimmed and empty pieces dropped.
    """
    if pd.isna(raw):
        return []
    # NOTE(review): stripping '[' / ']' presumes the cell may hold a stringified
    # list such as "[a, b, c]" - confirm against product_desc.csv. For plain
    # "a, b, c" text the strip is a no-op.
    text = str(raw).strip().strip('[]')
    pieces = (piece.strip().strip('\'"') for piece in text.split(', '))
    return [piece for piece in pieces if piece]


# Rows are collected as plain dicts and turned into a DataFrame once at the end;
# DataFrame.append no longer exists in pandas 2.x and was quadratic in a loop.
shopify_rows = []

# Populate the Shopify rows with data from merged_df (one full row per merged row)
for index, row in merged_df.iterrows():
    # Missing descriptions become an empty paragraph instead of the text "nan".
    description = '' if pd.isna(row['Long Description']) else str(row['Long Description'])

    # One <li> per keyword (previously one per character of the raw string).
    list_items = ''.join(f'\n<li>{keyword}</li>' for keyword in _keyword_items(row['Keywords']))
    body_html = f'<p>{description}</p>\n<ul>{list_items}\n</ul>'

    handle = str(row['Code']).lower()

    # The url cell may hold several comma-separated URLs: the first belongs on
    # the product row, the rest become image-only rows.
    raw_url = row['url']
    if isinstance(raw_url, str):
        image_urls = [part.strip() for part in raw_url.split(',') if part.strip()]
    else:
        image_urls = [raw_url]
    first_url = image_urls[0] if image_urls else ''

    # Create a new row for the main product entry
    shopify_rows.append({
        'Handle': handle,
        'Title': row['product_name'],
        'Body (HTML)': body_html,
        'Vendor': row['Brand'],
        'Type': 'Product',  # You can customize this
        'Tags': row['Tags'],
        'Published': 'TRUE',
        'Option1 Name': 'Size',
        'Option1 Value': row['Size'],
        'Option2 Name': 'Color',
        # The merged sheet's columns are 'Color' and 'Code' (capitalised).
        'Option2 Value': row['Color'],
        'Variant SKU': f"{row['Code']}_{row['Color']}_{row['Size']}",
        'Variant Grams': '',  # Add weight if available
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Qty': row['Quantity'],
        'Variant Inventory Policy': 'deny',
        'Variant Fulfillment Service': 'manual',
        'Variant Price': row['Price'],
        'Variant Compare at Price': '',
        'Variant Requires Shipping': 'TRUE',
        'Variant Taxable': 'TRUE',
        'Variant Barcode': '',
        'Image Src': first_url,
        'Image Position': 1,
        'Image Alt Text': row['Keywords']
    })

    # Additional images: handle, URL and position only.
    for pos, url in enumerate(image_urls[1:], start=2):  # Start from 2 for additional images
        shopify_rows.append({
            'Handle': handle,
            'Image Src': url,
            'Image Position': pos
        })

# Build the frame in one go; columns= fixes the column order and fills the
# fields that image-only rows do not set with NaN.
shopify_df = pd.DataFrame(shopify_rows, columns=shopify_columns)

# Save the Shopify DataFrame to a new CSV file
output_file_path = '../../data/sheets/merged_data.csv'  # Replace with your desired output file path
shopify_df.to_csv(output_file_path, index=False)

print(f"Shopify CSV file has been saved to {output_file_path}")


In [64]:
import pandas as pd

# Load the CSV files
file1_path = '../../data/sheets/step_4_output.csv'  # Replace with the actual file path
file2_path = '../../data/s3_url_images.csv'   # Replace with the actual file path

file1 = pd.read_csv(file1_path)
file2 = pd.read_csv(file2_path)

# Rename columns in file2 to match the columns in file1 for merging
file2 = file2.rename(columns={'Product Code': 'Code'})

# Merge the two DataFrames on the 'Code' column.
# Every row of file1 is paired with every file2 row that shares its Code, so a
# product's rows in merged_df repeat each url once per file1 row of that Code.
merged_df = pd.merge(file1, file2, on='Code', how='inner')

# Output columns, in the order they are written to the CSV.
shopify_columns = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags',
    'Published', 'Product Category', 'Option1 Name', 'Option1 Value',
    'Option2 Name', 'Option2 Value', 'Variant SKU',
    'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
    'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
    'Variant Compare at Price', 'Variant Requires Shipping',
    'Variant Taxable', 'Variant Barcode', 'Image Src', 'Image Position',
    'Image Alt Text'
]


def _keyword_items(raw):
    """Return the individual keywords held in one raw ``Keywords`` cell.

    A missing cell gives an empty list. The text is split on ", "; whitespace
    and quote characters around each piece are trimmed and empty pieces dropped.
    """
    if pd.isna(raw):
        return []
    # NOTE(review): stripping '[' / ']' presumes the cell may hold a stringified
    # list such as "[a, b, c]" - confirm against product_desc.csv. For plain
    # "a, b, c" text the strip is a no-op.
    text = str(raw).strip().strip('[]')
    pieces = (piece.strip().strip('\'"') for piece in text.split(', '))
    return [piece for piece in pieces if piece]


def _variant_fields(variant, handle):
    """Build the variant-level Shopify fields for one (Color, Size) row."""
    return {
        'Option1 Value': variant['Size'],
        'Option2 Value': variant['Color'],
        'Variant SKU': f"{handle}_{variant['Color']}_{variant['Size']}",
        'Variant Grams': '',  # Add weight if available
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Qty': variant['Quantity'],
        'Variant Inventory Policy': 'deny',
        'Variant Fulfillment Service': 'manual',
        'Variant Price': variant['Price'],
        'Variant Compare at Price': '',
        'Variant Requires Shipping': 'TRUE',
        'Variant Taxable': 'TRUE',
        'Variant Barcode': '',
    }


product_type = "Dress"  # You can customize this

# Rows are collected as plain dicts and turned into a DataFrame once at the end.
all_entries = []

# One block of rows per product Code
for code, group in merged_df.groupby('Code'):
    first_row = group.iloc[0]
    handle = str(code).lower().replace(' ', '-')

    # Body: description paragraph followed by one <li> per keyword.
    # Every keyword is listed; the first and last are no longer skipped.
    description = str(first_row['Long Description']) if pd.notna(first_row['Long Description']) else ''
    list_items = ''.join(f'\n<li>{keyword}</li>' for keyword in _keyword_items(first_row['Keywords']))
    body_html = f'<p>{description}</p>\n<ul>{list_items}\n</ul>'

    # A missing Brand would make plain string concatenation raise; fall back to the code.
    brand = first_row['Brand']
    title = f"{brand} {code}" if pd.notna(brand) else str(code)

    # Product-level fields are written on the product's first row only.
    product_fields = {
        'Title': title,
        'Body (HTML)': body_html,
        'Vendor': brand,
        'Type': product_type,
        'Tags': first_row['Tags'],
        'Published': 'TRUE',
        'Product Category': "Apparel & Accessories > Clothing > Dresses",
        'Option1 Name': 'Size',
        'Option2 Name': 'Color',
        'Image Alt Text': first_row['Keywords'],
    }

    # Distinct variants and distinct image URLs of this product, in order of
    # first appearance. De-duplicating undoes the variant x image repetition
    # introduced by the merge above.
    variants = group.drop_duplicates(subset=['Color', 'Size'])
    image_urls = list(dict.fromkeys(str(url).strip() for url in group['url'].dropna()))

    # Row k carries variant k and image k when they exist, so every variant
    # (not just the first) and every image is exported exactly once.
    # NOTE(review): leaving the option names blank on rows after the first
    # follows my recollection of Shopify's sample CSV - confirm against the
    # import documentation.
    for position in range(max(len(variants), len(image_urls))):
        entry = {'Handle': handle}
        if position == 0:
            entry.update(product_fields)
        if position < len(variants):
            entry.update(_variant_fields(variants.iloc[position], handle))
        if position < len(image_urls):
            entry['Image Src'] = image_urls[position]
            entry['Image Position'] = position + 1
        all_entries.append(entry)

# Build the frame in one go; columns= fixes the column order and leaves the
# fields a row does not set empty.
shopify_df = pd.DataFrame(all_entries, columns=shopify_columns)

# Rows without an image have no position; the nullable integer dtype keeps the
# remaining positions written as "1", "2", ... rather than "1.0", "2.0".
shopify_df['Image Position'] = shopify_df['Image Position'].astype('Int64')

print(shopify_df['Handle'].nunique())


# Save the Shopify DataFrame to a new CSV file
output_file_path = '../../data/sheets/shopify_output.csv'  # Replace with your desired output file path
shopify_df.to_csv(output_file_path, index=False)

print(f"Shopify CSV file has been saved to {output_file_path}")


293
Shopify CSV file has been saved to ../../data/sheets/shopify_output.csv
