##### DESCRIPTION GENERATOR

In [1]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Access your secret using os.getenv
OPEN_AI_API = os.getenv("OPEN_AI_API")
print(OPEN_AI_API)

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


In [None]:
from openai import OpenAI
import pandas as pd
import time

# Set up your OpenAI API key
api_key = OPEN_AI_API
client = OpenAI(api_key=api_key)

def improve_description(description):
    try:
        # Call OpenAI API to improve the description
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a product description assistant that writes engaging and high-quality descriptions for fashion items. "
                        "Each description should be 4-6 sentences, highlighting the style, material, unique features, and ideal occasions "
                        "for wearing the item. Maintain proper grammar and punctuation. Please do not use similar sentence structure. "
                        "Try to use different action words for each output and make it more creative."
                    )
                },
                {
                    "role": "user",
                    "content": f"Please write a detailed and engaging description for the following dress:\n\n{description}"
                }
            ],
            max_tokens=200,
            temperature=0.5,
        )
        # Extract the improved description from the API response
        improved_text = response.choices[0].message.content
        return improved_text.strip()
    except Exception as e:
        print(f"Error: {e}")
        return description  # Return the original description if there's an error

# Load descriptions from CSV
input_file = "../../data/product_data/product_data.csv"
df = pd.read_csv(input_file, encoding="ISO-8859-1")

# Create a new column for improved descriptions
df['ai_description'] = ""

# Process descriptions, with a counter to limit processing to 10 descriptions
# processed_count = 0
for index, row in df.iterrows():
    description = row['Description']

    if pd.notna(description) and description.strip():
        improved_desc = improve_description(description)
        df.at[index, 'ai_description'] = improved_desc
        # processed_count += 1

        # if processed_count >= 10:
        #     break  # Stop after processing 10 descriptions

        time.sleep(1)  # Delay after each API call
    else:
        df.at[index, 'ai_description'] = ""  # Keep empty description if original was empty

# Save the updated DataFrame back to the same file
df.to_csv(input_file, index=False)
print("Improved descriptions saved.", input_file)


Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 24751 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 24740 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}
Improved descriptions saved. ../../data/product_data/product_data.csv


##### File_Name renamer

In [4]:
import pandas as pd
import re

# Load the CSV file
file_path = '../../data/product_data/shopify_urls_copy.csv'  # Replace with the actual file path
df = pd.read_csv(file_path)


# Function to standardize file_name
def standardize_file_name(file_name):
    # Regular expression to capture CODE, COLOR, and NUMBER
    match = re.match(r"^([A-Z0-9]+)_([A-Za-z-_]+)_(\d+)", file_name)
    if match:
        code = match.group(1)
        color = match.group(2).replace('_', '-')  # Replace underscores with hyphens
        number = match.group(3)

        # Capitalize each word in color
        color = '-'.join([word.upper() for word in color.split('-')])

        # Reformat the file_name to CODE_COLOR_NUMBER
        standardized_file_name = f"{code}_{color}_{number} PNG"
        return standardized_file_name
    else:
        return file_name  # If it doesn't match, return the original

# Apply the function to the 'file_name' column
df['file_name'] = df['file_name'].apply(standardize_file_name)

# Save the updated DataFrame back to the same file
df.to_csv(file_path, index=False)
print(f"Updated file has been saved to {file_path}")


Updated file has been saved to ../../data/product_data/shopify_urls_copy.csv


##### STYLE COLOR PRICE

In [6]:
import pandas as pd

# Load data from an Excel file
input_file = '../data/product_data/main_data.csv'  # Replace with your actual file name
df = pd.read_csv(input_file)

# Rename columns as specified
df = df.rename(columns={"STYLE NO.": "CODE", "COLOR": "COLOR"})

# Drop duplicate combinations of CODE and COLOR to get unique color entries for each CODE
df_unique = df.drop_duplicates(subset=["CODE", "COLOR"])

# Select only the CODE and COLOR columns
df_unique = df_unique[["CODE", "COLOR"]]


df_unique.to_csv("style_color_pictures.csv", index=False)

print("Data saved successfully in 'output.xlsx' with unique CODE and COLOR values.")


Data saved successfully in 'output.xlsx' with unique CODE and COLOR values.


##### MAPPING CODES,COLORS AND PICTURES

In [2]:
import pandas as pd

# Load the CSV files
file1 = pd.read_csv('./style_color_pictures.csv')  # Assuming the file with headers CODE, COLOR
file2 = pd.read_csv('../data/product_data/shopify_urls.csv')  # Assuming the file with headers url, file_name, Code

# Extract the color from file_name between the first and second underscores
file2['color_in_file_name'] = file2['file_name'].str.extract(r'_(.*?)_')  # Extract color between underscores
file2['Code_Color'] = file2['Code'].astype(str) + '_' + file2['color_in_file_name'].astype(str)

# Add PHOTO and COUNT columns to file1 with default values
file1['PHOTO'] = 'NO'
file1['COUNT'] = 0
file1['NOT IN INVENTORY'] = ''  # Initialize with empty strings

# Process each row in file1 to check matches in file2
for i, row in file1.iterrows():
    code_color = f"{row['CODE']}_{row['COLOR']}"

    # Check if there are any matches for this code_color in file2
    matching_files = file2[file2['Code_Color'] == code_color]

    if not matching_files.empty:
        file1.at[i, 'PHOTO'] = 'YES'
        file1.at[i, 'COUNT'] = len(matching_files)

    # Check if there are any matches by CODE but with a different color in file2
    unmatched_files = file2[(file2['Code'] == row['CODE']) & (file2['color_in_file_name'] != row['COLOR'])]

    # If unmatched color found for an existing CODE, add it as a new row in file1 with 'NOT IN INVENTORY' as 'YES'
    for _, unmatched_row in unmatched_files.iterrows():
        new_color = unmatched_row['color_in_file_name']

        # Check if this color does not already exist in file1 for the given code
        if not ((file1['CODE'] == row['CODE']) & (file1['COLOR'] == new_color)).any():
            new_row = {
                'CODE': row['CODE'],
                'COLOR': new_color,
                'PHOTO': 'YES',
                'COUNT': 1,
                'NOT IN INVENTORY': 'YES'
            }
            file1 = pd.concat([file1, pd.DataFrame([new_row])], ignore_index=True)

# Save the updated file
file1.to_csv('updated_codes_pictures.csv', index=False)
print("Updated file saved as 'updated_codes_pictures.csv'")


Updated file saved as 'updated_codes_pictures.csv'


In [4]:
import pandas as pd

file1 = pd.read_csv('./style_color_pictures.csv')  # Assuming the file with headers CODE, COLOR
file2 = pd.read_csv('../data/product_data/shopify_urls.csv')  # Assuming the file with headers url, file_name, Code

# Get unique codes from each file
codes_file1 = set(file1['CODE'])
codes_file2 = set(file2['Code'])

# Calculate totals
total_styles_file1 = len(codes_file1)
total_styles_file2 = len(codes_file2)

# Calculate codes missing in file2 but present in file1
missing_in_file2 = codes_file1 - codes_file2
missing_count = len(missing_in_file2)

# Print results
print(f"Total unique codes in file1: {total_styles_file1}")
print(f"Total unique codes in file2: {total_styles_file2}")
print(f"Total codes in file1 but missing in file2: {missing_count}")
print(f"Missing codes: {list(missing_in_file2)}")


Total unique codes in file1: 494
Total unique codes in file2: 342
Total codes in file1 but missing in file2: 152
Missing codes: ['B8661', 'E1918', 'E5105', 'E2196', 'BD2007/10', 'E2111', 'B8667', 'E1449', 'E1877', 'AA9307', 'E2578', 'E2496', 'E1326', 'E1465', 'E1385', 'E1127', 'E1831', 'B8898', 'E2775', 'E1892', 'B8654', 'E1250', 'B8921', 'E1089', 'E1942', 'E1303', 'E1294', 'E2082', 'E1849', 'E1875', 'E1470', 'E2771', 'E2495', 'E1762', 'B8320', 'E1950', 'E2557', 'E1968', 'E2000', 'E2520', 'E1540', 'E2494', 'BD2006/344', 'E2599', 'B8468', 'E1064', 'E1745', 'BD2007/09', 'E2078', 'BD2006/66', 'E1605', 'B8486', 'E1334', 'E1333', 'E1442', 'B8506', 'E1068', 'B9000', 'E1552', 'E1636', 'BD2006/412', 'E1537', 'E2755', 'B8660', 'E2501', 'E2128', 'B8749', 'E1761', 'E2497', 'E2106', 'E2180', 'E1930', 'B8401', 'E1904', 'E1885', nan, 'E2301', 'B8751', 'E1909', 'BD2007/417', 'E2610', 'E2524', 'E1879', 'B8716', 'E2503', 'E1538', 'E1024', 'E2778', 'E1278', 'B8896', 'BD2007/182', 'E1593', 'E1946', 'BD20