In [3]:
import os
import re
import glob
import pandas as pd

# 0. Create an empty DataFrame with columns "ID", "slot1", and "Description"
df = pd.DataFrame(columns=["ID", "Description", "Notes", "slot1", "slot2"])

# Define the folder containing Excel files
folder_path = "Promos"  # Update this to your actual folder path
excel_files = glob.glob(os.path.join(folder_path, "*.xlsx"))  # Adjust pattern if needed (e.g., *.xls)

# Regular expression pattern to extract the ID (format: 3 digits, a dash, 3 digits)
id_pattern = re.compile(r'(\d{3}-\d{3})')

for file in excel_files:
    # Extract the ID from the filename
    filename = os.path.basename(file)
    id_match = id_pattern.search(filename)
    if not id_match:
        # Skip files that don't match the ID pattern
        continue
    file_id = id_match.group(1)

    # 5. Skip this file if the ID is already in the DataFrame
    if file_id in df["ID"].values:
        continue

    # Read the Excel file without assuming headers
    try:
        data = pd.read_excel(file, header=None)
    except Exception as e:
        print(f"Error reading {file}: {e}")
        continue

    # Find the row where column A equals "Product Name" (case-insensitive, stripping spaces)
    product_name_rows = data[data[0].astype(str).str.strip().str.lower() == "product name"].index
    if not product_name_rows.empty:
        # Get the value from column B (index 1)
        product_desc = data.iloc[product_name_rows[0], 1]
    else:
        product_desc = ""

    # 3. Find the row where column A equals "material" (case-insensitive, stripping spaces)
    material_rows = data[data[0].astype(str).str.strip().str.lower() == "material"].index
    if material_rows.empty:
        # If no "material" row is found, skip this file
        continue
    start_row = material_rows[0] + 1

    # 2. Initialize an empty string variable for accumulating values from column A
    slot1_str = ""

    # 4. Iterate over the rows starting at start_row
    for idx in range(start_row, len(data)):
        # Get the value from column A (index 0) and the corresponding n from column C (index 2)
        cell_value = data.iloc[idx, 0]
        try:
            n = int(data.iloc[idx, 2])
        except (ValueError, TypeError):
            # If conversion to integer fails, skip this row
            continue

        # Only process if n is positive
        if n > 0:
            # Repeat the value n times, separated by commas
            repeated_value = ",".join([str(cell_value)] * n)
            # Append to slot1_str, adding a comma if slot1_str is not empty
            if slot1_str:
                slot1_str += "," + repeated_value
            else:
                slot1_str = repeated_value

    # Add the result to the DataFrame using .loc
    new_row = {"ID": file_id, "slot2": "AddPart:"+slot1_str, "Description": product_desc,"slot1":"MISC:1:1:P"}
    df.loc[len(df)] = new_row

# Optional: display or save the resulting DataFrame
df.to_csv('Promos.csv',index=False)

TypeError: sequence item 15: expected str instance, int found

In [11]:
file

'Promos\\Promotion Creation 999-342 Ti2E Plan Apochromat Objective Promo Package - Special.xlsx'