In [3]:
!pip install pandas openpyxl xlrd

Defaulting to user installation because normal site-packages is not writeable
Collecting xlrd
  Using cached xlrd-2.0.2-py2.py3-none-any.whl.metadata (3.5 kB)
Using cached xlrd-2.0.2-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.2


In [40]:
import pandas as pd

# Load the purchase-order sheet. skiprows=9 discards the first nine rows, so
# the next row of the sheet supplies the column names.
df = pd.read_excel(r'C:\Users\Admin\Desktop\Aanya Inc\PO 6273.xls', skiprows=9)

# Source columns carried into the output.
selected_columns = ['Serial No', 'Style No', 'Description',
                    'Diamonds', 'Qty', 'Sizes']

# Source header -> output header.
column_renames = {
    'Style No': 'StyleCode',
    'Description': 'MetalR',
    'Diamonds': 'CustomerProductionInstruction',
    'Qty': 'OrderQty',
    'Sizes': 'ItemSize',
}

# Every step returns a new frame, so `df` itself is never modified:
#   1. keep only the wanted columns and rename them,
#   2. discard rows that have no StyleCode,
#   3. treat a missing quantity as 0 and store quantities as integers.
df_selected = (
    df[selected_columns]
    .rename(columns=column_renames)
    .dropna(subset=['StyleCode'])
    .assign(OrderQty=lambda frame: frame['OrderQty'].fillna(0).astype(int))
)

# Convert "Size-X" to "UPXX" and replace NaN with blank
def convert_size(size):
    """Translate a raw size cell into the ``UPnn`` item-size code.

    Parameters
    ----------
    size : object
        Raw cell value; may be missing (NaN/None), text, or a number.

    Returns
    -------
    str
        * ``''`` when the value is missing.
        * ``'UP'`` followed by the zero-padded two-digit number when the text
          contains ``Size-<integer>`` (e.g. ``'Size-7'`` -> ``'UP07'``).
        * The value's text unchanged in every other case, including a
          ``Size-`` marker whose suffix is not a whole number
          (e.g. ``'Size-6.5'``), so one odd cell no longer aborts the whole
          column conversion with a ``ValueError``.
    """
    if pd.isna(size):
        return ''
    size_str = str(size)
    if 'Size-' not in size_str:
        return size_str

    # Take the token right after the "Size-" marker, up to the next hyphen.
    # Anchoring on the marker (rather than on the first '-' in the text)
    # keeps a hyphen earlier in the cell from hiding the number.
    token = size_str.split('Size-', 1)[1].split('-', 1)[0]
    try:
        num = int(token)
    except ValueError:
        # Not a whole number (half size, trailing text, empty suffix):
        # pass the raw text through instead of raising.
        return size_str
    return f'UP{num:02d}'

# Normalise every size cell ("Size-X" -> "UPXX", missing -> blank).
df_selected['ItemSize'] = df_selected['ItemSize'].apply(convert_size)

# Reposition columns in place: ItemSize goes directly after StyleCode, then
# OrderQty goes directly after ItemSize (giving StyleCode, ItemSize, OrderQty).
for column_name, anchor_name in (('ItemSize', 'StyleCode'), ('OrderQty', 'ItemSize')):
    column_values = df_selected.pop(column_name)
    target_position = df_selected.columns.get_loc(anchor_name) + 1
    df_selected.insert(target_position, column_name, column_values)

# Flatten multi-line metal descriptions: cast to text, turn embedded newlines
# into spaces, and trim surrounding whitespace.
metal_text = df_selected['MetalR'].astype(str)
metal_text = metal_text.str.replace('\n', ' ', regex=False)
df_selected['MetalR'] = metal_text.str.strip()

# Create Metal column with short codes
def metal_code(metal_str):
    """Build the short metal code for a metal description string.

    Matching is case-insensitive and substring based.

    * Karat: the first of ``14KT``, ``18KT``, ``10KT`` found in the text
      (checked in that order) gives ``'14'``/``'18'``/``'10'``; when none is
      present the placeholder ``'XX'`` is used.
    * Metal type, checked in this order:
      ``WHITE GOLD`` -> ``G<karat>W``, ``YELLOW GOLD`` -> ``G<karat>Y``,
      ``PINK GOLD`` -> ``G<karat>P``, ``PLATINUM`` -> ``PC950<karat>``,
      anything else -> ``G<karat>X``.

    Parameters
    ----------
    metal_str : str
        Metal description text.

    Returns
    -------
    str
        The composed code, e.g. ``'G14W'``.
    """
    text = metal_str.upper()

    # First karat marker present wins; 'XX' marks "no karat recognised".
    karat = next(
        (digits for digits in ('14', '18', '10') if digits + 'KT' in text),
        'XX',
    )

    # Gold colours are tested before platinum, in this fixed priority.
    gold_tones = (('WHITE GOLD', 'W'), ('YELLOW GOLD', 'Y'), ('PINK GOLD', 'P'))
    for keyword, tone in gold_tones:
        if keyword in text:
            return 'G' + karat + tone

    if 'PLATINUM' in text:
        # NOTE(review): the karat part is appended here too, so a platinum
        # description without a KT marker yields 'PC950XX' - confirm that
        # this is the intended code.
        return 'PC950' + karat

    # Unrecognised metal type.
    return 'G' + karat + 'X'

def _insert_after(frame, anchor, additions):
    """Insert the columns in ``additions`` (name -> value, in mapping order)
    into ``frame`` in place, starting immediately after column ``anchor``."""
    start = frame.columns.get_loc(anchor) + 1
    for offset, (name, value) in enumerate(additions.items()):
        frame.insert(start + offset, name, value)


# Metal short code sits immediately before the MetalR text it is derived from.
metal_codes = df_selected['MetalR'].apply(metal_code)
df_selected.insert(df_selected.columns.get_loc('MetalR'), 'Metal', metal_codes)

# Tone is the final character of the Metal code.
_insert_after(df_selected, 'Metal', {
    'Tone': df_selected['Metal'].astype(str).str[-1],
})

# The PO number is read from cell G5 of the same workbook: only column G and
# the first five rows are loaded, and the fifth row is picked.
po_header_cells = pd.read_excel(
    r'C:\Users\Admin\Desktop\Aanya Inc\PO 6273.xls',
    header=None,
    usecols="G",
    nrows=5,
)
item_po_no = po_header_cells.iloc[4, 0]

# After Tone: the PO number (same value on every row), then three blank columns.
additional_cols = ['ItemRefNo', 'StockType', 'MakeType']
_insert_after(df_selected, 'Tone', {
    'ItemPoNo.': item_po_no,
    **dict.fromkeys(additional_cols, ''),
})

# After CustomerProductionInstruction: fixed remark / stamp texts, one blank
# instruction column between them, then a run of blank columns.
new_columns = [
    'OrderGroup', 'Certificate', 'SKUNo', 'Basestoneminwt', 'Basestonemaxwt',
    'Basemetalminwt', 'Basemetalmaxwt', 'Productiondeliverydate',
    'Expecteddeliverydate', 'Blank_Column', 'SetPrice', 'StoneQuality'
]
_insert_after(df_selected, 'CustomerProductionInstruction', {
    'SpecialRemarks': 'Need Hallmark "A" and Trademark on Every piece',
    'DesignProductionInstruction': '',
    'StampInstruction': '"A" on one side and metal KT on other side of the ring',
    **dict.fromkeys(new_columns, ''),
})

# Blank OrderItemPcs column directly after OrderQty.
_insert_after(df_selected, 'OrderQty', {'OrderItemPcs': ''})

# MetalR was only needed to derive Metal/Tone; remove it before export.
del df_selected['MetalR']

# Write the result without the index column.
df_selected.to_csv(r'C:\Users\Admin\Desktop\Anaya_PO_Cleaned.csv', index=False)
#df_selected.head(20)
