### DATA MAPPING


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
import pytz

# Source columns whose values are ISO-8601 timestamps that must be rendered
# in Taipei local time in the output workbook.
DATE_SOURCE_COLUMNS = ('createdAt', 'updatedAt')


def format_taipei_timestamp(value, tz):
    """Render one ISO-8601 timestamp as ``MM/DD/YYYY HH:MM:SS AM/PM`` in *tz*.

    Args:
        value: Raw cell value from the source data. Expected to be an
            ISO-8601 string, optionally ending in ``Z``; may also be a
            missing value (``None``/NaN) or an empty string.
        tz: Target ``tzinfo`` the timestamp is converted to before formatting.

    Returns:
        The formatted timestamp, or ``''`` when the value is missing, blank,
        or cannot be parsed (in which case the problem is printed).
    """
    # Function-scope import: the file-level imports only bring in `datetime`.
    from datetime import timezone

    if pd.isna(value):
        return ''
    text = value.strip() if isinstance(value, str) else value
    if text == '':
        return ''

    try:
        # fromisoformat() on older Pythons rejects a trailing 'Z', so spell
        # the UTC offset out explicitly.
        if text.endswith('Z'):
            text = text[:-1] + '+00:00'
        parsed = datetime.fromisoformat(text)
        # A timestamp without an offset is taken to be UTC. Without this,
        # astimezone() would interpret it in the machine's local zone and
        # the output would depend on where the script runs.
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(tz).strftime('%m/%d/%Y %I:%M:%S %p')
    except (AttributeError, TypeError, ValueError, OverflowError) as e:
        # Best effort: a bad cell is reported and left blank rather than
        # aborting the whole export.
        print(f"Error processing date {value}: {e}")
        return ''


# The guard is true both for `python file.py` and inside a notebook cell, and
# names assigned below remain module-level globals.
if __name__ == "__main__":
    # Load mapping data: row 0 holds the output headers, row 1 the name of the
    # source column feeding each header.
    mapping_df = pd.read_excel('../proddata/mapping.xlsx', sheet_name='Inventory_Items', header=None, nrows=2)
    output_headers = mapping_df.iloc[0].tolist()
    source_columns = mapping_df.iloc[1].tolist()

    # Clean source columns: blank cells become '', everything else is
    # stringified and trimmed so it can be matched against the CSV headers.
    source_columns = ['' if pd.isna(x) else str(x).strip() for x in source_columns]

    # Load source data
    source_df = pd.read_csv('../proddata/source.csv')
    source_df.columns = [col.strip() for col in source_df.columns]

    # Create output DataFrame
    output_df = pd.DataFrame(columns=output_headers)

    # Built once; it does not change between date columns.
    taipei_tz = pytz.timezone('Asia/Taipei')

    # Process each column in mapping
    for out_col, src_col in zip(output_headers, source_columns):
        if not src_col or src_col not in source_df.columns:
            # No usable source column: leave the output column empty.
            output_df[out_col] = np.nan
        elif src_col in DATE_SOURCE_COLUMNS:
            # Date conversion to Taipei time (UTC+8)
            output_df[out_col] = [
                format_taipei_timestamp(raw, taipei_tz) for raw in source_df[src_col]
            ]
        else:
            output_df[out_col] = source_df[src_col]

    # Save to Excel
    output_df.to_excel('../output/formatted_output.xlsx', index=False)
    print("Processing complete! Output saved to 'formatted_output.xlsx'")

Processing complete! Output saved to 'formatted_output.xlsx'
