In [1]:
import pandas as pd
from pathlib import Path

def create_composite_key(df, key_columns, separator='_'):
    """Build a composite key by joining several columns' values row by row.

    Args:
        df: DataFrame that holds the key columns.
        key_columns: Column name, or list/tuple of column names, to combine
            in the given order. A single string is treated as one column.
        separator: Text placed between the column values.

    Returns:
        pandas.Series of strings aligned with ``df.index``. Missing values
        are rendered as the literal text ``'NULL'``.

    Raises:
        ValueError: If ``key_columns`` is empty or names a column that is
            not present in ``df``.
    """
    # A bare string would otherwise be iterated character by character and
    # reported as a list of single-letter "missing columns".
    if isinstance(key_columns, str):
        key_columns = [key_columns]
    else:
        key_columns = list(key_columns)

    if not key_columns:
        raise ValueError("key_columns must contain at least one column name")

    # Validate columns exist
    missing_cols = [col for col in key_columns if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Columns not found: {missing_cols}")

    # Stringify first, then overwrite the positions that were null in the
    # source. Unlike fillna('NULL'), this also works for categorical columns
    # that have no 'NULL' category.
    selected = df[key_columns]
    key_data = selected.astype(str).where(selected.notna(), 'NULL')

    # Concatenate with separator
    composite_key = key_data.apply(separator.join, axis=1)

    return composite_key

def add_composite_key_to_csv(input_file, key_columns, output_file=None, separator='_', pk_column_name='composite_key'):
    """Read a CSV, prepend a composite-key column, and write the result to a CSV.

    Args:
        input_file: Path of the CSV file to read.
        key_columns: Column name, or list/tuple of column names, whose values
            are joined to form the key.
        output_file: Destination path. When None, the file is written next to
            the input as ``<stem>_with_pk<suffix>``.
        separator: Text placed between the key column values.
        pk_column_name: Name of the inserted key column (placed first).

    Returns:
        The DataFrame including the new key column, or None when any step
        fails (the error is printed rather than raised).
    """
    try:
        # Normalise to a list up front: the sample printout below concatenates
        # lists, which used to fail for a tuple/string only AFTER the file had
        # already been written, making a successful run return None.
        if isinstance(key_columns, str):
            key_columns = [key_columns]
        else:
            key_columns = list(key_columns)

        # Read CSV
        print(f"Reading {input_file}...")
        df = pd.read_csv(input_file)
        print(f"Loaded {len(df)} rows, {len(df.columns)} columns")

        # Display available columns
        print(f"Available columns: {list(df.columns)}")

        # Create composite key
        print(f"Creating composite key from: {key_columns}")
        composite_key = create_composite_key(df, key_columns, separator)

        # Check for duplicates (only warns; duplicate keys are still written)
        duplicates = composite_key.duplicated().sum()
        if duplicates > 0:
            print(f"WARNING: {duplicates} duplicate keys found!")

        # Add composite key as first column
        df.insert(0, pk_column_name, composite_key)

        # Generate output filename if not provided
        if output_file is None:
            input_path = Path(input_file)
            output_file = input_path.parent / f"{input_path.stem}_with_pk{input_path.suffix}"

        # Make sure the destination folder exists; skipped for non-path
        # targets (e.g. file-like buffers) that to_csv also accepts.
        if isinstance(output_file, (str, Path)):
            Path(output_file).parent.mkdir(parents=True, exist_ok=True)

        # Save to CSV
        df.to_csv(output_file, index=False)
        print(f"✓ Saved to {output_file}")
        print(f"✓ Added column '{pk_column_name}' with {composite_key.nunique(dropna=False)} unique values")

        # Show sample of new keys
        print("\nSample composite keys:")
        print(df[[pk_column_name] + key_columns].head())

        return df

    except FileNotFoundError:
        print(f"Error: File '{input_file}' not found")
        return None
    except Exception as e:
        # Best-effort notebook helper: report the problem and signal failure
        # with None instead of raising.
        print(f"Error: {str(e)}")
        return None

# Example usage in Jupyter Notebook:
# 
# # Basic usage
# df = add_composite_key_to_csv('data.csv', ['col1', 'col2'])
# 
# # With custom separator and column name
# df = add_composite_key_to_csv('data.csv', ['col1', 'col2'], separator='|', pk_column_name='entity_id')
# 
# # Specify output file
# df = add_composite_key_to_csv('data.csv', ['col1', 'col2'], output_file='processed_data.csv')

# print("Composite Key Generator for Jupyter Notebook")
# print("Usage: df = add_composite_key_to_csv('file.csv', ['col1', 'col2'])")
# print("Parameters: input_file, key_columns, output_file=None, separator='_', pk_column_name='composite_key'")

In [None]:
# Prepend a 'product_date_id' key built from the 'Product ID' and 'date' columns.
df = add_composite_key_to_csv(
    'output/Demand Fulfillment.csv',
    ['Product ID', 'date'],
    pk_column_name='product_date_id',
)

# transactional data - original

In [48]:
import pandas as pd
import os

def add_composite_key(input_file: str, primary_key_name: str = "CompositeKey",
                      key_columns=("ProductID",), output_folder: str = "composite_key_files"):
    """
    Add a composite-key column to a CSV file and save the result in ``output_folder``.

    Column headers are first stripped of spaces and hyphens. The key is made of
    the values of ``key_columns`` followed by the initials of the 'KeyFigure'
    value, joined with "_", and is inserted as the first column. The result is
    written to ``{output_folder}/{input file name without spaces}.csv`` and the
    first rows are printed.

    Args:
        input_file (str): Path to the input CSV file.
        primary_key_name (str): Name of the inserted key column.
        key_columns: Column name or sequence of column names that precede the
            key-figure initials in the key. Names are cleaned the same way as
            the headers, so 'Product ID' and 'ProductID' are equivalent.
            Defaults to ('ProductID',). A column named 'CustomerID' is
            abbreviated ('Customer Group 1000' -> 'CG1000'); for any other
            column, missing values and the placeholder '(None)' contribute an
            empty string.
        output_folder (str): Folder that receives the output file; created if
            it does not exist.

    Raises:
        ValueError: If ``key_columns`` is empty.
        KeyError: If a key column or 'KeyFigure' is missing after header cleaning.
    """

    def clean_header(name):
        # Same normalisation for CSV headers and requested key column names.
        return str(name).replace(" ", "").replace("-", "")

    # Abbreviate Key Figure: '%' is spelled out, then the first character of
    # every whitespace-separated word is upper-cased and concatenated.
    def abbreviate_key_figure(kf):
        kf = str(kf).replace('%', 'Percent')
        return ''.join(word[0].upper() for word in kf.split())

    # Abbreviate Customer ID: 'Customer Group <n>' -> 'CG<n>', otherwise the
    # value with spaces removed.
    def abbreviate_customer_id(cid):
        text = str(cid)
        if 'Customer Group' in text:
            return 'CG' + text.split()[-1]
        return text.replace(" ", "")

    if isinstance(key_columns, str):
        key_columns = [key_columns]
    key_columns = [clean_header(col) for col in key_columns]
    if not key_columns:
        raise ValueError("key_columns must contain at least one column name")

    # Extract file name without extension and remove spaces
    base_name = os.path.splitext(os.path.basename(input_file))[0].replace(" ", "")
    output_file = os.path.join(output_folder, base_name + ".csv")

    # Load and clean column headers
    df = pd.read_csv(input_file)
    df.columns = [clean_header(col) for col in df.columns]

    missing = [col for col in key_columns + ["KeyFigure"] if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found after header cleaning: {missing}")

    def key_part(column):
        values = df[column]
        if column == "CustomerID":
            return values.apply(abbreviate_customer_id)
        # Stringify so numeric columns can be concatenated; blanks and the
        # '(None)' placeholder become an empty key segment.
        return values.astype(str).where(values.notna(), "").replace("(None)", "")

    # Create Composite Key: <key column values>_<key-figure initials>
    parts = [key_part(col) for col in key_columns]
    parts.append(df["KeyFigure"].apply(abbreviate_key_figure))
    composite = parts[0]
    for part in parts[1:]:
        composite = composite + "_" + part
    df.insert(0, primary_key_name, composite)

    # Save to output file (the folder may not exist on a fresh checkout)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    df.to_csv(output_file, index=False)

    print(f"Output saved to: {output_file}")
    print(df.head())


In [36]:
# Demand Fulfillment: the recorded output shows keys shaped like
# <ProductID>_<customer abbreviation>_<key-figure initials>.
add_composite_key(
    'output/Demand Fulfillment.csv',
    primary_key_name='delivery_record_id',
)

Output saved to: composite_key_files\DemandFulfillment.csv
      delivery_record_id ProductID           CustomerID  \
0      FG-1000_CG1000_CD   FG-1000  Customer Group 1000   
1      FG-1000_CG1000_CR   FG-1000  Customer Group 1000   
2    FG-1000_CG1000_CDDL   FG-1000  Customer Group 1000   
3     FG-1000_CG1000_DFP   FG-1000  Customer Group 1000   
4  FG-1000_CG1000_ADDOCD   FG-1000  Customer Group 1000   

                                   KeyFigure    Total  W222025  W232025  \
0                           Consensus Demand  28248.0      389    397.0   
1                          Customer Receipts  28248.0      389    397.0   
2             Customer Demand Delivered Late      0.0        0      0.0   
3                       Demand Fulfillment %      1.0        1      1.0   
4  Average Delivery Delay of Customer Demand      0.0        0      0.0   

   W242025  W252025  W262025  ...  W352026  W362026  W372026  W382026  \
0    372.0      363      389  ...    356.0    358.0    365.0  

In [40]:
# Review DC: the recorded output shows keys shaped like
# <ProductID>_<LocationID>_<key-figure initials>.
add_composite_key(
    'output/Review DC.csv',
    primary_key_name='delivery_record_id',
)

Output saved to: composite_key_files\ReviewDC.csv
   delivery_record_id ProductID LocationID                    KeyFigure  \
0   FG-1000_DC1000_DD   FG-1000     DC1000             Dependent Demand   
1  FG-1000_DC1000_SOH   FG-1000     DC1000                Stock on Hand   
2  FG-1000_DC1000_ITR   FG-1000     DC1000  Incoming Transport Receipts   
3   FG-1000_DC1000_OS   FG-1000     DC1000              Outgoing Supply   
4  FG-1000_DC1000_SS(   FG-1000     DC1000           Safety Stock (SOP)   

     Total  W222025  W232025  W242025  W252025  W262025  ...  W352026  \
0  28248.0    389.0    397.0    372.0    363.0    389.0  ...    356.0   
1   3100.0   3100.0      NaN      NaN      NaN      NaN  ...      NaN   
2  25148.0      NaN      NaN      NaN      NaN      NaN  ...    356.0   
3  28248.0    389.0    397.0    372.0    363.0    389.0  ...    356.0   
4    130.0    112.0    140.0    116.0    107.0    142.0  ...    142.0   

   W362026  W372026  W382026  W392026  W402026  W412026  W42

In [41]:
# Review Plant: the recorded output shows keys shaped like
# <ProductID>_<LocationID>_<key-figure initials>.
add_composite_key(
    'output/Review Plant.csv',
    primary_key_name='product_location_id',
)

Output saved to: composite_key_files\ReviewPlant.csv
  product_location_id ProductID LocationID                      KeyFigure  \
0   FG-1000_PL1000_DD   FG-1000     PL1000               Dependent Demand   
1  FG-1000_PL1000_SOH   FG-1000     PL1000                  Stock on Hand   
2  FG-1000_PL1000_PPR   FG-1000     PL1000    Planned Production Receipts   
3  FG-1000_PL1000_IPR   FG-1000     PL1000   Incoming Production Receipts   
4  FG-1000_PL1000_CPR   FG-1000     PL1000  Confirmed Production Receipts   

   Total  W222025  W232025  W242025  W252025  W262025  ...  W352026  W362026  \
0  20275       20       20        0      138      326  ...      288      288   
1      0        0        0        0        0        0  ...        0        0   
2  20413       20       20        0      288      288  ...      288      288   
3  20413       20       20        0      288      288  ...      288      288   
4     40       20       20        0        0        0  ...        0        0   

   

In [43]:
# Review Vendors: the recorded output shows keys shaped like
# <ProductID>_<LocationID>_<ShipToLocationID>_<key-figure initials>,
# with an empty segment where ShipToLocationID is blank.
add_composite_key(
    'output/Review Vendors.csv',
    primary_key_name='product_location_id',
)

Output saved to: composite_key_files\ReviewVendors.csv
          product_location_id ProductID LocationID ShipToLocationID  \
0         RM-1000_VEN1000__DD   RM-1000    VEN1000                    
1        RM-1000_VEN1000__SOH   RM-1000    VEN1000                    
2  RM-1000_VEN1000_PL1000_OTS   RM-1000    VEN1000           PL1000   
3  RM-1000_VEN1000_PL2000_OTS   RM-1000    VEN1000           PL2000   
4         RM-1000_VEN2000__DD   RM-1000    VEN2000                    

                   KeyFigure     Total  W222025  W232025  W242025  W252025  \
0           Dependent Demand  259272.0   3005.0      NaN   3815.0   3920.0   
1              Stock on Hand       NaN      NaN      NaN      NaN      NaN   
2  Outgoing Transport Supply  134818.0   1542.0      NaN   1931.0   2291.0   
3  Outgoing Transport Supply  124454.0   1463.0      NaN   1884.0   1629.0   
4           Dependent Demand    2000.0      NaN      NaN   2000.0      NaN   

   ...  W352026  W362026  W372026  W382026  W3920

In [45]:
# Review Capacity: the recorded output shows keys shaped like
# <ProductID>_<ResourceID>_<key-figure initials>.
add_composite_key(
    'output/Review Capacity.csv',
    primary_key_name='location_resource_id',
)

Output saved to: composite_key_files\ReviewCapacity.csv
        location_resource_id LocationID   ResourceID           SourceID  \
0     FG-1000_RES1000_001_CS     PL1000  RES1000_001  PL1000_FG1000_PV1   
1    FG-1000_RES1000_001_CSE     PL1000  RES1000_001  PL1000_FG1000_PV1   
2     FG-1000_RES1000_001_CU     PL1000  RES1000_001  PL1000_FG1000_PV1   
3  FG-1000_RES1000_001_CUOPR     PL1000  RES1000_001  PL1000_FG1000_PV1   
4     FG-1000_RES1000_002_CS     PL1000  RES1000_002  PL1000_FG1000_PV1   

  ProductID                              KeyFigure         Total     W222025  \
0   FG-1000                        Capacity Supply  10800.000000  144.000000   
1   FG-1000              Capacity Supply Expansion      0.000000    0.000000   
2   FG-1000                   Capacity Utilization      0.945046    0.069444   
3   FG-1000  Capacity Usage of Production Resource  10206.500000   10.000000   
4   FG-1000                        Capacity Supply   7200.000000   96.000000   

      W23202

In [47]:
# Review Component: the recorded output shows keys shaped like
# <ProductID>_<LocationID>_<key-figure initials>.
add_composite_key(
    'output/Review Component.csv',
    primary_key_name='product_location_id',
)

Output saved to: composite_key_files\ReviewComponent.csv
  product_location_id ProductID LocationID                    KeyFigure  \
0   RM-1000_PL1000_DD   RM-1000     PL1000             Dependent Demand   
1  RM-1000_PL1000_SOH   RM-1000     PL1000                Stock on Hand   
2  RM-1000_PL1000_PPR   RM-1000     PL1000  Planned Production Receipts   
3  RM-1000_PL1000_PTR   RM-1000     PL1000    Planned Transport Receipt   
4  RM-1000_PL1000_ITR   RM-1000     PL1000  Incoming Transport Receipts   

    Total  W222025  W232025  W242025  W252025  W262025  ...  W352026  W362026  \
0  134360        0      100        0     2880     2325  ...     2215     1595   
1       0        0        0        0        0        0  ...        0        0   
2       0        0        0        0        0        0  ...        0        0   
3  135818        0     1542        0     2931     2291  ...     2241     1567   
4  135818        0     1542        0     2931     2291  ...     2241     1567   

   W3

In [49]:
# Profit Margin: keys are <ProductID>_<key-figure initials>.
add_composite_key(
    'output/Profit Margin.csv',
    primary_key_name='product_key_id',
)

Output saved to: composite_key_files\ProfitMargin.csv
  product_key_id ProductID                        KeyFigure     Total  \
0     FG-1000_CR   FG-1000                Customer Receipts     45960   
1    FG-1000_PP(   FG-1000           Planned Price (Supply)       299   
2    FG-1000_CDR   FG-1000       Constrained Demand Revenue  13742040   
3    FG-1000_IPR   FG-1000     Incoming Production Receipts     40756   
4  FG-1000_PCROP   FG-1000  Production Cost Rate of Product       199   

   W222025  W232025  W242025  W252025  W262025  W272025  ...  W352026  \
0      674      682      636      628      645      691  ...      576   
1      299      299      299      299      299      299  ...      299   
2   201526   203918   190164   187772   192855   206609  ...   172224   
3       20       20        0      576      576      576  ...      576   
4      199      199      199      199      199      199  ...      199   

   W362026  W372026  W382026  W392026  W402026  W412026  W422026  W4