In [2]:
import pandas as pd
import numpy as np

In [1]:
### File Paths

pri_path = '../data/predicted/pri_comp_inv.csv'
sec_path = '../data/predicted/sec_comp_inv.csv'
tri_path = '../data/predicted/tri_comp_inv.csv'

In [3]:
def read_and_clean_csv(file_path):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)
    
    # Drop rows with NaN values
    df_cleaned = df.dropna(how='all')
    
    return df_cleaned

In [4]:
# Read and clean the CSV files
pri_df = read_and_clean_csv(pri_path)
sec_df = read_and_clean_csv(sec_path)
tri_df = read_and_clean_csv(tri_path)

---

In [5]:
def update_inventory_values(data):
    supply_columns = ['B01_supply', 'B02_supply', 'B03_supply', 'S01_supply', 'S02_supply', 'S03_supply']
    inv_columns = ['B01_inv', 'B02_inv', 'B03_inv', 'S01_inv', 'S02_inv', 'S03_inv']
    
    for supply_col, inv_col in zip(supply_columns, inv_columns):
        data[inv_col] = data.apply(lambda row: row[inv_col] if row[supply_col] > 0 else 0, axis=1)
    
    return data

In [6]:
pri_df_cln = update_inventory_values(pri_df)
sec_df_cln = update_inventory_values(sec_df)
tri_df_cln = update_inventory_values(tri_df)

---


In [9]:
supply_columns = ['B01_supply', 'B02_supply', 'B03_supply', 'S01_supply', 'S02_supply', 'S03_supply']
inv_columns = ['B01_inv', 'B02_inv', 'B03_inv', 'S01_inv', 'S02_inv', 'S03_inv']


In [22]:
comparison_results = pd.DataFrame(columns=['Supply_Column', 'Inv_Comparison'])


In [23]:
for supply_col, inv_col in zip(supply_columns, inv_columns):
    pri_inv = pri_df_cln[inv_col]
    sec_inv = sec_df_cln[inv_col]
    tri_inv = tri_df_cln[inv_col]
    
    # Calculate Absolute difference and ratio between pri and sec
    diff_pri_sec = (pri_inv - sec_inv).abs().mean()  
    ratio_pri_sec = (pri_inv / sec_inv).fillna(0).mean()  
    diff_pri_tri = (pri_inv - tri_inv).abs().mean()  
    ratio_pri_tri = (pri_inv / tri_inv).fillna(0).mean()  
    
    
    comparison_results = comparison_results.append({
        'Supply_Column': supply_col,
        'Pri-Sec Diff': diff_pri_sec,
        'Pri-Sec Ratio': ratio_pri_sec,
        'Pri-Tri Diff': diff_pri_tri,
        'Pri-Tri Ratio': ratio_pri_tri
    }, ignore_index=True)

  comparison_results = comparison_results.append({
  comparison_results = comparison_results.append({
  comparison_results = comparison_results.append({
  comparison_results = comparison_results.append({
  comparison_results = comparison_results.append({
  comparison_results = comparison_results.append({


In [24]:
comparison_results

Unnamed: 0,Supply_Column,Inv_Comparison,Pri-Sec Diff,Pri-Sec Ratio,Pri-Tri Diff,Pri-Tri Ratio
0,B01_supply,,2765.461248,inf,0.0,0.574713
1,B02_supply,,1867.006368,inf,0.0,0.886335
2,B03_supply,,17.521364,inf,0.0,0.038314
3,S01_supply,,3.448683,inf,0.0,0.002554
4,S02_supply,,31.462563,inf,0.0,0.049808
5,S03_supply,,12.222107,inf,0.0,0.019157


---


In [25]:
inv_columns = ['B01_inv', 'B02_inv', 'B03_inv', 'S01_inv', 'S02_inv', 'S03_inv']

In [28]:
average_predictions = pd.merge(pri_df_cln, sec_df_cln[inv_columns], left_index=True, right_index=True)


In [29]:
average_predictions = pd.merge(average_predictions, tri_df_cln[inv_columns], left_index=True, right_index=True)


In [30]:
for inv_col in inv_columns:
    average_predictions[inv_col] = average_predictions[[f'{inv_col}_x', f'{inv_col}_y', inv_col]].mean(axis=1)

In [32]:
columns_to_drop = [f'{inv_col}_x' for inv_col in inv_columns] + [f'{inv_col}_y' for inv_col in inv_columns]
average_predictions.drop(columns=columns_to_drop, inplace=True)

In [34]:
average_predictions.head(4)

Unnamed: 0,CUST_ID,CUST__NAME,SKU_ID,sku_name,POS_TYPE_focus,POS_Sec,POS_Ter,B01_supply,B02_supply,B03_supply,...,POS_Revenue_$10-20 MILLION,POS_Revenue_$2.5-5 MILLION,POS_Revenue_$5-10 MILLION,"POS_Revenue_LESS THAN $500,000",B01_inv,B02_inv,B03_inv,S01_inv,S02_inv,S03_inv
0,2001000,Banyan Foods,1001018,tofu,FOOD STORES,GENERAL STORES,SPECIALTY FOODS,0.0,0.0,0.0,...,0,0,0,1,0.0,0.0,0.0,0.0,63.78281,0.0
1,2001000,Banyan Foods,1001018,tofu,FOOD STORES,GENERAL STORES,SPECIALTY FOODS,0.0,0.0,0.0,...,0,0,0,1,0.0,0.0,0.0,0.0,51.067153,0.0
2,2001000,Banyan Foods,1001018,tofu,FOOD STORES,GENERAL STORES,SPECIALTY FOODS,0.0,0.0,0.0,...,0,0,0,1,0.0,0.0,0.0,0.0,247.9913,0.0
3,2001000,Banyan Foods,1001018,tofu,FOOD STORES,GENERAL STORES,SPECIALTY FOODS,0.0,0.0,0.0,...,0,0,0,1,0.0,0.0,0.0,0.0,227.236527,0.0


In [None]:
average_predictions.to_csv('../analysis/avg_encoded_predictions.csv')