#### **Setting up Libraries**

In [1]:
# 잠긴 엑셀 파일 풀기
!pip install msoffcrypto-tool -q

In [2]:
!pip install pyxlsb



In [3]:
import pandas as pd # 데이터 분석
import numpy as np # 계산 및 배열처리
import gcsfs # GCS 접근
from datetime import datetime # 날짜, 시간 정보
from datetime import date, timedelta # 날짜 계산

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.set_option('display.max_columns', None)

#### **Daily Backorder Raw Data** (Regular)

In [4]:
today_date = datetime.today().strftime('%Y%m%d')

folder_path = 'gs://daily_backorder/'
file_name = '20250509 - Backorder and Hold Report .xlsb'
file_path = folder_path + file_name

fs = gcsfs.GCSFileSystem()

df_back = pd.read_excel(
    fs.open(file_path),
    sheet_name='Awaiting Shipping Details',
    header=1,
    usecols="B:AR",
    engine='pyxlsb'
)

In [5]:
# Getting active items that are on back order

df_back_filter = df_back[[
    'Order Date','Awaiting Shipping Reason', 'Picking Release Date', 'Item Status',
    'Parts No', 'Parts Class Code','Item Description', 'Key Parts Category', 'Part Functionality',
    'Customer Type with FOC', 'Company Code', 'Division Code', 'Order Qty'
]].copy()

df_back_filter = df_back_filter[(df_back_filter['Awaiting Shipping Reason'] == 'Back Order Hold') & (df_back_filter['Picking Release Date'].isna()) & (df_back_filter['Item Status'] == 'Active')]

# deletion on unnecessary columns
df_back_filter = df_back_filter.drop(columns=['Awaiting Shipping Reason', 'Picking Release Date', 'Item Status'])
df_back_filter['Order Date'] = pd.to_datetime('1899-12-30') + pd.to_timedelta(df_back_filter['Order Date'], unit='D')

# column name change
df_back_filter = df_back_filter.rename(columns={
    'Item Description': 'Desc',
    'Part Functionality': 'Functionality',
    'Customer Type with FOC': 'Customer',
    'Key Parts Category': 'Key Parts',
    'Order Qty': 'BO qty'})

In [6]:
df_back_filter.head(2)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2


In [7]:
df_back_filter.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1496 entries, 0 to 11245
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Order Date        1496 non-null   datetime64[ns]
 1   Parts No          1496 non-null   object        
 2   Parts Class Code  1496 non-null   object        
 3   Desc              1496 non-null   object        
 4   Key Parts         1496 non-null   object        
 5   Functionality     1496 non-null   object        
 6   Customer          1496 non-null   object        
 7   Company Code      1496 non-null   object        
 8   Division Code     1496 non-null   object        
 9   BO qty            1496 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(8)
memory usage: 128.6+ KB


#### **Current Inventory, Transit volume, and ETA**

In [8]:
summary = pd.read_excel(fs.open(file_path), sheet_name='Summary PN', header=1, usecols="B:BK", skiprows=[2])

summary = summary[['Parts No', 'On Hand Qty', 'In Staging Qty', 'WH Qty', 'Transit Qty', 'ETA']]
summary['ETA'] = summary['ETA'].replace('No ETA', 'N/A')
summary = summary.rename(columns={'On Hand Qty': 'On Hand', 'WH Qty': 'WHAR', 'In Staging Qty': 'In Stage', 'Transit Qty': 'Transit'})

In [9]:
summary.head(1)

Unnamed: 0,Parts No,On Hand,In Stage,WHAR,Transit,ETA
0,0CZZA20001L,29,0,0,0,


In [10]:
backorder_only = df_back_filter.merge(summary, on='Parts No', how='left')

In [11]:
backorder_only.head(3)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,
2,2024-07-26,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,


#### **Purchasing Order Data** (Regular)

In [12]:
import gcsfs

folder_path = 'gs://daily_backorder/'
file_name = '4. Original Part Inventory.xlsx'
file_path = folder_path + file_name

fs = gcsfs.GCSFileSystem()
file_list = fs.ls(folder_path)

with fs.open(file_path) as f:
    purchasing_order = pd.read_excel(f, sheet_name='Original Part Master', header=1, usecols="A:P", engine='openpyxl')
    purchasing_order = purchasing_order[['Original Part', 'open', 'OP AIR', 'OP TRK', 'OP SEA']].rename(
        columns={'Original Part': 'Parts No', 'open': 'Open', 'OP AIR': 'Air PO', 'OP TRK': 'Trk PO', 'OP SEA': 'Sea PO'})

In [13]:
bo_all_info = pd.merge(backorder_only, purchasing_order, how='left', on='Parts No')
cols_to_convert = ['Open', 'Air PO', 'Trk PO', 'Sea PO']
bo_all_info[cols_to_convert] = bo_all_info[cols_to_convert].fillna(0).astype(int)

In [14]:
bo_all_info.head(2)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0


#### **GERP Data** (Not Regular Update)

In [15]:
folder_path = 'gs://daily_backorder/'
file_name = 'GERP Part Master 2025 0505.xlsx'
file_path = folder_path + file_name

fs = gcsfs.GCSFileSystem()
file_list = fs.ls(folder_path)

if file_list:
    detail_info = pd.read_excel(fs.open(file_path), sheet_name='Data', header=1, usecols="A:Ak")
    detail_info = detail_info[['Part', 'sales_model', 'first_receipt_date', 'parts_grade']].rename(columns={'Part': 'Parts No',
                                                                                                      'sales_model': 'Model', 'parts_grade': 'grade'})
else:
    print("No files found in the specified folder.")

In [16]:
info_added = pd.merge(bo_all_info, detail_info, how='left', on='Parts No')

In [17]:
info_added.head(2)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C


#### **Accesories Data** (Not Regular Update)

In [18]:
# Accesorries Data

folder_path = 'gs://daily_backorder/'
file_name = 'Acc List_25 03 27.xlsx'
file_path = folder_path + file_name

fs = gcsfs.GCSFileSystem()

df_acc = pd.read_excel(fs.open(file_path), header=1, usecols="A:C")
df_acc = df_acc.rename(columns ={'Part#': 'Parts No'})
df_acc = df_acc.drop(columns='Desc.')
df_acc = df_acc.rename(columns={'Acc. - SH Parts': 'Acc'})

In [19]:
acc_added = pd.merge(info_added, df_acc, how='left', on='Parts No')

In [20]:
acc_added.head(3)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,Acc
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,
2,2024-07-26,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,


In [21]:
acc_added['Acc'].value_counts().reset_index()

Unnamed: 0,Acc,count
0,Y,6


#### **BER Data** (Not Regular Update)

In [22]:
file_path = 'gs://daily_backorder/LTV BER list.xlsx'
fs = gcsfs.GCSFileSystem()

with fs.open(file_path) as f:
    df_ber_e = pd.read_excel(f, sheet_name='Sheet1', header=1, usecols='E')

df_ber_e = df_ber_e.dropna().drop_duplicates().reset_index(drop=True)
df_ber_e = df_ber_e.rename(columns={'Part no': 'BER'})

In [23]:
ber_added = pd.merge(acc_added, df_ber_e, how='left', left_on='Parts No', right_on='BER')

In [24]:
ber_added.head(2)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,Acc,BER
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,,
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,,


In [25]:
non_acc_ber = ber_added[(ber_added['Acc'].isna()) & (ber_added['BER'].isna())]
non_acc_ber = non_acc_ber.drop(columns=['Acc', 'BER'])

#### **Substitute Part Inventory Data**

In [26]:
fs = gcsfs.GCSFileSystem()

folder_path = 'gs://daily_backorder/'
file_name = '5. Sub Master Inventory.xlsx'

file_path = f'{folder_path}{file_name}'

file_list = fs.glob(f'{folder_path}*.xlsx')
sub_inv = pd.read_excel(fs.open(file_path), sheet_name='Sub Master Inventory', header=1, usecols="A:D")

In [27]:
sub_inv_filter = sub_inv[sub_inv['sub inventory'] > 0]
sub_inv_filter['sub inventory'] = sub_inv_filter['sub inventory'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_inv_filter['sub inventory'] = sub_inv_filter['sub inventory'].astype(int)


In [28]:
summed_sub_inv = sub_inv_filter.groupby('original_part', as_index=False)['sub inventory'].sum()
summed_sub_inv.rename(columns={'sub inventory': 'total_sub'}, inplace=True)

In [29]:
sub_merged = non_acc_ber.merge(summed_sub_inv, left_on='Parts No', right_on='original_part', how='left')
sub_merged.drop(columns='original_part', inplace=True)
sub_merged['total_sub'] = sub_merged['total_sub'].fillna(0).astype(int)

In [30]:
sub_merged.head(2)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0


#### **EDW Data (Historic Demand)**

In [31]:
# EDW Data (Demand for each months per parts)

new_column_names = ['Part No', 'Desc', 'Division']


fs = gcsfs.GCSFileSystem()

folder_path = 'gs://daily_backorder/'
file_name = '9. EDW Demand (6M).xlsx'

file_path = f'{folder_path}{file_name}'

edw_demand = pd.read_excel(fs.open(file_path), header=None, skiprows=1)
header_row = pd.read_excel(fs.open(file_path), header=None, nrows=1).iloc[0]
edw_demand.columns = new_column_names + header_row[3:].tolist()
edw_demand = edw_demand.iloc[:, :10]

edw_demand_filter = edw_demand[edw_demand['Part No'].notnull()]
edw_demand_filter = edw_demand_filter.drop(columns='Desc')

In [32]:
edw_demand_filter.head(2)

Unnamed: 0,Part No,Division,202411,202412,202501,202502,202503,202504,202505
0,AGF80300705,REF,25709.0,54966.0,73206.0,11014.0,158586.0,14636.0,0.0
1,AGF80232402,REF,30128.0,36852.0,42693.0,41090.0,44177.0,40775.0,0.0


In [33]:
print(edw_demand_filter.columns.tolist())

['Part No', 'Division', 202411, 202412, 202501, 202502, 202503, 202504, 202505]


In [34]:
demand_merged = pd.merge(sub_merged, edw_demand_filter, left_on='Parts No', right_on='Part No', how='left')

In [36]:
demand_merged.columns = demand_merged.columns.astype(str).str.strip()

columns_to_convert = ['202411', '202412', '202501', '202502', '202503', '202504', '202505']
demand_merged[columns_to_convert] = (
    demand_merged[columns_to_convert]
    .fillna(0)
    .astype(int)
)

In [37]:
demand_merged = demand_merged.drop(columns=['Part No', 'Division'])

In [38]:
demand_merged.head(10)

Unnamed: 0,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505
0,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0,0,0,1,0,0,2,0
1,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0,0,0,1,0,0,2,0
2,2024-07-26,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0,0,0,1,0,0,2,0
3,2024-08-01,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020.11.12,C,0,0,0,1,0,0,2,0
4,2024-11-14,ADW73550411,ADW0000,Funnel Assembly,N,FUNCTIONAL,ASC,HA,CNT,1,0,0,0,3,5/12_3,11,3,0,8,LF29S8330D.ASBCNA0,,D,0,1,0,0,0,0,1,0
5,2025-01-09,MCK71548534,MCK0092,"Cover,Cabinet",N,FUNCTIONAL,ASC,HA,DFT,1,1,0,0,0,,0,0,0,0,DLHC5502V.ASSEEUS,2024.07.09,C,0,0,1,1,0,0,2,0
6,2025-02-03,ACQ30645805,ACQ0633,"Cover Assembly,Rear",N,FUNCTIONAL,ASC,HE,GLT,1,0,0,0,0,,0,0,0,0,OLED65C4PUA.AUS,,D,0,0,0,0,1,0,1,0
7,2025-02-05,ACQ30224030,ACQ0000,Cover Assembly,N,FUNCTIONAL,ASC,HE,GLT,1,0,0,0,0,,0,0,0,0,65QNED85AQA.AUS,,D,0,0,0,0,1,0,0,0
8,2025-02-05,MAZ67472140,MAZ0000,Bracket,N,FUNCTIONAL,ASC,HE,GLT,1,0,0,0,0,,0,0,0,0,86UR8000AUA.AUS,,D,0,0,0,0,1,0,0,0
9,2025-02-05,MAZ67472274,MAZ0000,Bracket,N,FUNCTIONAL,ASC,HE,GLT,1,0,0,0,0,,0,0,0,0,86UR8000AUA.AUS,,D,0,0,0,0,1,0,0,0


In [39]:
demand_merged['first_receipt_date'] = pd.to_datetime(demand_merged['first_receipt_date'], errors='coerce')

In [40]:
demand_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1392 entries, 0 to 1391
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Order Date          1392 non-null   datetime64[ns]
 1   Parts No            1392 non-null   object        
 2   Parts Class Code    1392 non-null   object        
 3   Desc                1392 non-null   object        
 4   Key Parts           1392 non-null   object        
 5   Functionality       1392 non-null   object        
 6   Customer            1392 non-null   object        
 7   Company Code        1392 non-null   object        
 8   Division Code       1392 non-null   object        
 9   BO qty              1392 non-null   int64         
 10  On Hand             1392 non-null   int64         
 11  In Stage            1392 non-null   int64         
 12  WHAR                1392 non-null   int64         
 13  Transit             1392 non-null   int64       

#### **Part Aging Column Addition**

In [41]:
def categorize_date(date_str):
  given_date = date_str
  current_date = datetime.now()

  diff_in_days = (current_date - given_date).days
  diff_in_months = diff_in_days // 30
  diff_in_years = diff_in_days // 365

  if diff_in_years >= 2:
      return "Over 2 Years"
  elif diff_in_years >= 1:
      return "Over 1 Year"
  elif diff_in_months > 6:
      return "Over 6 Months"
  elif diff_in_months > 3:
      return "Over 3 Months"
  else:
      return "Within 3 Months"

In [42]:
check_part_date_list =[]

for i in range(demand_merged.shape[0]):
  check_part_date_list.append(categorize_date(demand_merged['first_receipt_date'][i]))

demand_merged.insert(0, 'Age', check_part_date_list)

In [43]:
demand_merged.head(1)

Unnamed: 0,Age,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505
0,Over 2 Years,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0


In [44]:
demand_merged_unique = demand_merged.drop_duplicates(subset=['Parts No', 'Order Date'])

df_unique = (
    demand_merged_unique
    .groupby('Parts No')['Order Date']
    .apply(lambda x: x.dt.strftime('%Y-%m-%d').tolist())
    .reset_index()
)
df_unique

Unnamed: 0,Parts No,Order Date
0,0IPMGKE053B,[2025-04-14]
1,1TPL0302818,[2025-04-05]
2,2B71165R,[2025-05-06]
3,3040EA1005A,[2025-04-11]
4,3210EL1003A,[2025-04-29]
...,...,...
621,MKC67468030,[2025-04-10]
622,TAW35798802,"[2024-09-06, 2024-10-17, 2025-04-08]"
623,TBZ39255001,[2025-04-23]
624,TCA35893203,[2025-05-05]


In [45]:
def calculate_14_day_streak(order_dates):
    if not order_dates:
        return 0

    order_dates = pd.to_datetime(order_dates)

    max_date = max(order_dates)
    streak_count = sum(1 for date in order_dates if (max_date - pd.Timedelta(days=14)) <= date <= max_date)
    return streak_count

In [46]:
df_unique['BO_14day_count'] = df_unique['Order Date'].apply(calculate_14_day_streak)

In [47]:
df_unique = df_unique.drop('Order Date', axis=1)
streak_added = demand_merged.merge(df_unique, how='left', on='Parts No')

In [48]:
streak_added.head(2)

Unnamed: 0,Age,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505,BO_14day_count
0,Over 2 Years,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2
1,Over 2 Years,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2


#### **Supplier Info Data**

In [49]:
# Supplier Master Data

folder_path = 'gs://daily_backorder/'
file_name = 'Supplier Master 2025 0428.xlsx'
file_path = folder_path + file_name

fs = gcsfs.GCSFileSystem()
file_list = fs.ls(folder_path)

df_supp = pd.read_excel(fs.open(file_path), sheet_name='Today', header=1, usecols="A:Z")
df_supp2 = df_supp[['Part No', 'Supplier Code']].rename(columns ={'Part No': 'Parts No', 'Supplier Code': 'Supplier'})

In [50]:
supplier_added = pd.merge(streak_added, df_supp2, how='left', on='Parts No')

In [51]:
supplier_added.sample(n = 3)

Unnamed: 0,Age,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505,BO_14day_count,Supplier
543,Over 2 Years,2025-05-01,MFL30599175,MFL0687,"Manual,Service",N,ACCESSORY,PD Stock,HA,DFT,1,0,0,0,0,,0,0,0,0,WM3360HRCA.ACREEUS,2011-12-15,D,0,0,0,0,0,0,0,0,1,US016180
68,Over 2 Years,2025-05-01,ABQ72940013,ABQ0568,"Case Assembly,PCB",N,FUNCTIONAL,PD Stock,HA,CNT,1,0,0,0,16,5/12_3 ; 5/20_3 ; 5/23_1 ; 6/09_2 ; 6/11_7,8,4,0,4,LFXC24726S.ASTCNA0,2014-08-26,B,0,1,3,3,4,4,11,0,3,KR044335
694,Over 2 Years,2025-05-06,5220FR2008Y,AJU0403,"Valve Assembly,Inlet",N,FUNCTIONAL,ASC,HA,DFT,1,0,7,0,81,5/21_10 ; 6/02_14 ; 6/09_19 ; 6/23_36 ; 6/30_2,72,30,0,42,WM3998HBA.ABLEVUS,2020-02-18,B,0,7,21,19,36,22,31,0,2,VN018311


#### **Parsing ETA**

In [52]:
result = supplier_added.groupby('Parts No').agg({
    'Order Date': 'min',
    'BO qty': 'sum',
    'ETA': 'max',
    'On Hand': 'max',
    'In Stage': 'max',
    'WHAR': 'max',
    'Transit': 'max'
}).reset_index()

In [53]:
result

Unnamed: 0,Parts No,Order Date,BO qty,ETA,On Hand,In Stage,WHAR,Transit
0,0IPMGKE053B,2025-04-14,1,6/09_1,0,0,0,1
1,1TPL0302818,2025-04-05,2,5/14_5 ; 6/02_50 ; 6/16_2,0,0,0,57
2,2B71165R,2025-05-06,2,5/14_278 ; 5/23_3 ; 6/16_147,235,72,0,428
3,3040EA1005A,2025-04-11,1,6/04_2 ; 6/30_1,0,0,0,3
4,3210EL1003A,2025-04-29,1,5/15_2,0,0,0,2
...,...,...,...,...,...,...,...,...
621,MKC67468030,2025-04-10,1,5/29_3,0,0,0,3
622,TAW35798802,2024-09-06,3,,0,0,0,0
623,TBZ39255001,2025-04-23,1,5/12_2 ; 6/30_3,0,3,0,5
624,TCA35893203,2025-05-05,1,,0,0,0,0


In [54]:
def parse_eta(eta, order_date):

    if pd.isna(eta) or eta.strip() == '':
        return []

    eta_entries = eta.split(';')
    eta_list = []

    for entry in eta_entries:
        if '_' in entry:
            date_str, qty_str = entry.split('_')
            date_str = date_str.strip()

            try:
                eta_date = datetime.strptime(date_str, "%m/%d")
                eta_date = eta_date.replace(year=order_date.year)

                if eta_date < order_date:
                    eta_date = eta_date.replace(year=order_date.year + 1)

                eta_qty = int(qty_str)
                eta_list.append((eta_date.strftime("%m/%d/%Y"), eta_qty))

            except ValueError:
                print(f"Invalid date format in ETA: {date_str}")
                continue

    return eta_list

In [55]:
def assign_flag(row):

    eta_list = parse_eta(row['ETA'], row['Order Date'])

    on_hand_qty = row['On Hand']
    in_staging_qty = row['In Stage']
    wh_qty = row['WHAR']
    order_qty = row['BO qty']
    order_date = row['Order Date']

    on_hand_quantities = on_hand_qty + in_staging_qty + wh_qty

    if not eta_list:
      if on_hand_quantities >= order_qty:
        return 'Ignore'
      else:
        return 'No ETA'

    # Check if the parts have ETA within 7 days and enough quantity
    sum_qty_7d = sum(qty for eta_date, qty in eta_list if datetime.strptime(eta_date, "%m/%d/%Y") <= order_date + timedelta(days=7))
    total_qty_7d = sum_qty_7d + on_hand_quantities

    if total_qty_7d >= order_qty:
        return 'ETA_in_1W'

    # Check if the parts have ETA within 14 days and enough quantity
    sum_qty_14d = sum(qty for eta_date, qty in eta_list if datetime.strptime(eta_date, "%m/%d/%Y") <= order_date + timedelta(days=14))
    total_qty_14d = sum_qty_14d + on_hand_quantities

    if total_qty_14d >= order_qty:
        return 'ETA_in_2W'

    return 'Flag'

In [56]:
result['Flag'] = result.apply(assign_flag, axis=1)

In [57]:
result.sample(n=10)

Unnamed: 0,Parts No,Order Date,BO qty,ETA,On Hand,In Stage,WHAR,Transit,Flag
277,AJP73816103,2025-04-30,1,5/16_1 ; 5/30_1,5,2,20,2,ETA_in_1W
17,4930JA3093B,2025-05-05,2,6/09_3,0,0,0,3,Flag
389,EAJ65707601,2025-04-11,12,5/26_1 ; 5/28_1 ; 6/06_1,0,0,0,3,Flag
92,ABQ30314501,2025-05-05,3,5/29_4 ; 6/30_5,0,0,0,9,Flag
359,EAD63748816,2025-04-24,93,5/13_12 ; 5/19_600,1,0,338,612,ETA_in_1W
473,EBR64624601,2025-04-17,5,5/16_11 ; 5/26_4,49,0,0,15,ETA_in_1W
104,ABQ76121061,2025-05-01,1,,41,6,0,0,Ignore
439,EBJ60664626,2025-04-17,1,5/16_3 ; 5/20_3 ; 6/16_3,0,0,0,9,Flag
526,EBU68007001,2025-04-24,6,5/12_1 ; 5/20_16 ; 6/09_46,0,0,0,63,Flag
377,EAD66050101,2025-04-29,1,,0,0,0,0,No ETA


In [58]:
df_all = pd.merge(supplier_added, result[['Parts No', 'Flag']], on='Parts No', how='left')

In [59]:
df_all.head(2)

Unnamed: 0,Age,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505,BO_14day_count,Supplier,Flag
0,Over 2 Years,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA
1,Over 2 Years,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA


In [60]:
df_all

Unnamed: 0,Age,Order Date,Parts No,Parts Class Code,Desc,Key Parts,Functionality,Customer,Company Code,Division Code,BO qty,On Hand,In Stage,WHAR,Transit,ETA,Open,Air PO,Trk PO,Sea PO,Model,first_receipt_date,grade,total_sub,202411,202412,202501,202502,202503,202504,202505,BO_14day_count,Supplier,Flag
0,Over 2 Years,2024-07-05,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA
1,Over 2 Years,2024-07-10,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,2,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA
2,Over 2 Years,2024-07-26,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA
3,Over 2 Years,2024-08-01,EAJ65658001,EAJ0202,"LCD,Module-TFT",N,FUNCTIONAL,ASC,BS,GTT,1,0,0,0,0,,0,0,0,0,27GN950-B.AUS,2020-11-12,C,0,0,0,1,0,0,2,0,2,KR044335,No ETA
4,Within 3 Months,2024-11-14,ADW73550411,ADW0000,Funnel Assembly,N,FUNCTIONAL,ASC,HA,CNT,1,0,0,0,3,5/12_3,11,3,0,8,LF29S8330D.ASBCNA0,NaT,D,0,1,0,0,0,0,1,0,1,KR044335,Flag
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1387,Over 6 Months,2025-05-08,MCK71414904,MCK0362,"Cover,Guide",N,FUNCTIONAL,DMS,HA,CVT,1,7,0,0,5,5/14_5,5,0,5,0,LSEL6337D.BBDLLGA,2024-06-28,C,0,0,4,5,4,5,7,0,1,KR044335,ETA_in_1W
1388,Over 2 Years,2025-05-08,MEB65414301,MEB0227,"Handle,Door",N,COSMETIC,DMS,HA,CVT,1,5,21,23,122,5/29_122,398,0,0,398,LREL6325F.FRSLLGA,2020-07-16,B,0,57,49,88,70,76,80,0,1,KR044335,ETA_in_1W
1389,Within 3 Months,2025-05-08,AKC73369912,AKC0391,"Bucket Assembly,Ice",N,FUNCTIONAL,DMS,HA,CNT,1,0,1,0,88,6/09_68 ; 6/11_20,131,0,0,131,LMXC23796S.ASTCNA0,2025-03-03,N,0,0,0,0,6,34,40,0,7,KR044335,Flag
1390,Over 2 Years,2025-05-08,EBZ64488626,<N/A>,"Frame,Outsourcing SVC",N,FUNCTIONAL,DMS,HA,CVT,1,0,2,0,0,,40,0,40,0,SKSMD2401S.FSTESNA,2020-08-07,C,0,2,1,8,0,3,2,0,2,US013333,Ignore


In [61]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1392 entries, 0 to 1391
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Age                 1392 non-null   object        
 1   Order Date          1392 non-null   datetime64[ns]
 2   Parts No            1392 non-null   object        
 3   Parts Class Code    1392 non-null   object        
 4   Desc                1392 non-null   object        
 5   Key Parts           1392 non-null   object        
 6   Functionality       1392 non-null   object        
 7   Customer            1392 non-null   object        
 8   Company Code        1392 non-null   object        
 9   Division Code       1392 non-null   object        
 10  BO qty              1392 non-null   int64         
 11  On Hand             1392 non-null   int64         
 12  In Stage            1392 non-null   int64         
 13  WHAR                1392 non-null   int64       

In [62]:
pip install google-cloud-bigquery



In [63]:
pip install pandas-gbq



In [64]:
from google.cloud import bigquery

client = bigquery.Client()

dataset_id = 'daily_backorder'
table_id = 'raw'

# 테이블에 데이터프레임 업로드
df_all.to_gbq(destination_table=f"{dataset_id}.{table_id}",
              project_id='lgeai-scm',  # 예: 'my_project_id'
              if_exists='replace')  # 테이블이 이미 있으면 덮어쓰기

100%|██████████| 1/1 [00:00<00:00, 7695.97it/s]
