In [1]:
import pandas as pd
import os
import numpy as np
import glob
from pathlib import Path
from datetime import datetime as dt
from datetime import timedelta

In [2]:
home = Path.home()
today = dt.today().date()

dateRange = [today - timedelta(days = x) for x in range(365)]
#dateRange = [i.strftime("%Y%m%d") for i in dateRange]

today = today.strftime("%Y%m%d")


ODMdict = {
    'FWH' : 'WHFXN',
    'Compal' : 'KSCEI',
    'CEI' : 'KSCEI',
    'Wistron' : 'CQWIS',
    'Inventec' : 'CQIEC',
    'Quanta' : 'CQQCI',
    'Pegatron' : 'CQPCQ'
}

In [3]:
dateRange.reverse()

In [4]:
def clean(fname: str, file : pd.DataFrame, externalReportDate : str) -> pd.DataFrame:

    currentYear = dt.now().year
    currentday = fname.split('\\')[-1][-13:-5]
    file = file.assign(LastSGreportDate = currentday)
    
    file['LastSGreportDate'] = file['LastSGreportDate'].apply(lambda x: dt.strptime(x, '%Y%m%d'))
    file['LastSGreportDate'] = pd.to_datetime(file['LastSGreportDate'])

    file = file.assign(reportDate = externalReportDate)

    file['reportDate'] = file['reportDate'].apply(lambda x: dt.strptime(x, '%Y%m%d'))
    file['reportDate'] = pd.to_datetime(file['reportDate'])

    #clean
    file.columns = file.columns.str.strip()

    #drop useless columns and rows
    file = file.drop(columns = ['Description (Item)', 'Schedule (Comments)', 'Hub inventory', 'Vendor'])
    file = file[file['Procurement type'] == 'B/S'].reset_index(drop = True)

    #adjust qty columns name and units
    qtycol = file.filter(like='Single Shortage QTY (K)').columns.tolist()
    

    for i in qtycol:
        file[i] = file[i].apply(lambda x: x.upper() if type(x) == str else x)
        file[i] = file[i].replace("NEW", 0)
        file[i] = file[i].apply(lambda x: x*1000)
    file = file.rename(columns= {qtycol[0]: 'Single Shortage QTY', qtycol[1]: 'Prev_Single Shortage QTY'})

    #replace ODM name
    file['ODM'] = file['ODM'].replace(ODMdict)
    return file   

In [5]:
target = Path (home, 'HP Inc','GPSTW SOP - 2021 日新','Project team','Single shortage')
PNFVPath = Path(home, 'HP Inc','GPSTW SOP - 2021 日新', 'PN FV description mapping table_ALL.xlsx')
ExternalReportFolder = Path(home, 'HP Inc','GPSTW SOP - 2021 日新','Project team','External test destination')

PNFVFile = pd.read_excel(PNFVPath)
PNFVFile = PNFVFile [['PN', 'Descr']]
PNFVFile = PNFVFile.rename(columns = {'PN': 'HP PN'})

In [6]:
fileList = [str(x) for x in Path(target,'Archive').glob("*xlsx")]
fileListDateNum = [dt.strptime(x[-13:-5], "%Y%m%d").date() for x in fileList]

errorList = []
resultList = []

In [7]:
lookupSGdateList = []
i = 0
for _ in dateRange:
    if i == len(fileListDateNum)-1:
        lookupSGdateList.append(fileListDateNum[i])
        continue
    
    if _ < fileListDateNum[i+1]:
        lookupSGdateList.append(fileListDateNum[i])
    else:
        i = i + 1
        lookupSGdateList.append(fileListDateNum[i])

In [8]:
dateRange = [i.strftime("%Y%m%d") for i in dateRange]
lookupSGdateList = [i.strftime("%Y%m%d") for i in lookupSGdateList]

In [9]:
zip(dateRange, lookupSGdateList)

<zip at 0x1ec683c6d80>

dateList = result['reportDate'].tolist()
max(dateList)

In [10]:
def addKey(res: pd.DataFrame) -> tuple[list, pd.DataFrame]:
    LatestSGMaterial = res
    LatestSGMaterial = LatestSGMaterial.merge(PNFVFile, on = 'HP PN', how = 'left')
    LatestSGMaterial['Key'] = LatestSGMaterial['ODM'] + LatestSGMaterial['Descr']
    KeyList = LatestSGMaterial['Key'].tolist()
    print("addKey done!")
    return KeyList, res

### concat current day external report

In [11]:
def concatExternal(extReportPathList: list, KeyList: list) -> pd.DataFrame:
    externalResultDFList = []
    if not extReportPathList:
        return 

    for _ in extReportPathList:
        try: 
            temp = pd.read_excel(_)
            temp['ODM'] = temp['ODM'].ffill()
            temp['ODM'] = temp['ODM'].replace(ODMdict)
            temp['FV/Des'] = temp['FV/Des'].ffill()
            #temp['ETA'] = temp['ETA'].ffill()
            temp['key'] = temp['ODM'] + temp['FV/Des']
            temp = temp[temp.key.isin(KeyList)]
            
            try:
                temp = temp[['ODM', 'FV/Des', 'HP_PN', 'ETA', 'GPS Remark']]
            except:
                temp = temp[['ODM', 'FV/Des', 'HP PN', 'ETA', 'GPS Remark']]
                temp = temp.rename(columns = {'HP PN' : 'HP_PN'})
                print("Rocky wrong format!")
                
            temp = temp.groupby(['ODM', 'FV/Des']).agg({'ETA' : lambda x: '\n'.join(set(x.dropna())),
                                                        'GPS Remark': lambda x: '\n'.join(set(x.dropna()))})
            temp = temp.reset_index()
            if len(temp) > 0:
                print(len(temp))
                externalResultDFList.append(temp)
            else:
                pass
        except Exception as e:
            print(e)
            print(_)
    try:
        externalResultDF = pd.concat(externalResultDFList)
    except ValueError:
        print("No single shortage match!")
        return pd.DataFrame(columns = ['ODM', 'FV/Des', 'HP_PN', 'ETA', 'GPS Remark'])
    print('External process done!')
    return externalResultDF

### lookup PNFV and merge external reportm
### output

In [12]:
def mergeNoutput(SGres: pd.DataFrame, extRes: pd.DataFrame, dateStr: str) -> None:
    SGres = SGres.merge(PNFVFile.rename(columns = {'PN': 'HP PN'}), on = 'HP PN', how = 'left')
    SGres = SGres.merge(extRes.rename(columns = {'FV/Des' : 'Descr'}), on = ['ODM', 'Descr'], how = 'left')
    SGres = SGres.drop_duplicates()
    SGres.to_excel(Path(target, 'test','total singal shortage_' + dateStr +'.xls'), index = False, engine = 'xlwt')
    print("Output done!")

In [13]:
dateRange.reverse()

In [14]:
lookupSGdateList.reverse()

In [15]:
# for f in fileList:
#     try:
#         file = pd.read_excel(f)
#         resultList.append(clean(f, file))
#         print(f + " process done!")
#     except Exception as e:
#         errorList.append([f, e])
#         print(f + " process failed!")

# result = pd.concat(resultList)

In [16]:
for i, j in zip(dateRange[0], lookupSGdateList[0]):
    print(i, j)

2 2
0 0
2 2
2 2
1 1
1 1
1 1
6 0


In [17]:
et = today
sg = '20221115'

fname = Path (target, str('Single shortage ' + sg + '.xlsx'))
ExternalReport = [f for f in glob.glob(str(Path(ExternalReportFolder, et + '*')))]
if not ExternalReport:
    print("No external on " + et)
    exit()

try:
    file = pd.read_excel(str(fname))
    sg_res = clean(str(fname), file, et)
    print(str(fname) + " process done!")
except Exception as e:
    errorList.append([str(fname), e])
    print(str(fname) + " process failed!")
    exit()

k, sg_res = addKey(sg_res)
ext_res = concatExternal(ExternalReport, k)
if ext_res is None:
    print("No external on " + et)
    exit()

mergeNoutput(sg_res, ext_res, et)

C:\Users\lulo\HP Inc\GPSTW SOP - 2021 日新\Project team\Single shortage\Single shortage 20221115.xlsx process done!
addKey done!
3
1
2
External process done!
Output done!


  SGres.to_excel(Path(target, 'test','total singal shortage_' + dateStr +'.xls'), index = False, engine = 'xlwt')


In [18]:
asdgddfg

NameError: name 'asdgddfg' is not defined

In [None]:
for et, sg in zip(dateRange[0], lookupSGdateList[0]):
    fname = Path (target, "Archive", str('Single shortage ' + sg + '.xlsx'))
    ExternalReport = [f for f in glob.glob(str(Path(ExternalReportFolder, et + '*')))]
    if not ExternalReport:
        print("No external on " + et)
        continue

    try:
        file = pd.read_excel(str(fname))
        sg_res = clean(str(fname), file, et)
        print(str(fname) + " process done!")
    except Exception as e:
        errorList.append([str(fname), e])
        print(str(fname) + " process failed!")
        continue

    k, sg_res = addKey(sg_res)
    ext_res = concatExternal(ExternalReport, k)
    if ext_res is None:
        print("No external on " + et)
        continue

    mergeNoutput(sg_res, ext_res, et)

In [None]:
fname = Path (target, "Archive", str('Single shortage ' + lookupSGdateList[-4] + '.xlsx'))
ExternalReport = [f for f in glob.glob(str(Path(ExternalReportFolder, dateRange[-4] + '*')))]

ExternalReport

In [None]:
errorList

In [None]:
import psutil
psutil.cpu_percent()
psutil.virtual_memory()
print(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)