In [1]:
from os import listdir
from os.path import isfile, join
import pandas as pd
from datetime import datetime

In [29]:
class Psi:
    """Load PSI report workbooks from a folder and reshape them into one table.

    Each workbook is read with pandas, its first data row is promoted to the
    column header, date-like headers are converted to 'YYYY-MM-DD' strings,
    and the supply and on-hand key figures are combined into one frame that
    is joined to a calendar CSV.

    Attributes:
        path: Folder that is scanned for workbooks.
        files: Workbook file names found in ``path``.
        filepaths: ``files`` joined onto ``path``.
        calendar_path: CSV providing the 'Date (YYYY-MM-DD)' and 'YYYYMM'
            columns used by ``combine_dfs``.
    """

    # Supported workbook extension -> pandas engine (None lets pandas choose).
    EXCEL_ENGINES = {'.xlsx': None, '.xlsm': None, '.xls': None, '.xlsb': 'pyxlsb'}
    # Key figures kept by get_supply.
    SUPPLY_KEY_FIGURES = ['Firm PO Receipt', 'Planned Order Receipt', 'Stock Movement']
    # Key figures summed by get_oh.
    ON_HAND_KEY_FIGURES = ['Total Demand', 'Projected Inventory']
    # Column get_oh reads by default.
    # NOTE(review): presumably the label the report's opening bucket converts to - confirm.
    ON_HAND_DATE = '2014-01-01'
    # Default calendar location; override per instance through __init__.
    calendar_path = r'\\na.jnj.com\dpyusdfsroot\RY_Company\Supply Chain Mgmt\Spine Plan-NPI\Teligen\Alteryx Deployment\Inputs\JNJ Calendar.csv'

    def __init__(self, path=r'C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports', calendar_path=None):
        """Scan ``path`` for workbooks.

        Args:
            path: Folder containing the report workbooks.
            calendar_path: Optional replacement for the default calendar CSV.
        """
        self.path = path
        if calendar_path is not None:
            self.calendar_path = calendar_path
        self.files = self.get_files(self.path)
        self.filepaths = self.get_filepaths(self.path, self.files)

    def get_files(self, path):
        """Return the sorted names of the supported workbooks directly inside ``path``.

        Matching is on the file extension (case-insensitive). Names starting
        with '~$' are skipped because Excel creates those lock files next to
        any workbook that is currently open.
        """
        extensions = tuple(self.EXCEL_ENGINES)
        return sorted(
            f for f in listdir(path)
            if isfile(join(path, f))
            and f.lower().endswith(extensions)
            and not f.startswith('~$')
        )

    def get_filepaths(self, path, files):
        """Return each name in ``files`` joined onto ``path``."""
        return [join(path, name) for name in files]

    def read_file(self, filepath):
        """Read one workbook into a DataFrame.

        Returns an empty DataFrame when the extension is not a supported
        workbook type.
        """
        lowered = filepath.lower()
        for extension, engine in self.EXCEL_ENGINES.items():
            if lowered.endswith(extension):
                return pd.read_excel(filepath, engine=engine)
        return pd.DataFrame()

    def get_date_header_conv(self, df):
        """Map date-like column labels of ``df`` to 'YYYY-MM-DD' strings.

        Labels that are datetimes (including pandas Timestamps) are formatted
        directly. Labels that ``int()`` accepts are treated as Excel serial
        day numbers. Every other label is left out of the mapping.

        Returns:
            dict of original label -> date string, suitable for
            ``DataFrame.rename(columns=...)``.
        """
        # Serial N maps to (1900-01-01 + N - 2 days); the -2 covers Excel's
        # 1-based count and its fictitious 1900-02-29.
        base_ordinal = datetime(1900, 1, 1).toordinal() - 2
        dates = {}
        for label in df.columns:
            # isinstance also catches pd.Timestamp, a datetime subclass.
            # NaT is one too but cannot be formatted, so it is excluded.
            if isinstance(label, datetime) and label is not pd.NaT:
                dates[label] = str(label.date())
                continue
            try:
                converted = datetime.fromordinal(base_ordinal + int(label))
            except (ValueError, TypeError, OverflowError):
                # Not a number, or a number outside the representable range.
                continue
            dates[label] = str(converted.date())
        return dates

    def init_prep_df(self, df : pd.DataFrame):
        """Promote the first row of ``df`` to the header and normalise date labels.

        The input frame is not modified; a new frame is returned.
        """
        header = df.iloc[0].to_numpy()
        # Copy so that assigning columns never touches the caller's frame.
        body = df.iloc[1:].copy()
        body.columns = header
        return body.rename(columns=self.get_date_header_conv(body))

    def get_oh(self, df, date=None):
        """Build 'On Hand Inventory' rows per LocationId/ProductId.

        The value is the sum of the 'Total Demand' and 'Projected Inventory'
        rows in the ``date`` column.

        Args:
            df: Prepared report frame (see ``init_prep_df``).
            date: Column to read; defaults to ``ON_HAND_DATE``.
        """
        if date is None:
            date = self.ON_HAND_DATE
        keys = ['LocationId', 'ProductId']
        rows = df[df['Key Figure'].isin(self.ON_HAND_KEY_FIGURES)]
        onhand = rows[keys + [date]].groupby(by=keys).sum().reset_index()
        onhand['Key Figure'] = 'On Hand Inventory'
        onhand['Date'] = date
        return onhand.rename(columns={date: 'value'})

    def get_supply(self, df):
        """Return the supply key figures in long form (one row per date).

        Keeps rows whose 'VendorDesc' contains '-' and whose 'Key Figure' is
        one of ``SUPPLY_KEY_FIGURES``, melts every string column containing
        '20' into Date/value pairs, and drops rows whose value equals 0.
        """
        # na=False: rows with a missing VendorDesc are excluded instead of
        # producing a NaN mask, which boolean indexing rejects.
        has_dash = df['VendorDesc'].str.contains('-', regex=False, na=False)
        is_supply = df['Key Figure'].isin(self.SUPPLY_KEY_FIGURES)
        supply = df[has_dash & is_supply]
        # Non-string labels (blank or numeric headers) cannot be date columns.
        dates = [c for c in supply.columns if isinstance(c, str) and '20' in c]
        supply = pd.melt(
            supply,
            id_vars=['LocationId', 'ProductId', 'Key Figure'],
            value_vars=dates,
            var_name='Date',
        )
        return supply[supply['value'] != 0].reset_index(drop=True)

    def _load_calendar(self):
        """Read the calendar CSV from ``calendar_path``."""
        return pd.read_csv(self.calendar_path)

    def combine_dfs(self, supply, onhand, calendar=None):
        """Pivot supply and on-hand rows to one column per key figure and add 'YYYYMM'.

        Args:
            supply: Output of ``get_supply``.
            onhand: Output of ``get_oh``.
            calendar: Optional pre-loaded calendar frame; read from
                ``calendar_path`` when omitted.

        Raises:
            ValueError: from ``pd.pivot`` when a LocationId/ProductId/Date
                combination repeats for the same key figure.
        """
        if calendar is None:
            calendar = self._load_calendar()
        df = pd.concat([onhand, supply]).reset_index(drop=True)
        df['Key Figure'] = df['Key Figure'].astype(str)
        # Keyword arguments: pandas 2.x rejects positional index/columns/values.
        df = pd.pivot(
            df,
            index=['LocationId', 'ProductId', 'Date'],
            columns='Key Figure',
            values='value',
        ).reset_index()
        # Inner merge: rows whose Date is absent from the calendar are dropped.
        df = df.merge(
            calendar[['Date (YYYY-MM-DD)', 'YYYYMM']],
            left_on='Date',
            right_on='Date (YYYY-MM-DD)',
        ).drop('Date (YYYY-MM-DD)', axis=1)
        return df.sort_values(['ProductId', 'Date', 'LocationId'])

    def final_df(self, df, calendar=None):
        """Run the supply, on-hand and combine steps on one prepared frame."""
        supply = self.get_supply(df)
        onhand = self.get_oh(df)
        return self.combine_dfs(supply, onhand, calendar)

    def all_files(self, filepaths=None):
        """Process every workbook and concatenate the results.

        Args:
            filepaths: Workbooks to process; defaults to ``self.filepaths``.

        Returns:
            (combined, raw_dfs): the concatenated result, empty when nothing
            could be processed, and the list of frames exactly as read.
        """
        if filepaths is None:
            filepaths = self.filepaths
        dfs = []
        raw_dfs = []
        calendar = None
        for filepath in filepaths:
            print(filepath)
            raw = self.read_file(filepath)
            raw_dfs.append(raw)
            if raw.empty:
                # Nothing to promote to a header; skip rather than crash.
                print(f'skipped (unsupported or empty workbook): {filepath}')
                continue
            if calendar is None:
                # Read the calendar once and share it across all workbooks.
                calendar = self._load_calendar()
            dfs.append(self.final_df(self.init_prep_df(raw), calendar))
        if not dfs:
            # pd.concat raises on an empty list.
            return pd.DataFrame(), raw_dfs
        return pd.concat(dfs), raw_dfs


In [30]:
# Build the loader with its default report folder, then process every
# workbook it discovered; the frames as read are kept alongside the result.
psi = Psi()
data, raw_dfs = psi.all_files(filepaths=psi.filepaths)

C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 BW.xlsb
C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 LL.xlsx
C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 MM.xlsb
C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 SZ.xlsb


In [31]:
# Copy the combined table to the system clipboard so it can be pasted elsewhere.
data.to_clipboard()

In [41]:
# Debug aid: show the last five characters of each path and which workbook
# type they identify.
for filepath in psi.filepaths:
    suffix = filepath[-5:]
    print(suffix)
    # The slice is at most five characters, so equality is the same check as
    # a substring test for a five-character extension.
    if suffix == '.xlsx':
        print(f'xlsx: {filepath}')
    elif suffix == '.xlsb':
        print(f'xlsb: {filepath}')

.xlsb
xlsb: C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 BW.xlsb
.xlsx
xlsx: C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 LL.xlsx
.xlsb
xlsb: C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 MM.xlsb
.xlsb
xlsb: C:\Users\JGreenw9\JNJ\External Operations Plan - PSI Reports\PSI 22-06-27 SZ.xlsb
