In [1]:
import pandas as pd

In [2]:
import numpy as np

## Part A : Data Massage

### 1. Declare input and output file name.

Input file needs to be in CSV format for faster loading.

#### me2l_input_file = input/ME2L_20210227.csv

Place the file in the *input* directory.

In [3]:
me2l_input_file = "input/ME2L_20210327.csv"

### 2. Declare output file name.

Output file name in Excel (xslx) format.

#### me2l_output_file = output/output_me2l_20210227.xlsx

In [4]:
me2l_output_file = "output/ME2L_20210327_output.xlsx"

### 3. Indicate US/SGD Dollar currency rate to be used.

In [5]:
# Set USD/SGD Currency rate here.
usd_currency_rate = 4.1
sgd_currency_rate = 3

### 4. Read input file

If file contains large data sets with alot of columns, reading will take a bit of time.

In [6]:
print(' ===> Running..')
df1 = pd.read_csv(me2l_input_file,parse_dates=True, encoding='ISO-8859-1',thousands=',');
print(' ===> DATA LOADED')

 ===> Running..
 ===> DATA LOADED


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


### 5. Remove unwanted columns.

This will allow processing of data faster since unwanted data is discarded.Columns removed:
- Deletion Indicator
- Req. Tracking Number

In [7]:
df1.drop(columns=['Deletion Indicator','Req. Tracking Number'],inplace=True);
print(' ===> DROP COLUMNS COMPLETED')

 ===> DROP COLUMNS COMPLETED


### 6. Insert 'Vendor Code' and 'Vendor' column at the begining.

Extract Vendor Code info and Vendor Name info from 'Vendor/supplying plant' column.

Based on old SAP entries there are multiple Vendor Code entries which are similar but Vendor name spelling is different. Example:
- Vendor Code = 1107060 , Vendor Name = Corporation A Sdn. Bhd.
- Vendor Code = 1107060 , Vendor Name = Corporation A Technologies


Need to rearrange all PO so that it is assigned to one unique Vendor Code and one Vendor Name. So that in final summary there are no multiple vendor name entries that refers to a single vendor.

Once this process done, remove the 'Vendor/supplying plant' column since it is redundant.

In [8]:
print(' ===> Running..')
#Extract Vendor code and Vendor name and insert as new columns
extract_vendor = df1["Vendor/supplying plant"].str.split(" ", n = 1, expand = True) 
df1.insert(0,'Vendor Code', extract_vendor[0]);
df1.insert(1,'Vendor', extract_vendor[1]);

#Create a separate dataframe for vendor code and vendor name
vendor_list={'Vendor Code': extract_vendor[0],'Vendor Unique': extract_vendor[1]}
df_vendor = pd.DataFrame(vendor_list, columns = ['Vendor Code', 'Vendor Unique'])

#Remove duplicate entries in vendor dataframe
df_vendor.drop_duplicates(inplace=True)
#Remove duplicates in vendor code but having multiple versions of vendor name. Only keep the first one found. 
#There will only be one 1x vendor code tied to 1x vendor name.
df_vendor.drop_duplicates(subset='Vendor Code',keep='first',inplace=True)
#Sort the values in ascending manner
df_vendor.sort_values(by=['Vendor Code'],inplace=True)

#Vlookup between both df1 and df_vendor to streamline the vendor name.
df1 = pd.merge(df1,df_vendor,on ='Vendor Code',how ='inner')
#Drop the initial Vendor column and Vendor/supplying plant
df1.drop(columns=['Vendor'],inplace=True)
df1.drop(columns=['Vendor/supplying plant','Outline Agreement'],inplace=True)

#Rename Vendor name columns
df1.rename(columns={'Vendor Unique': 'Vendor'}, inplace=True)

#Rearrange columns to place vendor name in 2nd position.
df1 = df1[['Vendor Code','Vendor', 'Purchasing Document', 'Short Text', 'Currency',
       'Acct Assignment Cat.', 'Document Date', 'Order Quantity', 'Net price',
       'Net Order Value', 'Still to be delivered (qty)',
       'Still to be delivered (value)', 'Still to be invoiced (qty)',
       'Still to be invoiced (val.)']]

print(' ===> VENDOR CODE & NAME ASSIGNMENT COMPLETED..')

 ===> Running..
 ===> VENDOR CODE & NAME ASSIGNMENT COMPLETED..


### 7. Classify each PO according to CAPEX/OPEX category.

Use values from 'Acc Assignment Cat.' as reference for classification.
- CAPEX (A, N, P, X)
- OPEX (F, K, Blank)

Add a new column called 'Capex/Opex' at the end of the data set.

In [9]:
print(' ===> Running..')
def assign_capex_opex(code):
    if(code == 'A' or code == 'N' or code == 'P' or code == 'X'):
        return 'CAPEX'
    elif (code == 'F' or code == 'K' or code == '' or np.isnan(code)):
        return 'OPEX'

df1['Capex/Opex'] = df1.apply(lambda row: assign_capex_opex(row['Acct Assignment Cat.']),axis=1)
print(' ===> CAPEX/OPEX CATEGORY ASSIGNMENT COMPLETED..')

 ===> Running..
 ===> CAPEX/OPEX CATEGORY ASSIGNMENT COMPLETED..


### 8. Calculate Aging, PO Category & PO Year

Add 2 new columns to input the Aging info:
- 'Aging' = shows aging in number of days
- 'Aging (Months & Days)' = shows aging in number of months and remaining days

Check PO Category if the value in 'Document Date' column is similar to current year. If not then PO < Current Year

Check PO Year using the 'Document Date' column. Extract year info.

In [10]:
import datetime

import time
start = time.time()
print(' ===> Running..')
def calculate_aging_days(doc_date,date_now):
    po_date = datetime.datetime.strptime(doc_date, '%d/%m/%Y')
    delta = now - po_date
    return int(delta.days)

def calculate_aging_months_days(aging_days):
    months = int(aging_days/30)
    remaining_days = int(aging_days%30)
    return str(months) + ' months ' + str(remaining_days) + ' days'

def assign_po_year(doc_date):
    po_date = datetime.datetime.strptime(doc_date, '%d/%m/%Y')
    return po_date.year

def assign_po_category(doc_date,current_year):
    po_date = datetime.datetime.strptime(doc_date, '%d/%m/%Y')
    return 'PO ' + str(now.year) if  po_date.year == now.year else 'PO <' + str(now.year)

now = datetime.datetime.now()
df1['Aging Days'] = df1.apply(lambda row: calculate_aging_days(row['Document Date'],now),axis=1)
df1['PO Year'] = df1.apply(lambda row: assign_po_year(row['Document Date']),axis=1)
df1['PO Category'] = df1.apply(lambda row: assign_po_category(row['Document Date'],datetime.datetime.now().year),axis=1)
df1['Aging (Months & Days)'] = df1.apply(lambda row: calculate_aging_months_days(row['Aging Days']),axis=1)
df1['PO Category'] = df1.apply(lambda row: assign_po_category(row['Document Date'],datetime.datetime.now().year),axis=1)

end = time.time()
print(' ===> AGING, PO CATEGORY & YEAR ASSIGNMENT COMPLETED..Executed in ' + str(end-start)+ 's')

 ===> Running..
 ===> AGING, PO CATEGORY & YEAR ASSIGNMENT COMPLETED..Executed in 22.761953592300415s


### 9. Calculate Aging Category

Classify each PO into the following categories:
- ( <6 Months )
- ( >6 Months )
- ( >18 Months )

Add a new column 'Aging Category' at the end.

In [11]:
print(' ===> Running..')
df1.loc[ (df1['Aging Days'] <= 182),'Aging Category'] = '<6 Months'
df1.loc[ (df1['Aging Days'] > 182) & (df1['Aging Days'] <= 540),'Aging Category'] = '>6 Months'
df1.loc[ (df1['Aging Days'] > 540),'Aging Category'] = '>18 Months'
print(' ===> AGING CATEGORY ASSIGNMENT COMPLETED')

 ===> Running..
 ===> AGING CATEGORY ASSIGNMENT COMPLETED


### 10. Assign GR status

Status can either be 'Open' or 'Closed' depending on there are still value to be delivered. Use the column 'Still to be delivered (value)' as reference.

In [12]:
print(' ===> Running..')
df1['GR Status'] = df1['Still to be delivered (value)'].apply(lambda x: 'Open' if x > 0 else 'Closed')
print(' ===> GR STATUS ASSIGNMENT COMPLETED')

 ===> Running..
 ===> GR STATUS ASSIGNMENT COMPLETED


### 11. Assign GRIR Status
Status can be either 'Open' or 'Closed' depending on value in column 'Still to be invoiced (val.)'

In [13]:
print(' ===> Running..')
df1['GRIR Status'] = df1['Still to be invoiced (val.)'].apply(lambda x: 'Closed' if x > -0.1 and x < 0.1 else 'Open')
print(' ===> GRIR STATUS ASSIGNMENT COMPLETED')

 ===> Running..
 ===> GRIR STATUS ASSIGNMENT COMPLETED


### 12. Assign PO Category


In [14]:
df1['PO Status'] = np.where((df1['GR Status'] == 'Closed') & (df1['GRIR Status'] == 'Closed'), 'Closed', 'Open')
print(' ===> ASSIGN PO CATEGORY COMPLETED')

 ===> ASSIGN PO CATEGORY COMPLETED


### 13. Calculate Still to be delivered (MYR-Value)


In [15]:
def convert_to_myr(value,currency):
    if currency == 'USD':
        return value*usd_currency_rate
    elif currency == 'SGD':
        return value*sgd_currency_rate
    else:
        return value

df1['Still to be delivered (MYR-Value)'] = df1.apply(lambda row: convert_to_myr(row['Still to be delivered (value)'],row['Currency']),axis=1)
df1['Still to be invoiced (MYR-Value)'] = df1.apply(lambda row: convert_to_myr(row['Still to be invoiced (val.)'],row['Currency']),axis=1)
print(' ===> COMPLETED')

 ===> COMPLETED


## Part B : Prepare Report

### 1. Write data to excel file

In [16]:
print(' ===> Running..')
writer = pd.ExcelWriter(me2l_output_file, engine='xlsxwriter',datetime_format='dd mmmm  yyyy')
start = time.time()

df1.to_excel(writer, sheet_name='data',index=False)
end = time.time()
print(' ===> WRITE OPERATION TO DATA WORKSHEET COMPLETED..Executed in ' + str(end-start)+ 's')

 ===> Running..
 ===> WRITE OPERATION TO DATA WORKSHEET COMPLETED..Executed in 55.16636562347412s


In [17]:
print(' ===> Running..')
#Convert certain columns to number format
workbook  = writer.book
worksheet = writer.sheets['data']
print(' ===> COLUMN CONVERSION TO NUMBER FORMAT COMPLETED')

 ===> Running..
 ===> COLUMN CONVERSION TO NUMBER FORMAT COMPLETED


## Create PO Summary List


In [18]:
print(' ===> Running..')
df1['Document Date'] = pd.to_datetime(df1['Document Date'], format="%d/%m/%Y")
print(' ===> COLUMN CONVERSION TO DATE FORMAT COMPLETED')

 ===> Running..
 ===> COLUMN CONVERSION TO DATE FORMAT COMPLETED


In [19]:
print(' ===> Running..')
df2 = df1.groupby(['Purchasing Document','Vendor','Capex/Opex', 'Currency','Document Date']).agg('sum')
df2.drop(columns=['Aging Days','PO Year', 'Net price','Order Quantity','Still to be delivered (qty)','Still to be invoiced (qty)'],inplace=True)
print(' ===> GROUPING OF PO LINE ITEMS COMPLETED')

 ===> Running..
 ===> GROUPING OF PO LINE ITEMS COMPLETED


In [20]:
print(' ===> Running..')
start = time.time()
df2.to_excel(writer, sheet_name='PO',merge_cells=False)

writer.save()
end = time.time()
print(' ===> WRITE OPERATION TO PO WORKSHEET COMPLETED..Executed in ' + str(end-start)+ 's')

 ===> Running..
 ===> WRITE OPERATION TO PO WORKSHEET COMPLETED..Executed in 33.05499505996704s
