In [1]:
import pandas as pd

In [2]:
import numpy as np

## Part A : Data Massage

### 1. Declare input file names.

The input files need to be in CSV format for faster loading.

#### Input 1 = ME2L_W01.CSV
#### Input 2 = ME2L_W02.CSV

Place the 2 input files in the `test/` folder, relative to the directory this notebook is run from.

In [3]:
# import time
# start = time.time()
# me2l_w01_file = "test/me2l_w01x.xlsx"
# df1 = pd.read_excel(me2l_w01_file,sheet_name='Raw');
# end = time.time()
# print(end - start)

In [4]:
# Locations of the two ME2L exports read by this notebook
# (relative to the directory the notebook is run from).
me2l_w01_file, me2l_w02_file = (
    "test/me2l_w01.csv",
    "test/me2l_w02.csv",
)

### 2. Read input files

If a file contains a large data set with a lot of columns, reading it will take some time.

In [5]:
# Read the first export (ME2L_W01) into df1, decoding the file as ISO-8859-1.
# parse_dates=True only asks pandas to try parsing the index as dates; the
# 'Document Date' column is parsed explicitly later in the notebook.
# NOTE(review): the saved output of this cell shows a warning from the read -
# if it is a DtypeWarning, consider passing dtype= for the affected columns.
df1 = pd.read_csv(
    me2l_w01_file,
    encoding='ISO-8859-1',
    parse_dates=True,
)
# df1.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
# Read the second export (ME2L_W02) into df2 using pandas' default encoding.
# parse_dates=True only asks pandas to try parsing the index as dates.
df2 = pd.read_csv(
    me2l_w02_file,
    parse_dates=True,
)
# df2.head()

### 3. Extract PO with correct CAPEX Category from ME2L_W02 

Capex Category = (A, N, P, X).
**TODO:** How do we check whether a project is deployment- or operation-related? Is there a keyword or vendor that can be used as a filter?

In [7]:

# Keep only the ME2L_W02 rows whose account assignment category is one of the
# CAPEX categories (A, N, P, X).
capex_categories = ['A', 'N', 'P', 'X']
is_capex_row = df2['Acct Assignment Cat.'].isin(capex_categories)
capex_w02 = df2[is_capex_row]

### 4. Append extraction from ME2L_W02 to ME2L_W01

Data will be pasted at the bottom of ME2L_W01 data set.

In [8]:
# Append the CAPEX rows extracted from ME2L_W02 below the ME2L_W01 rows.
# The previous `df1.append(capex_w02);` returned a new frame that was thrown
# away, so df1 never received the rows; DataFrame.append was also removed in
# pandas 2.0. pd.concat is the supported replacement and the result is assigned.
# ignore_index=True renumbers the rows so the combined frame has unique labels.
# NOTE: run this cell once per kernel session - re-running it appends the
# ME2L_W02 rows to df1 again.
df1 = pd.concat([df1, capex_w02], ignore_index=True)

### 5. Remove unwanted columns.

This allows the data to be processed faster, since unwanted data is discarded. Columns removed:
- Deletion Indicator
- Req. Tracking Number

In [9]:
# NOTE(review): DataFrame.drop returns a new frame and the result is not
# assigned here, so df1 still contains both columns after this cell runs.
# The columns are actually removed in Part B, where df_final is built by
# dropping them (by name) from df1 - that later drop expects them to exist.
df1.drop(columns=['Deletion Indicator','Req. Tracking Number']);

### 6. Insert 'Vendor Code' and 'Vendor' columns at the beginning.

Extract Vendor Code info and Vendor Name info from 'Vendor/supplying plant' column.

Once this process done, remove the 'Vendor/supplying plant' column since it is redundant.

In [10]:
# Split "<vendor code> <vendor name>" on the first space only, so vendor names
# that themselves contain spaces stay in one piece.
new = df1["Vendor/supplying plant"].str.split(" ", n=1, expand=True)
# Guarantee both result columns exist even when no value contains a space
# (the name column is then all-NaN instead of raising KeyError on new[1]).
new = new.reindex(columns=[0, 1])

# Make the cell safe to re-run: DataFrame.insert raises if the column already
# exists, so remove the results of a previous run first.
df1 = df1.drop(columns=['Vendor Code', 'Vendor'], errors='ignore')
df1.insert(0, 'Vendor Code', new[0])
df1.insert(1, 'Vendor', new[1])

# 'Vendor/supplying plant' is deliberately left in df1 here: it is dropped when
# df_final is built in Part B. (The previous un-assigned df1.drop(...) call on
# this line had no effect other than copying the frame.)

### 7. Classify each PO according to CAPEX/OPEX category.

Use the values in the 'Acct Assignment Cat.' column as the reference for classification.
- CAPEX (A, N, P, X)
- OPEX (F, K, Blank)

Add a new column called 'Capex/Opex' at the end of the data set.

In [11]:
# Add the 'Capex/Opex' column based on 'Acct Assignment Cat.':
#   CAPEX = A, N, P, X
#   OPEX  = F, K, or blank
# pandas.read_csv turns empty CSV cells into NaN by default, so a blank category
# never equals ''. Blank rows are therefore detected with isna() as well as ''.
acct_category = df1['Acct Assignment Cat.']

is_capex = acct_category.isin(['A', 'N', 'P', 'X'])
is_opex = acct_category.isin(['F', 'K', '']) | acct_category.isna()

df1.loc[is_capex, 'Capex/Opex'] = 'CAPEX'
df1.loc[is_opex, 'Capex/Opex'] = 'OPEX'

### 8. Calculate Aging, PO Category & PO Year

Add 2 new columns to hold the aging information:
- 'Aging Days' = shows the aging in number of days
- 'Aging (Months & Days)' = shows the aging in number of months (30-day months) and remaining days

Determine the PO Category by checking whether the year in the 'Document Date' column is the same as the current year; if it is not, the PO falls under 'PO < Current Year'.

Check PO Year using the 'Document Date' column. Extract year info.

In [12]:
# datetime supplies strptime() and now() for the aging calculation in this cell.
import datetime

# time is used to measure how long a cell takes to run.
import time
start = time.time()  # start of the timing; the elapsed seconds are printed at the end of the cell

def calculate_aging_days(doc_date, date_now):
    """Return the number of whole days between a document date and `date_now`.

    Parameters
    ----------
    doc_date : str or datetime.datetime
        The PO document date, either as a 'dd/mm/YYYY' string or as an
        already-parsed datetime (which is used as-is).
    date_now : datetime.datetime
        The reference date to measure the age against.

    Returns
    -------
    int
        `(date_now - doc_date).days`; negative when the document date lies
        after `date_now`.

    Raises
    ------
    ValueError
        If `doc_date` is a string that does not match 'dd/mm/YYYY'.
    """
    if isinstance(doc_date, datetime.datetime):
        po_date = doc_date
    else:
        po_date = datetime.datetime.strptime(doc_date, '%d/%m/%Y')
    # Use the date passed in by the caller; the previous version ignored this
    # parameter and read the notebook-global `now` instead.
    delta = date_now - po_date
    return int(delta.days)

def calculate_aging_months_days(aging_days):
    """Format an age in days as '<m> months <d> days', using 30-day months.

    The month count is the day count divided by 30 and truncated to an int;
    the day count is the remainder of the division by 30.
    """
    whole_months = int(aging_days / 30)
    leftover_days = int(aging_days % 30)
    return f"{whole_months} months {leftover_days} days"

# Reference point for the aging calculation: the moment this cell runs.
now = datetime.datetime.now()

# Parse 'Document Date' (dd/mm/YYYY strings) once for the whole column instead
# of calling strptime row by row.
document_dates = pd.to_datetime(df1['Document Date'], format='%d/%m/%Y')

# Age of each PO in whole days.
df1['Aging Days'] = (pd.Timestamp(now) - document_dates).dt.days

# PO Year is the year of the document date. The previous version stored the
# current year for every row, which contradicts the step description
# ("Check PO Year using the 'Document Date' column").
df1['PO Year'] = document_dates.dt.year

# Human-readable age, e.g. '2 months 5 days' (30-day months).
df1['Aging (Months & Days)'] = df1['Aging Days'].apply(calculate_aging_months_days)

# Report how long the cell took (seconds since `start` was taken at the top of the cell).
end = time.time()
print(end - start)

7.312576770782471


### 9. Calculate Aging Category

Classify each PO into the following categories:
- ( <6 Months )
- ( >6 Months )
- ( >18 Months )

Add a new column 'Aging Category' at the end.

In [13]:

# Bucket every PO by its age in days:
#   <= 182 days        -> '<6 Months'
#   183 .. 540 days    -> '>6 Months'
#   > 540 days         -> '>18 Months'
# The middle bucket now includes exactly 540 days; previously the conditions
# were `< 540` and `> 540`, so a PO aged exactly 540 days got no category.
aging_days = df1['Aging Days']
df1.loc[aging_days <= 182, 'Aging Category'] = '<6 Months'
df1.loc[(aging_days > 182) & (aging_days <= 540), 'Aging Category'] = '>6 Months'
df1.loc[aging_days > 540, 'Aging Category'] = '>18 Months'
print('done')


done


### 10. Assign GR status

Status can be either 'Open' or 'Closed', depending on whether there is still value to be delivered. Use the column 'Still to be delivered (value)' as the reference.

In [14]:

# Convert 'Still to be delivered (value)' from text with thousands separators
# (e.g. '1,234.50') to float. Casting to str first keeps the cell working when
# the column is already numeric - for example when this cell is re-run, or when
# the CSV contained no commas and pandas parsed the column as numbers - where
# the `.str` accessor would otherwise raise.
still_to_deliver = (
    df1['Still to be delivered (value)']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df1['Still to be delivered (value)'] = still_to_deliver

# GR is 'Open' while value remains to be delivered and 'Closed' when nothing is
# left. Rows with a negative or missing value are left without a GR Status.
df1.loc[still_to_deliver > 0, 'GR Status'] = 'Open'
df1.loc[still_to_deliver == 0, 'GR Status'] = 'Closed'
print('done')


done


### 11. Assign GRIR Status
Status can be either 'Open' or 'Closed' depending on value in column 'Still to be invoiced (val.)'

In [15]:
# Convert 'Still to be invoiced (val.)' from text with thousands separators to
# float. Casting to str first keeps the cell working when the column is already
# numeric (e.g. when the cell is re-run), where `.str` would otherwise raise.
still_to_invoice = (
    df1['Still to be invoiced (val.)']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df1['Still to be invoiced (val.)'] = still_to_invoice

# GRIR is 'Closed' when (practically) nothing is left to invoice, i.e. the
# remaining amount is within +/-0.1 of zero, and 'Open' otherwise. This matches
# GR Status, where a remaining value of 0 means 'Closed'. The previous version
# had the two labels swapped, so fully invoiced POs were reported as 'Open' and
# could never reach PO Status 'Closed'.
# Rows with a missing amount are treated as 'Open'.
df1['GRIR Status'] = np.where(still_to_invoice.abs() < 0.1, 'Closed', 'Open')

print('done')

done


### 12. Assign PO Status


In [16]:
# A PO is 'Closed' only when both its GR Status and its GRIR Status are
# 'Closed'; every other combination (including a missing status) is 'Open'.
gr_is_closed = df1['GR Status'].eq('Closed')
grir_is_closed = df1['GRIR Status'].eq('Closed')
both_closed = gr_is_closed & grir_is_closed

df1['PO Status'] = np.where(both_closed, 'Closed', 'Open')
print('done')

done


### 13. Calculate Still to be delivered (MYR-Value)


In [17]:
# Set USD Currency rate here (multiplier applied to USD amounts).
usd_currency_rate = 4.1

# Rows whose Currency is 'USD' are multiplied by the rate above; all other rows
# keep their 'Still to be delivered (value)' amount unchanged.
remaining_value = df1['Still to be delivered (value)']
is_usd = df1['Currency'].eq('USD')
df1['Still to be delivered (MYR-Value)'] = remaining_value.mask(
    is_usd, remaining_value * usd_currency_rate
)
print('done')

done


## Part B : Prepare Report

### 1. Write data to excel file

In [18]:
# Build the report frame: a copy of df1 without the columns that are not
# wanted in the output. df1 itself is left unchanged.
report_excluded_columns = [
    'Vendor/supplying plant',
    'Outline Agreement',
    'Deletion Indicator',
    'Req. Tracking Number',
]
df_final = df1.drop(columns=report_excluded_columns)
print('done')

done


In [None]:
from pathlib import Path

# Write df_final to an Excel workbook (sheet 'data', no index column) and
# report how long the export took.
start = time.time()
output_filename = 'output/final.xlsx'

# Create the output folder if it is missing; to_excel does not create parent
# directories and would otherwise fail on a fresh checkout.
Path(output_filename).parent.mkdir(parents=True, exist_ok=True)

df_final.to_excel(output_filename, sheet_name='data', engine='xlsxwriter', index=False)
print('done')
end = time.time()
print(end - start)

In [None]:
# start = time.time()
# df1.to_csv('output/final_csv.csv', index=False)
# end = time.time()
# print(end - start)