## Data processing automation
1. Code from writing excel session + improvement
2. Migration to .py script
3. Text editor (Atom)
4. Running a script (Anaconda prompt)

#### Writing an excel file

In [1]:
import pandas as pd

#### Datasets

In [2]:
# sales report: read the exported CSV into a DataFrame and display it
report_path = '../datasets/output_sales_report.csv'
report = pd.read_csv(report_path)
report

Unnamed: 0,SalesRep,Month,Sales
0,BKK1101,JAN,97072
1,BKK1102,JAN,54712
2,BKK1103,JAN,48605
3,BKK2101,JAN,21028
4,BKK2102,JAN,46199
...,...,...,...
259,LTH101,DEC,0
260,LTH102,DEC,0
261,LTH103,DEC,0
262,LTH104,DEC,0


In [3]:
# sales target
# NOTE(review): load_workbook is no longer needed by this cell; the import is
# kept in case other cells rely on the name.
from openpyxl import load_workbook 

# Read every sheet of the targets workbook in a single pass. With
# sheet_name=None pandas returns a dict {sheet name: DataFrame} in workbook
# order, so the file is parsed once instead of once per sheet.
# header=4 -> the column labels are on the 5th row of each sheet;
# usecols  -> keep only the first 13 columns (SalesRep + 12 months).
target_sheets = pd.read_excel('../datasets/Sales_targets.xlsx', header=4, sheet_name=None,
                              usecols=list(range(13)))

# Show which sheets were picked up.
for sheet_name in target_sheets:
    print(sheet_name)

# Concatenate once rather than growing a DataFrame inside the loop. Each
# sheet keeps its own row index, exactly as before (0, 1, 2, 0, 1, ...).
targets = pd.concat(list(target_sheets.values()))

targets.head()

Bangkok 1
Bangkok 2
UPPER TH
LOWER TH


Unnamed: 0,SalesRep,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC
0,BKK1101,25000,27500.0,30250.0,33275.0,36602.5,40262.75,25000,27500.0,30250.0,33275.0,36602.5,40262.75
1,BKK1102,25000,27500.0,30250.0,33275.0,36602.5,40262.75,25000,27500.0,30250.0,33275.0,36602.5,40262.75
2,BKK1103,25000,27500.0,30250.0,33275.0,36602.5,40262.75,25000,27500.0,30250.0,33275.0,36602.5,40262.75
0,BKK2101,25000,27500.0,30250.0,33275.0,36602.5,40262.75,25000,27500.0,30250.0,33275.0,36602.5,40262.75
1,BKK2102,25000,27500.0,30250.0,33275.0,36602.5,40262.75,25000,27500.0,30250.0,33275.0,36602.5,40262.75


In [4]:
# Wide -> long: one row per (SalesRep, Month) with the monthly target value.
# Every column after the first one is treated as a month column.
month_columns = targets.columns[1:]
mlt_target = pd.melt(
    targets,
    id_vars='SalesRep',
    value_vars=month_columns,
    var_name='Month',
    value_name='Target',
)
mlt_target

Unnamed: 0,SalesRep,Month,Target
0,BKK1101,JAN,25000.00
1,BKK1102,JAN,25000.00
2,BKK1103,JAN,25000.00
3,BKK2101,JAN,25000.00
4,BKK2102,JAN,25000.00
...,...,...,...
259,LTH101,DEC,11273.57
260,LTH102,DEC,11273.57
261,LTH103,DEC,11273.57
262,LTH104,DEC,11273.57


In [5]:
# target type casting: round to the nearest whole number, then store as int
rounded_target = mlt_target['Target'].round(0)
mlt_target['Target'] = rounded_target.astype(int)
mlt_target

Unnamed: 0,SalesRep,Month,Target
0,BKK1101,JAN,25000
1,BKK1102,JAN,25000
2,BKK1103,JAN,25000
3,BKK2101,JAN,25000
4,BKK2102,JAN,25000
...,...,...,...
259,LTH101,DEC,11274
260,LTH102,DEC,11274
261,LTH103,DEC,11274
262,LTH104,DEC,11274


#### Map sales target with report

In [6]:
# Pair each sales figure with its target. The inner join keeps only the
# (SalesRep, Month) combinations present in both frames.
join_keys = ['SalesRep', 'Month']
compare = pd.merge(
    report,
    mlt_target,
    on=join_keys,
    how='inner',
)
compare.head(3)

Unnamed: 0,SalesRep,Month,Sales,Target
0,BKK1101,JAN,97072,25000
1,BKK1102,JAN,54712,25000
2,BKK1103,JAN,48605,25000


In [7]:
# Sanity check: list the distinct month labels that survived the merge.
months_in_compare = compare['Month'].unique()
months_in_compare

array(['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP',
       'OCT', 'NOV', 'DEC'], dtype=object)

In [8]:
# month order: lookup table mapping each month label to its calendar position (1-12)
month_labels = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
month_numbers = list(range(1, len(month_labels) + 1))
mo_order = pd.DataFrame({'Month': month_labels, 'MonthNo': month_numbers})
mo_order

Unnamed: 0,Month,MonthNo
0,JAN,1
1,FEB,2
2,MAR,3
3,APR,4
4,MAY,5
5,JUN,6
6,JUL,7
7,AUG,8
8,SEP,9
9,OCT,10


In [9]:
# Attach each month's calendar position (MonthNo) to the comparison table.
# Any MonthNo column left over from a previous run of this cell is dropped
# first; otherwise re-running the cell would produce MonthNo_x / MonthNo_y
# and break the later pivot on 'MonthNo'.
compare = pd.merge(
    compare.drop(columns='MonthNo', errors='ignore'),
    mo_order,
    on=['Month'],
    how='inner',
)
compare.head(3)

Unnamed: 0,SalesRep,Month,Sales,Target,MonthNo
0,BKK1101,JAN,97072,25000,1
1,BKK1102,JAN,54712,25000,1
2,BKK1103,JAN,48605,25000,1


#### Calculate achievement ratio

In [10]:
# AR = actual sales divided by the target for the same SalesRep and Month
sales = compare['Sales']
target = compare['Target']
compare['AR'] = sales / target
compare.head(3)

Unnamed: 0,SalesRep,Month,Sales,Target,MonthNo,AR
0,BKK1101,JAN,97072,25000,1,3.88288
1,BKK1102,JAN,54712,25000,1,2.18848
2,BKK1103,JAN,48605,25000,1,1.9442


In [11]:
# pivot - long to wide format: one row per SalesRep, one AR column per MonthNo
ar_by_month = compare.pivot(index=['SalesRep'], columns=['MonthNo'], values=['AR'])
# Turn the SalesRep index back into a regular column.
pvt_compare = ar_by_month.reset_index(drop=False)
pvt_compare

Unnamed: 0_level_0,SalesRep,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR
MonthNo,Unnamed: 1_level_1,1,2,3,4,5,6,7,8,9,10,11,12
0,BKK1101,3.88288,0.797818,2.209752,2.859174,0.606262,1.172441,0.80404,0.0,0.0,0.0,0.0,0.0
1,BKK1102,2.18848,1.446109,1.980959,1.586627,1.877906,0.476964,3.38684,0.0,0.0,0.0,0.0,0.0
2,BKK1103,1.9442,1.711345,0.871669,0.640661,1.563806,1.894742,1.88116,0.0,0.0,0.0,0.0,0.0
3,BKK2101,0.84112,3.019127,1.799636,1.68009,0.69773,2.195986,2.09052,0.0,0.0,0.0,0.0,0.0
4,BKK2102,1.84796,2.158509,0.832331,1.168475,1.81649,0.356854,2.69008,0.0,0.0,0.0,0.0,0.0
5,BKK2103,2.64756,2.132218,2.669917,2.174365,2.14436,0.281822,1.12392,0.0,0.0,0.0,0.0,0.0
6,BKK2104,3.38536,0.446,2.624959,2.404237,0.790536,0.935772,2.006,0.0,0.0,0.0,0.0,0.0
7,BKK2105,1.21644,2.771127,2.41838,0.535026,0.844576,0.539031,2.4104,0.0,0.0,0.0,0.0,0.0
8,LTH101,1.082714,0.217662,1.070248,0.848771,0.333789,0.21146,0.0,0.0,0.0,0.0,0.0,0.0
9,LTH102,1.341429,1.003506,0.659504,0.933884,0.868085,0.559961,1.181429,0.0,0.0,0.0,0.0,0.0


In [12]:
# Keep only the rows whose SalesRep code contains the text 'BKK1'.
is_bkk1 = pvt_compare['SalesRep'].str.contains('BKK1')
data_bkk1 = pvt_compare[is_bkk1]
data_bkk1

Unnamed: 0_level_0,SalesRep,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR,AR
MonthNo,Unnamed: 1_level_1,1,2,3,4,5,6,7,8,9,10,11,12
0,BKK1101,3.88288,0.797818,2.209752,2.859174,0.606262,1.172441,0.80404,0.0,0.0,0.0,0.0,0.0
1,BKK1102,2.18848,1.446109,1.980959,1.586627,1.877906,0.476964,3.38684,0.0,0.0,0.0,0.0,0.0
2,BKK1103,1.9442,1.711345,0.871669,0.640661,1.563806,1.894742,1.88116,0.0,0.0,0.0,0.0,0.0


#### Generate excel report

In [13]:
# load modules
from openpyxl import Workbook
from openpyxl.styles import colors
from openpyxl.styles import Font, Color, PatternFill
from openpyxl.utils.dataframe import dataframe_to_rows

# create workbook
wb = Workbook()
# create worksheet
ws = wb.active

# create title and attributes
ws['A1'] = 'Target Report'
ws['A1'].font = Font(bold=True)

ws['A3'] = 'Team'
ws['B3'] = 'BKK1'
ws['B3'].font = Font(color="00FF0000", italic=True)

ws['A5'] = 'SalesRep'
ws['A5'].fill = PatternFill("solid", fgColor="00FFFF00")
ws['A5'].font = Font(bold=True)

# create columns: header row 5, columns B..M, one month label per cell.
# zip pairs each header cell with the next label from mo_order, replacing the
# manual counter.
for row in ws.iter_rows(min_row=5, max_row=5, min_col=2, max_col=13):
    for cell, month in zip(row, mo_order['Month']):
        cell.value = month
        cell.fill = PatternFill("solid", fgColor="00FFFF00")
        cell.font = Font(bold=True)

# load/add data
# ws.append writes below the last used row, so remember where the data block
# starts and ends instead of assuming a fixed number of sales reps.
first_data_row = ws.max_row + 1
for r in dataframe_to_rows(data_bkk1, index=False, header=False):
    ws.append(r)
last_data_row = ws.max_row

# number format - percent, applied to every data row that was just written
# (no rows are touched when data_bkk1 is empty).
for row in ws.iter_rows(min_row=first_data_row, max_row=last_data_row, min_col=2, max_col=13):
    for cell in row:
        cell.number_format = '0.00%'

# save to excel 
wb.save('../datasets/ar_report.xlsx')