# Processor
### Takes audits created by Cruncher and builds Amazon Advertising Bulk Uploads

In [60]:
# Dependencies
import pandas as pd
import numpy as np
import os
import sys
import glob
import openpyxl
from itertools import islice
from openpyxl import Workbook,load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from datetime import date

# Create Time Stamp
today = date.today()
td_form = today.strftime("%b-%d-%Y")

In [61]:
# Paths
audits_dir = 'ToProcess_Audits/'
bs_path = f'ToUpload_Bulksheets/BulkUpload_{td_form}.xlsx'
rep_path = f'ToUpload_Bulksheets/report_{td_form}.txt'
hist_path = 'Resources/History/adjustment_history.csv'

In [62]:
# Extract Data from completed Audit Sheets

dfa = None
# Use glob library to iterate through multiple files matching wildcard
this = glob.glob(f'{audits_dir}*.xlsx')
if (len(this)>=1):
    print(f'Found {len(this)} Completed Audits in folder.. Processing')

    # Open each respective file
    for audit in this:
        #Use read-only mode for efficiency
        wb = load_workbook(filename=audit, read_only=True, data_only=True)
        for sn in wb.sheetnames:
            #Ignore first tab
            if (sn != 'Checklist'):
                #Select appropriate range for each tab containing an Audit
                val = list(wb[sn].values)
                col = val[6]
                data = val[7:]
                dfb = pd.DataFrame(data,columns=col)
                if dfa is not None:
                    #Concatenate onto dataframe (It can all be together on 1)
                    dfa = pd.concat([dfa,dfb]).reset_index(drop=True)
                else:
                    dfa = dfb
else:
    #Warn if no xlsx files were in folder and close script
    print(f'Could Not Find Any Completed Audits.. Place Files in {audits_dir}')
    sys.exit()


Found 1 Completed Audits in folder.. Processing


In [63]:
# Clean Data

print('Cleaning Data..')
#Drop blank rows
dfa = dfa.dropna(how='all').reset_index(drop=True)

#Drop rows with no changes or Notes
#dfa = dfa.loc[(dfa.Notes.notna()) | ((dfa['Skip?'] == False) & (dfa['Override Bid'].notna() | (dfa["Will Set Bid to:"].notna())))]

# Warnings
big_ch = dfa.loc[(dfa['Skip?'] == False) & ((dfa["Will Set Bid to:"] > (1.35*dfa.Bid)) | (dfa["Will Set Bid to:"] < (0.65*dfa.Bid)))].shape[0]
if big_ch: print(f'WARN: Applying {big_ch} Bid changes larger than 35%')

#Add Date Column
dfa['Date'] = td_form

# Remove Skipped last run tags and reapply to new skips
dfa.Notes.str.replace('(Skipped Last Run..)','')
dfa.Notes = dfa.apply(lambda x: f'(Skipped Last Run..) \n{x.Notes if x.Notes.notna else ""}' if x['Skip?'] > 0 else x.Notes, axis=1)

#Rearrange columns to match History File
hist_df = dfa[['Date','Product','Entity','Campaign Id','Ad Group Id','Keyword Id (Read only)','Product Targeting Id (Read only)','Keyword Text','Product Targeting Expression','Campaign','Ad Group','Product Name','AG Type','Match Type','Bid','Flag','Flag Type','Flag Text','Reco. Bid','Skip?','Override Bid','Will Set Bid to:','Notes']]

# Select Only rows with valid bid changes
dfa = dfa.loc[(dfa['Will Set Bid to:'].notna()) & dfa['Skip?'] == False]

#Generate report of changes
rep=[]
for prod in dfa['Product Name'].unique():
    this_df = dfa.loc[(dfa['Product Name'] == prod)]
    raised = this_df.loc[(this_df.Bid < this_df['Will Set Bid to:'])].shape[0]
    lower = this_df.loc[(this_df.Bid > this_df['Will Set Bid to:'])].shape[0]
    if (raised>0 | lower>0):
        rep.append(f'------------------------------- \nProduct Name: {prod} \n')
        if raised>0:
            rep.append(f'Raised Bids on {raised} Targets \n')
        if lower>0:
            rep.append(f'Lowered Bids on {lower} Targets \n')
with open(rep_path,'w') as f:
    f.writelines(rep)

#Set column indices to match bulksheet format
dfa['Operation'] = 'Update'
dfa['Keyword Id'] = dfa['Keyword Id (Read only)']
dfa['Product Targeting Id'] = dfa['Product Targeting Id (Read only)']
dfa['Bid'] = dfa['Will Set Bid to:']
dfa['State'] = 'Enabled'
dfa = dfa[['Product','Entity','Operation','Campaign Id','Ad Group Id','Keyword Id','Product Targeting Id','Bid']]

# Correct Invalid Bids
dfa[(dfa.Product == 'Sponsored Products') & (dfa.Bid<0.02)].Bid = 0.02
dfa[(dfa.Product == 'Sponsored Brands') & (dfa.Bid<0.10)].Bid = 0.10

#Select SP and create new DF
sp_upload = dfa.loc[(dfa.Product == 'Sponsored Products')]

#Select SB and create DF
sb_upload = dfa.loc[(dfa.Product == 'Sponsored Brands')][['Product','Entity','Operation','Campaign Id','Keyword Id','Product Targeting Id','Bid']]


Cleaning Data..
WARN: Applying 8 Bid changes larger than 35%


  dfa.Notes.str.replace('(Skipped Last Run..)','')


In [64]:
#Append history to history file
print('Saving History..')
hist_df.to_csv(hist_path, mode='a', index=False, header=False)

Saving History..


In [65]:
sb_upload

Unnamed: 0,Product,Entity,Operation,Campaign Id,Keyword Id,Product Targeting Id,Bid
0,Sponsored Brands,Keyword,Update,23896926942109,184805681053334,,1.68
1,Sponsored Brands,Keyword,Update,23896926942109,252415618504372,,1.93
2,Sponsored Brands,Keyword,Update,23896926942109,106969592849699,,1.90
3,Sponsored Brands,Keyword,Update,23896926942109,108422569065321,,1.90
4,Sponsored Brands,Keyword,Update,23896926942109,151482810692426,,1.68
...,...,...,...,...,...,...,...
601,Sponsored Brands,Keyword,Update,196297977829354,107370755354609,,2.62
602,Sponsored Brands,Keyword,Update,196297977829354,126658983431377,,2.10
603,Sponsored Brands,Keyword,Update,196297977829354,245558634677690,,2.10
604,Sponsored Brands,Keyword,Update,196297977829354,387389763584,,2.10


In [66]:
#Create new bulksheet File for upload
print('Generating Bulksheet..')
wb = Workbook(write_only = True)
sp = wb.create_sheet(title='Sponsored Products Campaigns')
for r in dataframe_to_rows(sp_upload, index=False, header=True):
    sp.append(r)
sb = wb.create_sheet(title='Sponsored Brands Campaigns')
for r in dataframe_to_rows(sb_upload, index=False, header=True):
    sb.append(r)
wb.save(bs_path)
wb.close()
print('Success!')

Generating Bulksheet..
Success!
