In [3]:
import argparse
import datetime
import json
import os
import pprint
import time
from io import StringIO

import httplib2
import numpy as np
import pandas as pd
from apiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials

pd.options.display.max_rows = 6


def create_credentials(client_id=None, client_secret=None, refresh_token=None):
    """Create Google OAuth2 credentials.

    The secrets are deliberately not stored in the notebook. Each value is
    taken from the matching argument when given, otherwise from an environment
    variable.

    NOTE(review): earlier revisions of this notebook embedded a client secret
    and a refresh token in this function. Treat those values as leaked and
    revoke/rotate them in the Google Cloud console.

    Args:
        client_id: Client id of a Google Cloud console project.
          Falls back to the ``DS_CLIENT_ID`` environment variable.
        client_secret: Client secret of a Google Cloud console project.
          Falls back to the ``DS_CLIENT_SECRET`` environment variable.
        refresh_token: A refresh token authorizing the Google Cloud console
          project to access the DS data of some Google user.
          Falls back to the ``DS_REFRESH_TOKEN`` environment variable.
    Returns:
        OAuth2Credentials
    Raises:
        ValueError: if any of the three values is neither passed in nor set
          in the environment.
    """
    settings = {
        'DS_CLIENT_ID': client_id or os.environ.get('DS_CLIENT_ID'),
        'DS_CLIENT_SECRET': client_secret or os.environ.get('DS_CLIENT_SECRET'),
        'DS_REFRESH_TOKEN': refresh_token or os.environ.get('DS_REFRESH_TOKEN'),
    }

    # Fail early with an actionable message instead of a late OAuth error.
    missing = sorted(name for name, value in settings.items() if not value)
    if missing:
        raise ValueError(
            'Missing OAuth2 settings: %s. Pass them as arguments or set the '
            'environment variables.' % ', '.join(missing))

    # access_token/token_expiry are left empty, exactly as before; only the
    # refresh token is supplied.
    return OAuth2Credentials(access_token=None,
                             client_id=settings['DS_CLIENT_ID'],
                             client_secret=settings['DS_CLIENT_SECRET'],
                             refresh_token=settings['DS_REFRESH_TOKEN'],
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search API client.

    Args:
        credentials: An object exposing ``authorize(http=...)``, e.g. the
          OAuth2Credentials returned by create_credentials or produced by a
          flow from the oauth2client.client package.
    Returns:
        The 'doubleclicksearch' v2 service object returned by ``build``.
    """
    # authorize() wraps the transport so every request carries the
    # credential headers.
    authorized_http = credentials.authorize(http=httplib2.Http())

    # The discovery client is bound to the authorized transport.
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, wait_seconds=10):
    """Poll the API with the reportId until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: How many times to ask whether the report is ready.
        wait_seconds: Pause between two attempts, in seconds.
    Returns:
        pd.DataFrame with all file fragments concatenated (fresh 0..n-1 index)
        when the report became ready; an empty DataFrame if the ready report
        lists no files; None if an HttpError occurred or the report was still
        not ready after ``max_attempts`` attempts.
    """
    for _ in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if json_data['isReportReady']:
                pprint.pprint('The report is ready.')

                # For large reports, DS automatically fragments the report into
                # multiple files. The 'files' property lists one entry per
                # fragment; a fragment is addressed by report ID plus its
                # 0-based index.
                fragments = []
                for i in range(len(json_data['files'])):
                    pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                    fragments.append(download_files(service, report_id, str(i)))

                # pd.concat raises on an empty list, so keep the old
                # "empty frame" result for a report without files.
                if not fragments:
                    return pd.DataFrame()
                # Concatenate once instead of growing a frame per fragment
                # (DataFrame.append no longer exists in pandas 2.x).
                return pd.concat(fragments, ignore_index=True)

            pprint.pprint('Report is not ready. I will try again.')
            time.sleep(wait_seconds)
        except HttpError as e:
            # The error body is normally JSON; fall back to a generic reason
            # rather than masking the HTTP error with a parsing error.
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                reason = 'unknown'

            # See Response Codes
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            return None

    pprint.pprint('Report %s was not ready after %d attempts.' % (report_id, max_attempts))
    return None
        
def download_files(service, report_id, report_fragment):
    """Download one fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame parsed from the fragment's CSV content.
    """
    payload = service.reports().getFile(
        reportId=report_id, reportFragment=report_fragment).execute()

    # The payload is decoded as UTF-8 and handed to pandas through an
    # in-memory text buffer.
    csv_buffer = StringIO(payload.decode('utf-8'))
    return pd.read_csv(csv_buffer)

def request_report(service, start_date, end_date, columns,
                   agency_id="20100000000000932",
                   advertiser_id="21700000001365301",
                   label="KPI_Retail"):
    """Request a keyword report and return the report ID that DS assigns to it.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: First day of the report, passed as ``timeRange.startDate``
          (the notebook uses 'YYYY-MM-DD' strings).
        end_date: Last day of the report, passed as ``timeRange.endDate``.
        columns: list of column names that will be in the report.
        agency_id: ``reportScope.agencyId`` of the request.
        advertiser_id: ``reportScope.advertiserId`` of the request.
        label: The report is filtered to rows whose ``effectiveLabels``
          contain this label.
    Returns:
        The report id.
    """
    # Other scopes previously noted next to this request:
    #   advertiserId 21700000001406447    - Callaway Apparel - Perry Ellis International
    #   engineAccountId 700000001564770   - Google - Callaway Apparel
    #   advertiserId 21700000001131725    - Celebrity Cruise
    #   engineAccountId 700000001217833   - Celebrity Cruise
    #   engineAccountId 700000001561242   - Celebrity Cruise - Juba Plus
    body = {
        "reportScope": {
            "agencyId": agency_id,
            "advertiserId": advertiser_id,
        },
        "reportType": "keyword",
        "columns": [{'columnName': column} for column in columns],
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date,
        },
        "filters": [
            {
                "column": {"columnName": "effectiveLabels"},
                "operator": "containsElement",
                "values": [label],
            }
        ],
        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false",
    }

    json_data = service.reports().request(body=body).execute()
    return json_data['id']

In [4]:
# Download reports: authorize, request the three keyword reports for the
# date range, then poll each one until it can be downloaded.
creds = create_credentials()
service = get_service(creds)

end_date = "2017-12-17"
start_date = "2017-08-01"

print ([start_date, end_date])

# Performance metrics per keyword / date / device.
non_hva_columns = [
    'account', 'campaign', 'adGroup', 'keywordText', 'date', 'deviceSegment',
    'status', 'keywordMatchType', 'keywordMaxCpc',
    'topOfPageBidCurrent', 'topOfPageBidAvg',
    'effectiveKeywordMaxCpc', 'impr', 'clicks', 'cost', 'effectiveLabels',
    'avgCpc', 'avgPos', 'dfaRevenue',
]
# Floodlight activity counts per keyword / date / device.
hva_columns = [
    'account', 'campaign', 'adGroup', 'keywordText', 'status', 'date',
    'deviceSegment', 'keywordMatchType',
    'floodlightActivity', 'dfaActions', 'effectiveLabels',
]
# Keyword-level attributes (no date or device breakdown requested).
kw_meta_columns = [
    'keywordText', 'account', 'campaign', 'adGroup', 'status', 'keywordMatchType',
    'keywordMaxCpc', 'topOfPageBidCurrent', 'topOfPageBidAvg',
    'avgPos',
]

# All three requests are submitted before any polling starts.
REPORTID_nonHVA = request_report(service, start_date, end_date, non_hva_columns)
REPORTID_HVA = request_report(service, start_date, end_date, hva_columns)
REPORTID_kw_meta = request_report(service, start_date, end_date, kw_meta_columns)


non_hva = poll_report(service, REPORTID_nonHVA)
hva = poll_report(service, REPORTID_HVA)
kw_meta = poll_report(service, REPORTID_kw_meta)

['2017-08-01', '2017-12-17']
'Report is not ready. I will try again.'
'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnrvIxEUPKxXjz'
'The report is ready.'
'Downloading fragment 0 for report AAAnZR3QkSF3PLrW'
'The report is ready.'
'Downloading fragment 0 for report AAAn4uNrJ9a5J6Iu'


In [5]:
#merged hva and non_hva
def merge_hva_and_non_hva(hva, non_hva, columns_hva=None):
    '''Pivot the floodlight report wide and left-join it onto the metrics report.

    Each distinct combination of the key columns in ``hva`` becomes one row
    with one column per floodlight activity in ``columns_hva``. The value is
    the ``dfaActions`` of the first row with that activity in the group, or 0
    when the activity does not occur. That wide frame is left-joined onto
    ``non_hva``, all missing values are replaced by 0, and an integer ``HVA``
    column holding the row-wise sum of the activity columns is added.

    Args:
        hva: pd.DataFrame with the key columns plus 'floodlightActivity' and
          'dfaActions'.
        non_hva: pd.DataFrame with the key columns plus the metric columns.
        columns_hva: list of string, the floodlight activities to turn into
          columns. Defaults to the built-in list below.
    Returns:
        pd.DataFrame with one row per row of ``non_hva``.
    '''
    if columns_hva is None:
        columns_hva = [
            'All Products',
            'Checkout',
            'Diamond - Cabernet Sauvignon page',
            'Diamond - Chardonnay page',
            'Diamond - Claret page',
            'Diamond - Diamond Red Blend page',
            'Diamond - Malbec page',
            'Diamond - Merlot page',
            'Diamond - Pavilion page',
            'Diamond - Pinot Grigio page',
            'Diamond - Pinot Noir page',
            'Diamond - Sauvignon Blanc page',
            'Diamond - Syrah Shiraz page',
            'Diamond - Zinfandel page',
            'Diamond Adventure landing page',
            'FFC Home Page',
            'Membership',
            'Membership-Join now',
            'Our Wines Diamond Collection',
            'Shop - Diamond Collection Wines',
            'Shop Now',
            'Shop online - Wine',
            'Shopping Cart',
            'Store Locator',
            'Thank You Page',
            'Visit Location']
    else:
        columns_hva = list(columns_hva)

    # The same keys identify a row in both reports.
    key_columns = ['account', 'campaign', 'keywordText', 'keywordMatchType',
                   'adGroup', 'status', 'date', 'deviceSegment']

    # Collect plain dicts and build the frame once; growing a DataFrame row by
    # row is quadratic and DataFrame.append no longer exists in pandas 2.x.
    records = []
    for keys, group in hva.groupby(key_columns):
        record = dict(zip(key_columns, keys))

        # First dfaActions value seen for each activity within this group.
        first_actions = {}
        for activity, actions in zip(group['floodlightActivity'], group['dfaActions']):
            first_actions.setdefault(activity, actions)

        for column in columns_hva:
            record[column] = first_actions.get(column, 0)
        records.append(record)

    # Passing columns= keeps the join keys present even when hva has no rows,
    # so the merge below does not fail on an empty floodlight report.
    result = pd.DataFrame(records, columns=key_columns + columns_hva)

    # combine hva and non_hva
    merged = non_hva.merge(result, on=key_columns, how='left')

    # Rows without any floodlight activity get 0 instead of NaN.
    merged = merged.fillna(value=0)

    # generate new fields
    merged['HVA'] = merged[columns_hva].sum(axis=1).apply(int)
    return merged

In [6]:
df = merge_hva_and_non_hva(hva, non_hva)

# Name of the weekday for every 'YYYY-MM-DD' date string.
weekdays='Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
df['weekday'] = df['date'].apply(lambda x:weekdays[datetime.datetime.strptime(x, '%Y-%m-%d').weekday()])


# Columns that are summed within every view.
columns_cum = ['impr', 'clicks', 'cost', 'dfaRevenue', 'HVA',
                'All Products',
                'Checkout',
                'Diamond - Cabernet Sauvignon page',
                'Diamond - Chardonnay page',
                'Diamond - Claret page',
                'Diamond - Diamond Red Blend page',
                'Diamond - Malbec page',
                'Diamond - Merlot page',
                'Diamond - Pavilion page',
                'Diamond - Pinot Grigio page',
                'Diamond - Pinot Noir page',
                'Diamond - Sauvignon Blanc page',
                'Diamond - Syrah Shiraz page',
                'Diamond - Zinfandel page',
                'Diamond Adventure landing page',
                'FFC Home Page',
                'Membership',
                'Membership-Join now',
                'Our Wines Diamond Collection',
                'Shop - Diamond Collection Wines',
                'Shop Now',
                'Shop online - Wine',
                'Shopping Cart',
                'Store Locator',
                'Thank You Page',
                'Visit Location']

# The formatting cell uses workbook.add_format / worksheet.set_column, which
# are xlsxwriter APIs, so request that engine explicitly instead of relying on
# whichever engine pandas picks by default.
# NOTE(review): absolute, user-specific output path - consider making it configurable.
writer = pd.ExcelWriter('/home/jian/Projects/FFC/SEM/Sem_%s_%s.xlsx'%(start_date, end_date),
                        engine='xlsxwriter')


def _with_ratio_columns(totals):
    """Insert the derived ratio columns into a frame of summed metrics.

    Every column is inserted at position 4, so the final order from position
    4 on is: ctr, cpm, cpc, cphva, CPStoreLocator. The frame is modified in
    place and returned.
    """
    totals.insert(4, 'CPStoreLocator', totals['cost']/(totals['Visit Location']+ totals['Store Locator']))
    totals.insert(4, 'cphva', totals['cost']/totals['HVA'])
    totals.insert(4, 'cpc', totals['cost']/totals['clicks'])
    totals.insert(4, 'cpm', totals['cost']/totals['impr'])
    totals.insert(4, 'ctr', totals['clicks']/totals['impr'])
    return totals


# Keyword sheet: totals per keyword, enriched with the keyword attributes.
view = 'keywordText'
keyword_keys = ['keywordText','account','status','keywordMatchType','campaign','adGroup']
result = _with_ratio_columns(df.groupby(keyword_keys)[columns_cum].sum())
# how="right" keeps every aggregated keyword even without a kw_meta match.
result = pd.merge(kw_meta, result.reset_index(), on=keyword_keys, how="right")
result = result.set_index('keywordText').sort_values(by='CPStoreLocator')
# sheet_name is passed by keyword: positional use is deprecated in recent pandas.
result.to_excel(writer, sheet_name=view)


# One sheet per single-dimension view.
for view in ['date', 'weekday', 'deviceSegment']:
    result = _with_ratio_columns(df.groupby(view)[columns_cum].sum())

    # The date sheet stays in chronological (index) order.
    if view !='date':
        result = result.sort_values(by='CPStoreLocator')
    result.to_excel(writer, sheet_name=view)
# The workbook is not written here; later cells still add the hour sheet and formats.

In [7]:
#manually run

# The two hour-of-day workbooks are exported by hand from the DCS UI,
# filtered to all labels with "KPI_Retail".

import pandas as pd
pd.options.display.max_rows=6

hva_hour = pd.read_excel('/home/jian/Projects/FFC/SEM/Hour Downloads/Aug 1 to Dec 17/FFC Hour of Day Hva.xlsx')
non_hva_hour = pd.read_excel('/home/jian/Projects/FFC/SEM/Hour Downloads/Aug 1 to Dec 17/FFC Hour of Day Non Hva.xlsx')

# If 'Cost' arrives as text with thousands separators, convert it first:
# non_hva_hour['Cost'] = non_hva_hour['Cost'].apply(lambda x: float(x.replace(',', '')))

# Several columns must be selected with a list; the tuple form
# groupby(...)['a', 'b'] was removed in pandas 2.0.
result_non_hva = non_hva_hour.groupby('Hour of day')[['Impr', 'Clicks', 'Cost','Revenue']].sum()

# Expose the hour as a regular column so it can be used as the merge key.
result_non_hva.insert(0, 'hour', result_non_hva.index)

columns_hva= [
    'All Products',
    'Checkout',
    'Diamond - Cabernet Sauvignon page',
    'Diamond - Chardonnay page',
    'Diamond - Claret page',
    'Diamond - Diamond Red Blend page',
    'Diamond - Malbec page',
    'Diamond - Merlot page',
    'Diamond - Pavilion page',
    'Diamond - Pinot Grigio page',
    'Diamond - Pinot Noir page',
    'Diamond - Sauvignon Blanc page',
    'Diamond - Syrah Shiraz page',
    'Diamond - Zinfandel page',
    'Diamond Adventure landing page',
    'FFC Home Page',
    'Membership',
    'Membership-Join now',
    'Our Wines Diamond Collection',
    'Shop - Diamond Collection Wines',
    'Shop Now',
    'Shop online - Wine',
    'Shopping Cart',
    'Store Locator',
    'Thank You Page',
    'Visit Location']

# One record per hour with the summed actions of every listed activity.
# Records are collected in a list and turned into a frame once
# (DataFrame.append no longer exists in pandas 2.x).
hva_records = []
for hour_of_day, group in hva_hour.groupby('Hour of day'):
    record = {'hour': hour_of_day}
    for column in columns_hva:
        # Summing an empty selection gives 0, which covers activities that
        # do not occur in this hour.
        record[column] = group.loc[group['Floodlight activity'] == column, 'Actions'].sum()
    hva_records.append(record)

# Keyed by 'hour' only. The previous version seeded the frame with a
# 'Hour of day' column that was never filled and ended up as an empty
# column in the sheet.
result_hva = pd.DataFrame(hva_records, columns=['hour'] + columns_hva)

result = result_non_hva.merge(result_hva, on='hour', how='left')

# Every column is inserted at position 5, so the final order after
# 'Revenue' is: ctr, cpm, cpc, cphva, CPStoreLocator, HVA.
result.insert(5, 'HVA', result[columns_hva].sum(axis=1))
result.insert(5, 'CPStoreLocator', result['Cost']/(result['Visit Location']+ result['Store Locator']))
result.insert(5, 'cphva', result['Cost']/result['HVA'])
result.insert(5, 'cpc', result['Cost']/result['Clicks'])
result.insert(5, 'cpm', result['Cost']/result['Impr'])
result.insert(5, 'ctr', result['Clicks']/result['Impr'])
result = result.set_index('hour').sort_values(by='CPStoreLocator')

# sheet_name is passed by keyword: positional use is deprecated in recent pandas.
result.to_excel(writer, sheet_name='hour')
# The workbook is written to disk by the formatting cell.

In [8]:
# Apply number formats to the ratio/currency columns of every sheet, then
# write the workbook to disk.
workbook  = writer.book

format1_Dollar = workbook.add_format({'num_format': '[$$-409]#,##0.00'})
format2_Pctg = workbook.add_format({'num_format': '0.00%'})

# Keyword sheet: the keyword attribute columns shift the metrics to the
# right, so it has its own column ranges. Width is left unchanged (None).
keywordText = writer.sheets['keywordText']
keywordText.set_column('G:I', None, format1_Dollar)
keywordText.set_column('M:N', None, format1_Dollar)
keywordText.set_column('P:S', None, format1_Dollar)
keywordText.set_column('O:O', None, format2_Pctg)

# The four single-dimension sheets share one layout: D:E and G:J get the
# dollar format, F the percentage format.
for sheet_name in ['date', 'weekday', 'deviceSegment', 'hour']:
    worksheet = writer.sheets[sheet_name]
    worksheet.set_column('D:E', None, format1_Dollar)
    worksheet.set_column('G:J', None, format1_Dollar)
    worksheet.set_column('F:F', None, format2_Pctg)

# ExcelWriter.save() was removed in pandas 2.0; close() writes the file and
# releases the handle.
writer.close()