In [1]:
import pandas as pd
import numpy as np
pd.options.display.max_rows = 6

import argparse
import httplib2
import pprint
import time
import datetime
from io import StringIO

from googleapiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials():
    """Create Google OAuth2 credentials.

    Args:
        client_id: Client id of a Google Cloud console project.
        client_secret: Client secret of a Google Cloud console project.
        refresh_token: A refresh token authorizing the Google Cloud console project
          to access the DS data of some Google user.
    Returns:
        OAuth2Credentials
    """
    return OAuth2Credentials(access_token=None,
                           client_id='549790627766-qnth4m8qvuimg87pnsp4b82lhte7dk5a.apps.googleusercontent.com',
                           client_secret='Vta4lQLOL49vVYvktkcPGRNb',
                           refresh_token='1/ab7pCGMu3K5AveG0UOUpQ0J08vCp6uM357O8qmoPDMs',
                           token_expiry=None,
                           token_uri="https://accounts.google.com/o/oauth2/token",
                           user_agent=None)

def get_service(credentials):
    """Set up a new DoubleClick Search service.

    Args:
        credentials: An OAuth2Credentials generated with create_credentials, or
        flows in the oatuh2client.client package.
    Returns:
        An authorized Doubleclicksearch serivce.
    """
    # Use the authorize() function of OAuth2Credentials to apply necessary credential
    # headers to all requests.
    http = credentials.authorize(http = httplib2.Http())

    # Construct the service object for the interacting with the DoubleClick Search API.
    service = build('doubleclicksearch', 'v2', http=http)
    return service

def poll_report(service, report_id):
    """Poll the API with the reportId until the report is ready, up to ten times.

    Args:
        service: An authorized Doublelcicksearch service.
        report_id: The ID DS has assigned to a report.
    Returns:
        pd.DataFrame, report file
    """
    for _ in range(10):
        try:
            request = service.reports().get(reportId=report_id)
            json_data = request.execute()
            if json_data['isReportReady']:
                pprint.pprint('The report is ready.')

                # For large reports, DS automatically fragments the report into multiple
                # files. The 'files' property in the JSON object that DS returns contains
                # the list of URLs for file fragment. To download a report, DS needs to
                # know the report ID and the index of a file fragment.
                report = pd.DataFrame()
                for i in range(len(json_data['files'])):
                    pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                    report = report.append(download_files(service, report_id, str(i)), ignore_index = True) # See Download the report.
                return report

            else:
                pprint.pprint('Report is not ready. I will try again.')
                time.sleep(10)
        except HttpError as e:
            error = simplejson.loads(e.content)['error']['errors'][0]

            # See Response Codes
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, error['reason']))
            break
        
def download_files(service, report_id, report_fragment):
    """Generate and print sample report.

    Args:
        service: An authorized Doublelcicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame report file
    """
    request = service.reports().getFile(reportId=report_id, reportFragment=report_fragment)
    return pd.read_csv(StringIO(request.execute().decode('utf-8')))

def request_report(service, start_date, end_date, columns):
    """Request sample report and print the report ID that DS returns. See Set Up Your Application.

    Args:
        service: An authorized Doublelcicksearch service.
        columns: list of columns will be in the report
    Returns:
        The report id.
    """
    request = service.reports().request(
        body={
                "reportScope": {
                    "agencyId": "20100000000000932",
                    "advertiserId": "21700000001406447", # Callaway Apparel - Perry Ellis International
                    #"engineAccountId": "700000001564770" # Google - Callaway Apparel
                    #"advertiserId": "21700000001131725", # Celebrity Cruise
                    #"engineAccountId": "700000001217833" # Celebrity Cruise
                    #"engineAccountId": "700000001561242" # Celebrity Cruise - Juba Plus
                },
                "reportType": "account",
                "columns": [{'columnName': column} for column in columns],   
                "timeRange" : {
                    "startDate" : start_date,
                    "endDate" : end_date
                    },
                
                #"filters": [
                #    {
                #        "column" : { "columnName": "keywordLabels" },
                #        "operator" : "containsElement",
                #        "values" : ["JubaNovTest",]
                #    }
                #],
                
                "downloadFormat": "csv",
                "maxRowsPerFile": 100000000,
                "statisticsCurrency": "agency",
                "verifySingleTimeZone": "false",
                "includeRemovedEntities": "false"
            }
    )
    json_data = request.execute()
    return json_data['id']

In [2]:
creds = create_credentials()

service = get_service(creds)

end_date = "2017-08-06"
start_date = "2017-04-01"

print(start_date), print(end_date)
REPORTID_nonHVA = request_report(service, start_date, end_date, 
                                 ['accountType', 'deviceSegment', 'clicks', 'cost', 'impr','dfaRevenue', 'dfaTransactions'])
REPORTID_HVA = request_report(service, start_date, end_date, 
                              ['accountType', 'deviceSegment',
                               'floodlightActivity', 'dfaActions'])

2017-04-01
2017-08-06


In [3]:
non_hva_device= poll_report(service, REPORTID_nonHVA)
hva_device = poll_report(service, REPORTID_HVA)
Device_1=non_hva_device
Device_2=hva_device

'The report is ready.'
'Downloading fragment 0 for report AAAnQageeLmgl7tg'
'The report is ready.'
'Downloading fragment 0 for report AAAnWuFMADK7iOlZ'


In [4]:
Group = pd.read_csv("/Users/JayLiang/Desktop/Media Storm/CallAway/New/Group.csv")
Group['floodlightActivity'] = Group['DCM activity mapping'].apply(lambda x: x[19:len(x)-19])

In [5]:
Device_2_Group=pd.merge(Device_2,Group,on="floodlightActivity")
Device_2['floodlightActivity'].replace("Callaway - Billing_Payment - All","Callaway - Billing_Payment")
Device_2['floodlightActivity'].replace("Callaway - Order Review - All","Callaway - Order Review")
Device_2['floodlightActivity'].replace("Callaway - Shipping - All","Callaway - Shipping")

Device_2_Group_New=pd.merge(Device_2,Group,on="floodlightActivity")
len(pd.unique(Device_2_Group_New.floodlightActivity)) #53
del Device_2_Group_New['Activity Name']
del Device_2_Group_New['DCM activity mapping']
type(Device_2_Group_New)
Device_2_Group_New

Unnamed: 0,accountType,deviceSegment,floodlightActivity,dfaActions,Group Name
0,Google AdWords,Desktop,Callaway - Check Out,8648,Callaway - HVA 3
1,Google AdWords,Tablet,Callaway - Check Out,2464,Callaway - HVA 3
2,Google AdWords,Mobile,Callaway - Check Out,5972,Callaway - HVA 3
...,...,...,...,...,...
414,Google AdWords,Desktop,Callaway - Unsubscribe - HVA 2,2,Callaway - HVA 2
415,Yahoo Gemini,Desktop,Callaway - Unsubscribe - HVA 2,1,Callaway - HVA 2
416,Bing Ads,Desktop,Callaway - Unsubscribe - HVA 2,1,Callaway - HVA 2


In [5]:
Device_2_Group_3HVA=Device_2_Group_New.groupby(['deviceSegment','accountType','Group Name'])[['dfaActions']].sum() 
type(Device_2_Group_3HVA)
Device_2_Group_3HVA.reset_index(inplace = True)
Device_2_Group_3HVA_Wide=Device_2_Group_3HVA.pivot_table(index=['deviceSegment','accountType'],columns=['Group Name'],values=['dfaActions'])

NameError: name 'Device_2_Group_New' is not defined

In [7]:
Device_2_Group_floodlightActivity=Device_2_Group_New
del Device_2_Group_floodlightActivity['Group Name']
Device_2_Group_53Activities=Device_2_Group_floodlightActivity.pivot_table(index=['deviceSegment','accountType'],columns=['floodlightActivity'],values=['dfaActions'])
Device_2_Group_3HVA_Wide.columns =Device_2_Group_3HVA_Wide.columns.get_level_values(1)
Device_2_Group_3HVA_Wide.reset_index(inplace=True)
Device_2_Group_53Activities.columns =Device_2_Group_53Activities.columns.get_level_values(1)
Device_2_Group_53Activities.reset_index(inplace=True)

In [8]:
Merge_1_Device=pd.merge(Device_1,Device_2_Group_3HVA_Wide,on=["deviceSegment","accountType"])
Merge_2_Device=pd.merge(Merge_1_Device,Device_2_Group_53Activities,on=["deviceSegment","accountType"])
Merge_2_Device['Conversions']=np.sum(Merge_2_Device.loc[:,['Callaway - HVA 1','Callaway - HVA 2','Callaway - HVA 3']],axis=1)
Merge_2_Device['ROI']=Merge_2_Device['dfaRevenue']/Merge_2_Device['cost']
Merge_2_Device['Click Through Rate']=Merge_2_Device['clicks']/Merge_2_Device['impr']
Merge_2_Device['Cost per Transaction']=Merge_2_Device['cost']/Merge_2_Device['dfaTransactions']  
Merge_2_Device['Cost per Click']=Merge_2_Device['cost']/Merge_2_Device['clicks']
Merge_2_Device['Cost per HVA (total)']=Merge_2_Device['cost']/Merge_2_Device['Conversions']
Merge_2_Device['Cost per HVA 1']=Merge_2_Device['cost']/Merge_2_Device['Callaway - HVA 1']
Merge_2_Device['Cost per HVA 2']=Merge_2_Device['cost']/Merge_2_Device['Callaway - HVA 1']
Merge_2_Device['Cost per HVA 3']=Merge_2_Device['cost']/Merge_2_Device['Callaway - HVA 1']

In [9]:
Merge_2_Device

Unnamed: 0,accountType,deviceSegment,clicks,cost,impr,dfaRevenue,dfaTransactions,Callaway - HVA 1,Callaway - HVA 2,Callaway - HVA 3,...,Callaway - Women_Standard Collection,Conversions,ROI,Click Through Rate,Cost per Transaction,Cost per Click,Cost per HVA (total),Cost per HVA 1,Cost per HVA 2,Cost per HVA 3
0,Google AdWords,Desktop,36042,45309.34,4514945,196400.509981,1490,3393,105802,14469,...,550.0,123664,4.334658,0.007983,30.408953,1.257126,0.366391,13.353770,13.353770,13.353770
1,Google AdWords,Tablet,11533,10475.11,1122260,47231.959995,350,1282,31161,4128,...,126.0,36571,4.508970,0.010277,29.928886,0.908273,0.286432,8.170913,8.170913,8.170913
2,Google AdWords,Mobile,38422,31067.44,3384383,80345.469988,683,552,79412,9416,...,95.0,89380,2.586163,0.011353,45.486735,0.808585,0.347588,56.281594,56.281594,56.281594
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,Bing Ads,Desktop,7742,11974.22,196400,49376.319999,338,842,18925,2898,...,105.0,22665,4.123552,0.039420,35.426686,1.546657,0.528313,14.221164,14.221164,14.221164
7,Bing Ads,Tablet,1559,2281.34,27074,4475.680000,40,218,3071,385,...,26.0,3674,1.961865,0.057583,57.033500,1.463335,0.620942,10.464862,10.464862,10.464862
8,Bing Ads,Mobile,2039,2385.77,60635,4110.200000,19,28,2457,196,...,,2681,1.722798,0.033627,125.566842,1.170069,0.889881,85.206071,85.206071,85.206071


In [17]:
import os
os.chdir('/Users/JayLiang/Desktop/Media Storm/CallAway/Aug. 14-Try')
cols = Merge_2_Device.columns.tolist()
cols_new=cols[:2]+['ROI']+cols[2:7]+['Conversions']+cols[-7:]+cols[7:-9]
Merge_2_Device_Final=Merge_2_Device[cols_new]
Merge_2_Device_Final.to_excel('Device_Final.xlsx', sheet_name='Device_Final', index=False)

In [56]:
# non_hva_device.to_csv("/Users/JayLiang/Desktop/Media Storm/CallAway/Aug. 14-Try/Device 1.csv",index=False)
# hva_device.to_csv("/Users/JayLiang/Desktop/Media Storm/CallAway/Aug. 14-Try/Device 2.csv",index=False)