In [23]:
import pandas as pd
import numpy as np
pd.options.display.max_rows = 6

import argparse
import httplib2
import pprint
import time
import datetime
from io import StringIO

from apiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials(client_id=None, client_secret=None, refresh_token=None):
    """Create Google OAuth2 credentials.

    Every argument is optional; when omitted (``None``) the value embedded in
    this function is used, so existing ``create_credentials()`` calls keep
    working unchanged.

    Args:
        client_id: Client id of a Google Cloud console project.
        client_secret: Client secret of a Google Cloud console project.
        refresh_token: A refresh token authorizing the Google Cloud console project
          to access the DS data of some Google user.
    Returns:
        OAuth2Credentials
    """
    # SECURITY NOTE(review): the fallback values below are live-looking secrets
    # committed in source. They should be rotated and supplied by the caller
    # (config file, environment, secret store) instead of being kept here.
    if client_id is None:
        client_id = '549790627766-qnth4m8qvuimg87pnsp4b82lhte7dk5a.apps.googleusercontent.com'
    if client_secret is None:
        client_secret = 'Vta4lQLOL49vVYvktkcPGRNb'
    if refresh_token is None:
        refresh_token = '1/ab7pCGMu3K5AveG0UOUpQ0J08vCp6uM357O8qmoPDMs'

    # No access token is passed, only the refresh token and the token endpoint.
    return OAuth2Credentials(access_token=None,
                             client_id=client_id,
                             client_secret=client_secret,
                             refresh_token=refresh_token,
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search API client.

    Args:
        credentials: An OAuth2Credentials object, e.g. one produced by
          create_credentials or by a flow from the oauth2client.client package.
    Returns:
        The service object for the 'doubleclicksearch' API, version 'v2'.
    """
    # authorize() decorates the HTTP transport so that the credential headers
    # are applied to every request issued through it.
    authorized_http = credentials.authorize(http=httplib2.Http())

    # Hand the authorized transport to the discovery client.
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, wait_seconds=10):
    """Poll the API with the reportId until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: Maximum number of status requests before giving up.
        wait_seconds: Seconds to sleep after a "not ready" answer.
    Returns:
        pd.DataFrame with all file fragments concatenated (fresh 0..n-1 index),
        or None when the API answered with an HTTP error or the report was
        still not ready after ``max_attempts`` polls.
    """
    # Local import: the module header does not import a JSON parser (the
    # original handler referenced an un-imported `simplejson`).
    import json

    for _ in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if json_data['isReportReady']:
                pprint.pprint('The report is ready.')

                # For large reports, DS automatically fragments the report into multiple
                # files. The 'files' property in the JSON object that DS returns contains
                # the list of URLs for file fragment. To download a report, DS needs to
                # know the report ID and the index of a file fragment.
                fragments = []
                for i in range(len(json_data['files'])):
                    pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                    fragments.append(download_files(service, report_id, str(i)))

                # pd.concat raises on an empty list, so keep the "no fragments ->
                # empty frame" result explicit.
                if not fragments:
                    return pd.DataFrame()
                return pd.concat(fragments, ignore_index=True)
        except HttpError as e:
            # Best effort at extracting the API's reason; never let a malformed
            # error payload mask the original HTTP failure.
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                reason = 'unknown'
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            return None

        pprint.pprint('Report is not ready. I will try again.')
        time.sleep(wait_seconds)

    # Polling budget exhausted without the report becoming ready.
    return None
        
def download_files(service, report_id, report_fragment):
    """Download one file fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame parsed from the fragment's CSV content.
    """
    fragment_request = service.reports().getFile(
        reportId=report_id,
        reportFragment=report_fragment,
    )
    # execute() hands back the raw payload, which is decoded as UTF-8 text
    # before being fed to the CSV parser.
    raw_payload = fragment_request.execute()
    csv_text = raw_payload.decode('utf-8')
    return pd.read_csv(StringIO(csv_text))

def request_report(service, start_date, end_date, columns,
                   agency_id="20100000000000932",
                   advertiser_id="21700000001445074",
                   filters=None):
    """Request a keyword report and return the report ID that DS assigns to it.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: Value sent as timeRange.startDate (callers in this file
          pass 'YYYY-MM-DD' strings).
        end_date: Value sent as timeRange.endDate (same format).
        columns: list of column names that will be in the report.
        agency_id: reportScope.agencyId; defaults to the agency this script
          was written for.
        advertiser_id: reportScope.advertiserId; defaults to the advertiser
          this script was written for.
        filters: Optional list of filter dicts sent as the request's
          "filters" entry; omitted from the request when None/empty. Example:
          [{"column": {"columnName": "effectiveLabels"},
            "operator": "containsElement",
            "values": ["KPI_Retail"]}]
    Returns:
        The report id.
    """
    # Other scopes previously used with this script (kept for reference):
    #   advertiserId 21700000001406447  - Callaway Apparel - Perry Ellis International
    #   engineAccountId 700000001564770 - Google - Callaway Apparel
    #   advertiserId 21700000001131725  - Celebrity Cruise
    #   engineAccountId 700000001217833 - Celebrity Cruise
    #   engineAccountId 700000001561242 - Celebrity Cruise - Juba Plus
    body = {
        "reportScope": {
            "agencyId": agency_id,
            "advertiserId": advertiser_id,
        },
        "reportType": "keyword",
        "columns": [{'columnName': column} for column in columns],
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date,
        },
        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        # NOTE(review): these two flags are sent as the strings "false", not
        # booleans; left untouched because the request is known to work as-is.
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false",
    }
    if filters:
        body["filters"] = filters

    json_data = service.reports().request(body=body).execute()
    return json_data['id']

In [24]:
# Download both reports (performance metrics and floodlight actions).

# Date range sent to the API as timeRange.startDate / timeRange.endDate.
start_date = "2017-09-25"
end_date = "2017-10-02"

# Authenticate and build the API client.
creds = create_credentials()
service = get_service(creds)

# Columns of the performance ("non-HVA") report.
non_hva_columns = [
    'campaign', 'effectiveLabels', 'adGroup', 'keywordText', 'date', 'deviceSegment',
    'status', 'keywordMatchType', 'keywordMaxCpc',
    'topOfPageBidCurrent', 'topOfPageBidAvg',
    'effectiveKeywordMaxCpc', 'impr', 'clicks', 'cost',
    'avgCpc', 'avgPos',
]

# Columns of the floodlight-activity ("HVA") report.
hva_columns = [
    'campaign', 'adGroup', 'keywordText', 'date', 'deviceSegment',
    'floodlightActivity', 'dfaActions', 'effectiveLabels',
]

# Submit both report requests first, then poll each one until it can be downloaded.
REPORTID_nonHVA = request_report(service, start_date, end_date, non_hva_columns)
REPORTID_HVA = request_report(service, start_date, end_date, hva_columns)

non_hva = poll_report(service, REPORTID_nonHVA)
hva = poll_report(service, REPORTID_HVA)

'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnd1Gm0i7lRq4s'
'The report is ready.'
'Downloading fragment 0 for report AAAnUJl0TdBq4Pu2'


# Merge the HVA and non-HVA reports

In [25]:
def merge_hva_and_non_hva(hva, non_hva, columns_hva=None):
    '''Merge the two reports returned by poll_report().

    The HVA report has one row per floodlight activity; it is pivoted to one
    row per (keywordText, date, deviceSegment) with one column per activity,
    left-joined onto the non-HVA report, and a total 'HVA' column is added.

    Args:
        hva: pd.DataFrame with columns 'keywordText', 'date', 'deviceSegment',
          'floodlightActivity' and 'dfaActions'.
        non_hva: pd.DataFrame with columns 'keywordText', 'date' and
          'deviceSegment' (plus any metrics to keep).
        columns_hva: list of string, the floodlight activity names to turn
          into columns. Defaults to the Snap Fitness franchise activities.
    Returns:
        pd.DataFrame: non_hva rows with one column per activity and 'HVA'
        (integer row-sum of the activity columns). All NaN values in the
        merged frame are replaced by 0.
    '''
    if columns_hva is None:
        columns_hva = [
            'Snap Fitness - Franchise - Choose a Country',
            'Snap Fitness - Franchise - Email',
            'Snap Fitness - Franchise - First Name',
            'Snap Fitness - Franchise - I Attest Checkbox',
            'Snap Fitness - Franchise - Last Name',
            'Snap Fitness - Franchise - Phone Number',
            'Snap Fitness - Franchise - Select Country First',
            'Snap Fitness - Franchise - Take the First Step Button',
            'Snap Fitness - Franchise - What is This? Link',
            'Snap Fitness - Franchise Opportunities',
            'Snap Fitness - Franchise Request Info',
            'Snap Fitness - Franchise Thank You',
            'Snap Fitness - International Franchise',
            'Snap Fitness - International Franchise - Choose a Country',
            'Snap Fitness - International Franchise - Email',
            'Snap Fitness - International Franchise - First Name',
            'Snap Fitness - International Franchise - I Attest Checkbox',
            'Snap Fitness - International Franchise - Last Name',
            'Snap Fitness - International Franchise - Phone Number',
            'Snap Fitness - International Franchise - Select Country First',
            # 'Buton' is spelled this way on purpose: it must match the
            # activity name as it appears in the report.
            'Snap Fitness - International Franchise - Take the First Step Buton',
            'Snap Fitness - International Franchise - What is This? Link']
    columns_hva = list(columns_hva)
    key_columns = ['keywordText', 'date', 'deviceSegment']

    # Collect plain dict rows and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and appending inside a loop is quadratic anyway.
    rows = []
    for (keyword, date, device), group in hva.groupby(key_columns):
        # First dfaActions value seen per activity within the group.
        # NOTE(review): if the same keyword/date/device occurs in several
        # campaigns or ad groups, only the first row per activity is used
        # (not the sum) - this matches the previous behaviour; confirm intent.
        first_actions = {}
        for activity, actions in zip(group['floodlightActivity'], group['dfaActions']):
            first_actions.setdefault(activity, actions)

        row = {'keywordText': keyword, 'date': date, 'deviceSegment': device}
        for column in columns_hva:
            row[column] = first_actions.get(column, 0)
        rows.append(row)

    result = pd.DataFrame(rows, columns=key_columns + columns_hva)

    # combine hva and non_hva
    merged = non_hva.merge(result, on=key_columns, how='left')

    # Rows without any HVA match get 0 actions. Note this fills NaN in every
    # column of the merged frame, not only the activity columns.
    merged = merged.fillna(value=0)

    # generate new fields
    merged['HVA'] = merged[columns_hva].sum(axis=1).apply(int)
    return merged

In [26]:
# Join the two downloaded reports into one frame that carries one column per
# floodlight activity plus the total 'HVA' column.
df = merge_hva_and_non_hva(hva, non_hva)

In [27]:
# Label every row with the English weekday name of its 'date' value, which is
# parsed as a '%Y-%m-%d' string. The list is ordered to match
# datetime.weekday() (Monday == 0).
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['weekday'] = [
    weekdays[datetime.datetime.strptime(day, '%Y-%m-%d').weekday()]
    for day in df['date']
]

In [28]:
# Per-keyword attributes: keep the first report row seen for each keywordText
# and only the descriptive columns listed here.
kw_meta_columns = [
    'campaign', 'adGroup', 'status', 'keywordMatchType',
    'keywordMaxCpc', 'topOfPageBidCurrent', 'topOfPageBidAvg',
    'avgPos', 'keywordText',
]
kw_meta = non_hva.drop_duplicates(subset='keywordText').loc[:, kw_meta_columns].copy()

In [31]:
# Display the raw non-HVA report for a visual sanity check.
non_hva

Unnamed: 0,campaign,effectiveLabels,adGroup,keywordText,date,deviceSegment,status,keywordMatchType,keywordMaxCpc,topOfPageBidCurrent,topOfPageBidAvg,effectiveKeywordMaxCpc,impr,clicks,cost,avgCpc,avgPos
0,EST# 2856_LIFT Brands_Snap Fitness_Franchise -...,,Franchise,buying a +franchise,2017-10-02,,Active,Broad,1.00,,0.0,1.00,0,0,0.00,,
1,EST# 2856_LIFT Brands_Snap Fitness_Franchise -...,,Franchise,buying a +franchise,2017-10-02,Desktop,Active,Broad,1.00,,,1.00,14,1,2.66,2.66,2.428571
2,EST# 2856_LIFT Brands_Snap Fitness_Franchise -...,,Franchise,buying a +franchise,2017-10-02,Mobile,Active,Broad,1.00,,,1.00,4,0,0.00,,3.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15312,EST# 2856_LIFT Brands_Snap Fitness_Franchise -...,,Gym Business,+starting my +own +gym,2017-09-26,Desktop,Active,Broad,0.62,,,0.62,2,0,0.00,,3.000000
15313,EST# 2856_LIFT Brands_Snap Fitness_Brand,,Snap Franchising,+snap +fitness +franchise +for +sale,2017-09-27,,Active,Broad,1.00,,,1.00,0,0,0.00,,
15314,EST# 2856_LIFT Brands_Snap Fitness_Brand,,Snap Franchising,+snap +fitness +franchise +for +sale,2017-10-02,,Active,Broad,1.00,,,1.00,0,0,0.00,,


In [29]:
# Metrics that are summed per view; the derived ratios are computed from the sums.
columns_cum = ['impr', 'clicks', 'cost', 'HVA',
        'Snap Fitness - Franchise Thank You',
        'Snap Fitness - International Franchise Thank You',
        'Snap Fitness - Franchise - Choose a Country',
        'Snap Fitness - Franchise - Email',
        'Snap Fitness - Franchise - First Name',
        'Snap Fitness - Franchise - I Attest Checkbox',
        'Snap Fitness - Franchise - Last Name',
        'Snap Fitness - Franchise - Phone Number',
        'Snap Fitness - Franchise - Select Country First',
        'Snap Fitness - Franchise - Take the First Step Button',
        'Snap Fitness - Franchise - What is This? Link',
        'Snap Fitness - Franchise Opportunities',
        'Snap Fitness - Franchise Request Info',
        'Snap Fitness - International Franchise',
        'Snap Fitness - International Franchise - Choose a Country',
        'Snap Fitness - International Franchise - Email',
        'Snap Fitness - International Franchise - First Name',
        'Snap Fitness - International Franchise - I Attest Checkbox',
        'Snap Fitness - International Franchise - Last Name',
        'Snap Fitness - International Franchise - Phone Number',
        'Snap Fitness - International Franchise - Select Country First',
        'Snap Fitness - International Franchise - Take the First Step Buton',
        'Snap Fitness - International Franchise - What is This? Link']

# One workbook per reporting window, one sheet per view.
# The context manager saves and closes the workbook even if a view fails
# (ExcelWriter.save() no longer exists in pandas 2.x).
with pd.ExcelWriter('/Users/JayLiang/Desktop/Media Storm/Jay_SEM_%s_%s.xlsx'%(start_date, end_date)) as writer:
    for view in ['keywordText', 'date', 'weekday', 'deviceSegment']:
        result = df.groupby(view)[columns_cum].sum()

        # Each insert goes to position 4 (right after 'HVA'), so the final
        # column order is: ctr, cpm, cpc, cphva, CPThankYou.
        # Zero denominators yield inf/NaN rather than raising.
        thank_yous = (result['Snap Fitness - Franchise Thank You']
                      + result['Snap Fitness - International Franchise Thank You'])
        result.insert(4, 'CPThankYou', result['cost'] / thank_yous)
        result.insert(4, 'cphva', result['cost'] / result['HVA'])
        result.insert(4, 'cpc', result['cost'] / result['clicks'])
        result.insert(4, 'cpm', result['cost'] / result['impr'])
        result.insert(4, 'ctr', result['clicks'] / result['impr'])

        if view == 'keywordText':
            # Prefix the keyword sheet with the per-keyword attributes and
            # keep the keyword itself as the row index.
            result = kw_meta.merge(result, left_on='keywordText', right_index=True)
            result = result.set_index('keywordText')
        if view != 'date':
            # The date sheet stays in date order; the others are sorted by
            # cost per thank-you.
            result = result.sort_values(by='CPThankYou')
        result.to_excel(writer, sheet_name=view)

# Hourly breakdown: run manually against exported CSV files

In [8]:
import pandas as pd
pd.options.display.max_rows=6

# Hourly exports, read as UTF-16 CSV files.
hva_hour = pd.read_csv('/root/jzou/Lift/SEM/Lift Brand Gym HVA.csv',encoding='utf-16')
non_hva_hour = pd.read_csv('/root/jzou/Lift/SEM/Lift Brand Gym nonHVA.csv',encoding='utf-16')

# If 'Cost' is read as text with thousands separators, convert it first:
#non_hva_hour['Cost'] = non_hva_hour['Cost'].apply(lambda x: float(x.replace(',', '')))

# Column selection after groupby must be a list; the bare-tuple form
# (['Clicks', 'Impr', 'Cost'] without the inner brackets) is rejected by
# current pandas.
result_non_hva = non_hva_hour.groupby('Hour of day')[['Clicks', 'Impr', 'Cost']].sum()

# Expose the hour as a regular column so it can be used as the merge key.
result_non_hva.insert(0, 'hour', result_non_hva.index)

columns_hva= ['Snap Fitness - Franchise Thank You',
        'Snap Fitness - International Franchise Thank You',
        'Snap Fitness - Franchise - Choose a Country',
        'Snap Fitness - Franchise - Email',
        'Snap Fitness - Franchise - First Name',
        'Snap Fitness - Franchise - I Attest Checkbox',
        'Snap Fitness - Franchise - Last Name',
        'Snap Fitness - Franchise - Phone Number',
        'Snap Fitness - Franchise - Select Country First',
        'Snap Fitness - Franchise - Take the First Step Button',
        'Snap Fitness - Franchise - What is This? Link',
        'Snap Fitness - Franchise Opportunities',
        'Snap Fitness - Franchise Request Info',
        'Snap Fitness - International Franchise',
        'Snap Fitness - International Franchise - Choose a Country',
        'Snap Fitness - International Franchise - Email',
        'Snap Fitness - International Franchise - First Name',
        'Snap Fitness - International Franchise - I Attest Checkbox',
        'Snap Fitness - International Franchise - Last Name',
        'Snap Fitness - International Franchise - Phone Number',
        'Snap Fitness - International Franchise - Select Country First',
        'Snap Fitness - International Franchise - Take the First Step Buton',
        'Snap Fitness - International Franchise - What is This? Link']

# Pivot the HVA export to one row per hour with one column per activity.
# Rows are collected as dicts and the frame is built once, because
# DataFrame.append no longer exists in pandas 2.x.
hva_rows = []
for hour, group in hva_hour.groupby('Hour of day'):
    # First 'Actions' value seen per activity within this hour.
    first_actions = {}
    for activity, actions in zip(group['Floodlight activity'], group['Actions']):
        first_actions.setdefault(activity, actions)

    hva_row = {'hour': hour}
    for column in columns_hva:
        hva_row[column] = first_actions.get(column, 0)
    hva_rows.append(hva_row)

result_hva = pd.DataFrame(hva_rows, columns=['hour']+columns_hva)

result = result_non_hva.merge(result_hva, on='hour', how='left')

# Hours that have traffic but no HVA rows count as 0 actions, consistent with
# how the keyword-level merge treats unmatched rows.
result[columns_hva] = result[columns_hva].fillna(0)

# Each insert goes to position 4, so the final order after hour/Clicks/Impr/Cost
# is: ctr, cpm, cpc, cphva, CPThankYou, HVA. Zero denominators yield inf/NaN.
result.insert(4, 'HVA', result[columns_hva].sum(axis=1))
result.insert(4, 'CPThankYou', result['Cost']/(result['Snap Fitness - Franchise Thank You']+ result['Snap Fitness - International Franchise Thank You']))
result.insert(4, 'cphva', result['Cost']/result['HVA'])
result.insert(4, 'cpc', result['Cost']/result['Clicks'])
result.insert(4, 'cpm', result['Cost']/result['Impr'])
result.insert(4, 'ctr', result['Clicks']/result['Impr'])

result = result.sort_values(by='CPThankYou')

# NOTE(review): start_date and end_date are not defined in this cell; they must
# already exist in the session (e.g. from the report-download cell).
result.to_csv('/root/jzou/Lift/SEM/hour_%s_%s.csv'%(start_date, end_date), index=False)