In [1]:
import pandas as pd
import numpy as np
# Cap pandas' display.max_rows option at 6 so DataFrame outputs in this notebook stay short.
pd.options.display.max_rows = 6

import argparse
import httplib2
import pprint
import time
import datetime
from io import BytesIO

from apiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials(client_id=None, client_secret=None, refresh_token=None):
    """Create Google OAuth2 credentials without embedding secrets in the notebook.

    Each value is taken from the matching argument or, when the argument is
    omitted, from an environment variable. Calling the function with no
    arguments therefore still works once the environment is configured.

    NOTE(review): a client secret and refresh token used to be hard-coded
    here; anything that was committed should be treated as leaked and rotated.

    Args:
        client_id: Client id of a Google Cloud console project. Falls back to
          the DS_CLIENT_ID environment variable.
        client_secret: Client secret of a Google Cloud console project. Falls
          back to the DS_CLIENT_SECRET environment variable.
        refresh_token: A refresh token authorizing the Google Cloud console
          project to access the DS data of some Google user. Falls back to
          the DS_REFRESH_TOKEN environment variable.
    Returns:
        OAuth2Credentials
    Raises:
        ValueError: If any of the three values is neither passed in nor set in
          the environment.
    """
    import os  # local import so this cell does not depend on an edited import cell

    supplied = {
        'DS_CLIENT_ID': client_id,
        'DS_CLIENT_SECRET': client_secret,
        'DS_REFRESH_TOKEN': refresh_token,
    }
    resolved = {name: value or os.environ.get(name)
                for name, value in supplied.items()}

    # Fail early with an actionable message instead of building credentials
    # that can only fail later, at the first API call.
    missing = sorted(name for name, value in resolved.items() if not value)
    if missing:
        raise ValueError(
            'Missing DoubleClick Search credentials: pass them as arguments '
            'or set the environment variable(s) ' + ', '.join(missing))

    return OAuth2Credentials(access_token=None,
                             client_id=resolved['DS_CLIENT_ID'],
                             client_secret=resolved['DS_CLIENT_SECRET'],
                             refresh_token=resolved['DS_REFRESH_TOKEN'],
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search API client.

    Args:
        credentials: An OAuth2Credentials object, e.g. one produced by
          create_credentials or by a flow from the oauth2client.client package.
    Returns:
        The 'doubleclicksearch' v2 service object returned by build().
    """
    # authorize() returns the transport with the credential headers applied,
    # so every request made through it is authenticated.
    authorized_http = credentials.authorize(http=httplib2.Http())

    # Hand the authorized transport to the API client factory.
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, poll_interval=10):
    """Poll the API with the reportId until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: Maximum number of status checks before giving up
          (default 10, the previously hard-coded value).
        poll_interval: Seconds to sleep after a "not ready" answer
          (default 10, the previously hard-coded value).
    Returns:
        pd.DataFrame with all file fragments concatenated (an empty DataFrame
        if the report lists no files), or None if the report never became
        ready or the API answered with an HTTP error.
    """
    import json  # stdlib stand-in for `simplejson`, which this notebook never imported

    for _ in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if not json_data['isReportReady']:
                pprint.pprint('Report is not ready. I will try again.')
                time.sleep(poll_interval)
                continue

            pprint.pprint('The report is ready.')

            # For large reports, DS automatically fragments the report into multiple
            # files. The 'files' property in the JSON object that DS returns contains
            # the list of URLs for file fragment. To download a report, DS needs to
            # know the report ID and the index of a file fragment.
            fragments = []
            for i in range(len(json_data['files'])):
                pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                fragments.append(download_files(service, report_id, str(i)))

            # pd.concat rejects an empty list, so keep the old "empty frame" result.
            if not fragments:
                return pd.DataFrame()
            # One concat instead of DataFrame.append per fragment: append was
            # removed in pandas 2.0 and re-copied the whole frame each time.
            return pd.concat(fragments, ignore_index=True)
        except HttpError as e:
            # The error body is normally JSON; fall back to a placeholder so
            # that reporting the failure can never raise on its own.
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                reason = 'unknown'

            # See Response Codes
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            break
    else:
        # Only reached when every attempt answered "not ready".
        pprint.pprint('Report %s was not ready after %d attempts.' % (report_id, max_attempts))
    return None
        
def download_files(service, report_id, report_fragment):
    """Download one file fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame parsed from the downloaded fragment.
    """
    fragment_request = service.reports().getFile(
        reportId=report_id,
        reportFragment=report_fragment,
    )
    # The payload is wrapped in BytesIO so pandas can read it like a file.
    raw_csv = fragment_request.execute()
    return pd.read_csv(BytesIO(raw_csv))

def request_report(service, start_date, end_date, columns,
                   agency_id="20100000000000932",
                   advertiser_id="21700000001406447",
                   engine_account_id=None,
                   campaign_id=None,
                   report_type="keyword"):
    """Ask DS to generate a report and return the report ID that DS assigns.

    With only the four original arguments the request body is identical to
    the previously hard-coded one.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: Value sent as timeRange.startDate (the notebook passes
          "YYYY-MM-DD" strings).
        end_date: Value sent as timeRange.endDate.
        columns: list of columns will be in the report
        agency_id: Value sent as reportScope.agencyId.
        advertiser_id: Value sent as reportScope.advertiserId. The default was
          labelled "Callaway Apparel - Perry Ellis International".
        engine_account_id: Optional reportScope.engineAccountId; left out of
          the request when None.
        campaign_id: Optional reportScope.campaignId; left out of the request
          when None.
        report_type: Value sent as reportType.
    Returns:
        The report id.
    """
    report_scope = {
        "agencyId": agency_id,
        "advertiserId": advertiser_id,
    }
    # Narrow the scope only on request, so the default call is unchanged.
    if engine_account_id is not None:
        report_scope["engineAccountId"] = engine_account_id
    if campaign_id is not None:
        report_scope["campaignId"] = campaign_id

    body = {
        "reportScope": report_scope,
        "reportType": report_type,
        "columns": [{'columnName': column} for column in columns],
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date
        },

        # Example of restricting the report to labelled keywords (kept from
        # the original cell, disabled):
        #"filters": [
        #    {
        #        "column" : { "columnName": "keywordLabels" },
        #        "operator" : "containsElement",
        #        "values" : ["JubaNovTest",]
        #    }
        #],

        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false"
    }
    json_data = service.reports().request(body=body).execute()
    return json_data['id']

In [2]:
# Authenticate and build the API client used by every later cell.
creds = create_credentials()
service = get_service(creds)

# Reporting window shared by all report requests below.
start_date = "2017-03-30"
end_date = "2017-08-15"

# Start the keyword-by-date cost report; only the report id comes back here.
# ('adId' was commented out of the original column list.)
REPORTID_cost_by_kw_by_date = request_report(
    service,
    start_date,
    end_date,
    [
        'keywordText', 'keywordId', 'campaign', 'adGroup',
        'keywordMatchType', 'date', 'clicks', 'cost',
    ],
)

In [4]:
# Wait for the keyword-by-date report and load it into a DataFrame.
df = poll_report(service=service, report_id=REPORTID_cost_by_kw_by_date)

'Report is not ready. I will try again.'
'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnZoUoUY8CauXt'


In [5]:
# Total of the 'clicks' column over the whole report.
df.loc[:, 'clicks'].sum()

69838

In [60]:
# Number of rows in the report.
df.shape[0]

19725629

In [6]:
# Write the keyword-by-date report to CSV without the DataFrame index.
df.to_csv(
    path_or_buf='/home/jubauser1/jzou/dcm_account7252/user_path/cost_outer_source/callaway_cost_by_kw_by_date.csv',
    index=False,
)

In [53]:
def request_campaign_report(service, start_date, end_date, columns, _engine_id, _campaign_id,
                            agency_id="20100000000000932",
                            advertiser_id="21700000001406447",
                            report_type="keyword"):
    """Ask DS to generate a report scoped to one campaign and return its report ID.

    With only the six original arguments the request body is identical to
    the previously hard-coded one.

    NOTE(review): in this notebook the "keyword" reports requested for the two
    Shopping campaigns came back with zero cost / no rows. A different
    report_type may be what is needed there - confirm against the DS API docs.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: Value sent as timeRange.startDate.
        end_date: Value sent as timeRange.endDate.
        columns: list of columns will be in the report
        _engine_id: Value sent as reportScope.engineAccountId.
        _campaign_id: Value sent as reportScope.campaignId.
        agency_id: Value sent as reportScope.agencyId.
        advertiser_id: Value sent as reportScope.advertiserId. The default was
          labelled "Callaway Apparel - Perry Ellis International".
        report_type: Value sent as reportType.
    Returns:
        The report id.
    """
    body = {
        "reportScope": {
            "agencyId": agency_id,
            "advertiserId": advertiser_id,
            "engineAccountId": _engine_id,
            "campaignId": _campaign_id
        },
        "reportType": report_type,
        "columns": [{'columnName': column} for column in columns],
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date
        },

        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false"
    }
    json_data = service.reports().request(body=body).execute()
    return json_data['id']

In [54]:
# NOTE(review): the two trailing labels were attached the other way round in
# the original cell. Elsewhere in this notebook "700000001564770" is labelled
# as the engine account "Google - Callaway Apparel", so the Shopping name
# presumably belongs to the campaign id - confirm in DS.
campaign_id = "71700000022345116"  # presumably G_Google Shopping_Callaway Apparel_EST-2773
engine_id = "700000001564770"  # Google - Callaway Apparel
REPORTID_cost_google_pla = request_campaign_report(
    service,
    start_date,
    end_date,
    ['campaign', 'date', 'clicks', 'cost'],
    _engine_id=engine_id,
    _campaign_id=campaign_id,
)

In [55]:
# Wait for the Google Shopping campaign report and load it.
cost_google_pla = poll_report(service=service, report_id=REPORTID_cost_google_pla)

'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnRw_60eIYI_Ul'


In [59]:
# Total of the 'cost' column for the Google Shopping campaign report.
cost_google_pla.loc[:, 'cost'].sum()

0.0

In [None]:
# The original cell stored the engine-account-shaped id in `campaign_id` and
# the campaign-shaped id in `engine_id`, then passed them in swapped positions
# to compensate. Names, values and argument order now agree; the request that
# is sent is the same as before.
# NOTE(review): labels are carried over from the original comments - confirm in DS.
engine_id = "700000001564701"  # presumably the engine account "Bing - Callaway Apparel"
campaign_id = "71700000022376924"  # presumably the campaign "B_Bing Shopping_EST-2773"
REPORTID_cost_bing_pla = request_campaign_report(service, start_date, end_date,
                                             ['campaign', 'date', 'clicks', 'cost'],
                                             engine_id, campaign_id)

In [44]:
# Wait for the Bing Shopping campaign report and load it.
cost_bing_pla = poll_report(service=service, report_id=REPORTID_cost_bing_pla)

'The report is ready.'
'Downloading fragment 0 for report AAAnihT3fmqyOMMv'


In [45]:
# Display the Bing Shopping report (the recorded output has headers but no rows).
cost_bing_pla

Unnamed: 0,campaign,date,clicks,cost


# Debug: inspect the report status and download a fragment by hand

In [33]:
# Fetch the status/metadata of the keyword-by-date report by hand.
request = service.reports().get(
    reportId=REPORTID_cost_by_kw_by_date,
)
json_data = request.execute()

In [34]:
# Readiness flag from the report metadata fetched above.
json_data['isReportReady']

True

In [36]:
# List of downloadable file fragments (each entry has a byteCount and a url).
json_data['files']

[{'byteCount': '2552471304',
  'url': 'https://www.googleapis.com/doubleclicksearch/v2/reports/AAAnaYuByUBCJ2ZR/files/0'}]

In [9]:
# Prepare (but do not yet execute) the download of fragment 0 of the report.
request = service.reports().getFile(
    reportId=REPORTID_cost_by_kw_by_date,
    reportFragment=0,
)

In [16]:
# Execute the prepared download and parse the returned bytes as CSV.
pd.read_csv(filepath_or_buffer=BytesIO(request.execute()))

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,date,clicks,cost
0,CA_US || B | Leggings,CA || B | Callaway > Leggings > Storm > Women,+women +storm +callaway +leggins,Broad,2017-09-02,0,0.0
1,CA_US || B | Leggings,CA || B | Callaway > Leggings > Storm > Women,+women +storm +callaway +leggins,Broad,2017-09-04,0,0.0
2,CA_US || B | Leggings,CA || B | Callaway > Leggings > Storm > Women,+women +callaway +storm +leggins,Broad,2017-09-04,0,0.0
...,...,...,...,...,...,...,...
425730,G_Brand_Callaway Apparel_Callaway Test_ EST-2773,Women General - Exact,Display Network Stats,,2017-09-04,0,0.0
425731,G_Brand_Callaway Apparel_Callaway Test_ EST-2773,Best Sellers - Exact,callaway apparel best sellers,Broad,2017-09-03,0,0.0
425732,G_Brand_Callaway Apparel_Callaway Test_ EST-2773,Best Sellers - Exact,Display Network Stats,,2017-09-04,0,0.0
