In [1]:
import pandas as pd
import numpy as np
# Cap DataFrame previews at 6 rows so displayed tables stay short.
pd.options.display.max_rows = 6
import os
# Directory that relative output files (e.g. the Excel export) are written to.
# Set the PENGUIN_KEYWORDS_DIR environment variable to run this notebook on
# another machine; the default keeps the location used so far.
os.chdir(os.environ.get('PENGUIN_KEYWORDS_DIR',
                        '/Users/JayLiang/Desktop/Media Storm/PEI/Penguin/Keywords'))
import argparse
import httplib2
import pprint
import time
import datetime
from io import StringIO

from apiclient.discovery import build
from oauth2client import GOOGLE_TOKEN_URI
from oauth2client.client import OAuth2Credentials
from googleapiclient.errors import HttpError


def create_credentials(client_id=None, client_secret=None, refresh_token=None):
    """Create Google OAuth2 credentials.

    Secrets are not stored in the notebook. Each value comes from the matching
    argument or, when the argument is omitted, from an environment variable.

    Args:
        client_id: Client id of a Google Cloud console project. Falls back to
          the DS_CLIENT_ID environment variable.
        client_secret: Client secret of a Google Cloud console project. Falls
          back to the DS_CLIENT_SECRET environment variable.
        refresh_token: A refresh token authorizing the Google Cloud console project
          to access the DS data of some Google user. Falls back to the
          DS_REFRESH_TOKEN environment variable.
    Returns:
        OAuth2Credentials
    Raises:
        ValueError: If a value is neither passed in nor set in the environment.
    """
    # NOTE(review): the client secret and refresh token that used to be
    # hard-coded here were stored in plain text and should be rotated.
    sources = (
        ('client_id', client_id, 'DS_CLIENT_ID'),
        ('client_secret', client_secret, 'DS_CLIENT_SECRET'),
        ('refresh_token', refresh_token, 'DS_REFRESH_TOKEN'),
    )
    resolved = {}
    missing = []
    for name, explicit_value, env_var in sources:
        value = explicit_value or os.environ.get(env_var)
        if not value:
            missing.append(env_var)
        resolved[name] = value
    if missing:
        # Fail before any network call, naming exactly what has to be configured.
        raise ValueError(
            'Missing OAuth2 settings; pass them as arguments or set: '
            + ', '.join(missing))

    return OAuth2Credentials(access_token=None,
                             client_id=resolved['client_id'],
                             client_secret=resolved['client_secret'],
                             refresh_token=resolved['refresh_token'],
                             token_expiry=None,
                             token_uri="https://accounts.google.com/o/oauth2/token",
                             user_agent=None)

def get_service(credentials):
    """Build an authorized DoubleClick Search API client.

    Args:
        credentials: OAuth2Credentials produced by create_credentials, or by
          one of the flows in the oauth2client.client package.
    Returns:
        An authorized Doubleclicksearch service.
    """
    # authorize() applies the necessary credential headers to every request
    # sent through the returned transport.
    authorized_http = credentials.authorize(http=httplib2.Http())

    # The service object is the entry point for all DoubleClick Search API calls.
    return build('doubleclicksearch', 'v2', http=authorized_http)

def poll_report(service, report_id, max_attempts=10, wait_seconds=10):
    """Poll the API with the reportId until the report is ready, then download it.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        max_attempts: How many times to ask whether the report is ready.
        wait_seconds: Pause between two consecutive attempts, in seconds.
    Returns:
        pd.DataFrame holding all file fragments stacked in order (empty when
        the report lists no files), or None when an HTTP error occurred or the
        report was still not ready after max_attempts polls.
    """
    # Function-scope import: the error handler previously called `simplejson`,
    # which this notebook never imports, so any HTTP error ended in a NameError.
    import json

    for attempt in range(max_attempts):
        try:
            json_data = service.reports().get(reportId=report_id).execute()
            if json_data['isReportReady']:
                pprint.pprint('The report is ready.')

                # For large reports, DS automatically fragments the report into multiple
                # files. The 'files' property in the JSON object that DS returns contains
                # the list of URLs for file fragment. To download a report, DS needs to
                # know the report ID and the index of a file fragment.
                fragments = []
                for i in range(len(json_data['files'])):
                    pprint.pprint('Downloading fragment ' + str(i) + ' for report ' + report_id)
                    fragments.append(download_files(service, report_id, str(i)))

                # pd.concat raises on an empty list; keep returning an empty frame.
                if not fragments:
                    return pd.DataFrame()
                # Collect first, concatenate once: DataFrame.append was removed in
                # pandas 2.0 and re-copied the whole frame on every iteration.
                return pd.concat(fragments, ignore_index=True)

            pprint.pprint('Report is not ready. I will try again.')
            # No point in waiting after the final attempt.
            if attempt + 1 < max_attempts:
                time.sleep(wait_seconds)
        except HttpError as e:
            # See Response Codes
            try:
                reason = json.loads(e.content)['error']['errors'][0]['reason']
            except (ValueError, KeyError, IndexError, TypeError):
                # The error body was not the expected JSON document.
                reason = 'unknown'
            pprint.pprint('HTTP code %d, reason %s' % (e.resp.status, reason))
            return None

    pprint.pprint('Report %s was not ready after %d attempts.' % (report_id, max_attempts))
    return None
        
def download_files(service, report_id, report_fragment):
    """Download one file fragment of a report and parse it as CSV.

    Args:
        service: An authorized Doubleclicksearch service.
        report_id: The ID DS has assigned to a report.
        report_fragment: The 0-based index of the file fragment from the files array.
    Returns:
        pd.DataFrame built from the fragment's CSV content.
    """
    fragment_request = service.reports().getFile(
        reportId=report_id,
        reportFragment=report_fragment,
    )
    # execute() hands back bytes here; decode them before pandas reads the text.
    csv_text = fragment_request.execute().decode('utf-8')
    return pd.read_csv(StringIO(csv_text))

def request_report(service, start_date, end_date, columns,
                   agency_id="20100000000000932",
                   advertiser_id="21700000001424088"):
    """Request a keyword report and return the report ID that DS assigns to it.

    Args:
        service: An authorized Doubleclicksearch service.
        start_date: First day of the report's time range, passed through as
          "startDate" (the caller in this notebook uses "YYYY-MM-DD" strings).
        end_date: Last day of the report's time range, passed through as "endDate".
        columns: list of column names that will be in the report.
        agency_id: Agency the report is scoped to.
        advertiser_id: Advertiser the report is scoped to. The default is
          PEI - Original Penguin - Search.
    Returns:
        The report id.
    """
    report_body = {
        "reportScope": {
            "agencyId": agency_id,
            "advertiserId": advertiser_id,
        },
        "reportType": "keyword",
        "columns": [{'columnName': column} for column in columns],
        "timeRange": {
            "startDate": start_date,
            "endDate": end_date,
        },

        # Example of restricting the report to keywords carrying a label:
        # "filters": [
        #     {
        #         "column": {"columnName": "keywordLabels"},
        #         "operator": "containsElement",
        #         "values": ["JubaNovTest"]
        #     }
        # ],

        "downloadFormat": "csv",
        "maxRowsPerFile": 100000000,
        "statisticsCurrency": "agency",
        "verifySingleTimeZone": "false",
        "includeRemovedEntities": "false",
    }
    json_data = service.reports().request(body=report_body).execute()
    return json_data['id']

In [2]:
# Authenticate, queue both keyword reports, then wait for the non-HVA one.
creds = create_credentials()
service = get_service(creds)

start_date = "2017-07-18"
end_date = "2017-08-28"
print(start_date, end_date)

# Columns requested for the non-HVA report.
non_hva_columns = [
    'campaign', 'adGroup', 'keywordText', 'keywordMatchType', 'status',
    'effectiveKeywordMaxCpc', 'keywordMaxCpc', 'topOfPageBidCurrent',
    'topOfPageBidAvg', 'impr', 'clicks', 'cost',
    'avgCpc', 'avgPos', 'dfaRevenue',
]
# Columns requested for the HVA report.
hva_columns = [
    'campaign', 'adGroup', 'keywordText', 'keywordMatchType',
    'floodlightActivity', 'dfaActions',
]

REPORTID_nonHVA = request_report(service, start_date, end_date, non_hva_columns)
REPORTID_HVA = request_report(service, start_date, end_date, hva_columns)

# Only the non-HVA report is polled in this cell; REPORTID_HVA is kept for later use.
non_hva = poll_report(service, REPORTID_nonHVA)

2017-07-18 2017-08-28
'Report is not ready. I will try again.'
'Report is not ready. I will try again.'
'The report is ready.'
'Downloading fragment 0 for report AAAnw85C8aWD0905'


In [14]:
# Preview the downloaded non-HVA report; the display is shortened because
# pd.options.display.max_rows is set to 6 in the first cell.
non_hva

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,status,effectiveKeywordMaxCpc,keywordMaxCpc,topOfPageBidCurrent,topOfPageBidAvg,impr,clicks,cost,avgCpc,avgPos,dfaRevenue
0,Y_Brand_Original Penguin_Tops_EST-2775,Shirts - Exact,Original Penguin oxford shirts,Exact,Active,3.50,3.5,,,0,0,0.0,,,0.0
1,Y_Brand_Original Penguin_Tops_EST-2775,Shirts - Exact,Original Penguin plaid shirts,Exact,Active,3.50,3.5,,,0,0,0.0,,,0.0
2,Y_Brand_Original Penguin_Tops_EST-2775,Shirts - Exact,Original Penguin dobby shirts,Exact,Active,3.50,3.5,,,0,0,0.0,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399091,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Iconic Blend - Exact,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0
399092,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Iconic Blend - BMM,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0
399093,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Premium Blend - BMM,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0


In [36]:
# all_campaign=list(non_hva['campaign'].unique())
# brand_campaign=[campaign for campaign in all_campaign if ('_Brand' in campaign)]
# brand_campaign
# 252 unique campaigns

In [23]:
# Short label -> full campaign name for the Non-Brand Mens and Big & Tall
# campaigns under each of the Y_, G_ and B_ prefixes.
Campaign_Dic = {
    'Y_Non-Brand_Men': 'Y_Non-Brand_Original Penguin_Mens_EST-2775',
    'G_Non-Brand_Men': 'G_Non-Brand_Original Penguin_Mens_EST-2775',
    'B_Non-Brand_Men': 'B_Non-Brand_Original Penguin_Mens_EST-2775',
    'Y_Non-Brand_Big&Tall': 'Y_Non-Brand_Original Penguin_Big & Tall_EST-2775',
    'G_Non-Brand_Big&Tall': 'G_Non-Brand_Original Penguin_Big & Tall_EST-2775',
    'B_Non-Brand_Big&Tall': 'B_Non-Brand_Original Penguin_Big & Tall_EST-2775',
}

# Keep only the report rows whose campaign is one of the full names above.
in_selected_campaign = non_hva['campaign'].isin(Campaign_Dic.values())
Non_HVA_In = non_hva[in_selected_campaign]


In [26]:
# One sub-frame of the filtered report per campaign.
d = {campaign_name: rows for campaign_name, rows in Non_HVA_In.groupby('campaign')}

# Distinct keyword texts of each campaign.
KeyWords = {campaign_name: rows['keywordText'].unique()
            for campaign_name, rows in d.items()}

# Build with campaigns as rows, then transpose so each campaign becomes a
# column, and save the table next to the notebook's working directory.
KeyWords_DF = pd.DataFrame.from_dict(KeyWords, orient='index').T
KeyWords_DF.to_excel('KeyWords_DF.xlsx', index=False)

In [173]:
# Number of distinct keyword texts in campaigns whose name contains 'G_Brand_Original'.
g_brand_rows = non_hva['campaign'].str.contains('G_Brand_Original')
len(non_hva.loc[g_brand_rows, 'keywordText'].unique())

566

In [176]:
# NOTE(review): the output recorded for this cell shows a Brand_Campaign column
# and different leading rows than the earlier preview of non_hva. No cell
# visible here adds that column — presumably it comes from a cell that is not
# shown or was deleted; confirm before relying on Restart & Run All.
non_hva

Unnamed: 0,campaign,adGroup,keywordText,keywordMatchType,status,effectiveKeywordMaxCpc,keywordMaxCpc,topOfPageBidCurrent,topOfPageBidAvg,impr,clicks,cost,avgCpc,avgPos,dfaRevenue,Brand_Campaign
0,OPG_US || NB | Holiday,OPG || NB | Holiday >> Christmas Presents > Men,christmas present for men,Phrase,Active,2.25,2.25,,,0,0,0.0,,,0.0,OPG_US || NB | H
1,OPG_US || NB | Holiday,OPG || NB | Holiday >> Christmas Presents > Men,mens presents for christmas,Phrase,Active,1.75,1.75,,,0,0,0.0,,,0.0,OPG_US || NB | H
2,OPG_US || NB | Holiday,OPG || NB | Holiday >> Christmas Presents > Men,man presents for christmas,Phrase,Active,1.50,1.50,,,0,0,0.0,,,0.0,OPG_US || NB | H
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399091,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Iconic Blend - Exact,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0,G_Non-Brand_Orig
399092,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Iconic Blend - BMM,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0,G_Non-Brand_Orig
399093,G_Non-Brand_Original Penguin_Fragrances_EST-2775,Premium Blend - BMM,Display Network Stats,,Active,0.05,,,,0,0,0.0,,,0.0,G_Non-Brand_Orig
