### Step 1: Set up

https://developers.google.com/analytics/devguides/reporting/core/v4/quickstart/service-py

In [1]:
#import sys
#!{sys.executable} -m pip install --upgrade pip
#!{sys.executable} -m pip install --upgrade oauth2client
#!{sys.executable} -m pip install --upgrade httplib2
#!{sys.executable} -m pip install --upgrade google-api-python-client
#!{sys.executable} -m pip install --upgrade gav4

In [2]:
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from oauth2client import file, client, tools
import pandas as pd
import numpy as np
from apiclient.errors import HttpError

#### Create two versions of APIs - one for analytics api v3 & another for analytics reporting api v4

In [3]:
# analytics api v3

obj = lambda: None # this code allows for an empty class
auth = {"auth_host_name":'localhost', 'noauth_local_webserver':'store_true', 'auth_host_port':[8080, 8090], 'logging_level':'ERROR'}
for k, v in auth.items():
    setattr(obj, k, v)
    
# authorization boilerplate code
SCOPES = 'https://www.googleapis.com/auth/analytics.edit'
store = file.Storage('token_ga_v3.json')
creds = store.get()
# The following will give you a link if token.json does not exist, the link allows the user to give this app permission
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store, obj)

analytics_v3 = build('analytics', 'v3', credentials=creds)

In [6]:
auth = {"auth_host_name":'localhost', 'noauth_local_webserver':'store_true', 'auth_host_port':[8080, 8090], 'logging_level':'ERROR'}
for k, v in auth.items():
    setattr(obj, k, v)
    
# authorization boilerplate code
SCOPES = 'https://www.googleapis.com/auth/analytics.readonly'
store = file.Storage('token_ga_v4.json')
creds = store.get()
# The following will give you a link if token.json does not exist, the link allows the user to give this app permission
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store, obj)

analytics_v4 = build('analyticsreporting', 'v4', credentials=creds)

### Step 2: Create profile table

#### Get Account ID, Web property ID, and View (Profile) ID
(AccountID)&emsp;accountId&emsp;string&emsp;	Account ID to create the unsampled report for.<br>
(ProfileID)&emsp;id&emsp;	  string&emsp;	Web property ID to create the unsampled report for.<br>
(ViewID)&emsp;   internalWebPropertyId&emsp;	      string&emsp;	View (Profile) ID to create the unsampled report for.<br>

In [7]:
# Account ID
accounts = analytics_v3.management().accounts().list().execute()
#accounts          # list a whole bunch of accountid to select from

accountid_ = accounts.get('items')[0].get('id') #first one is what we need

In [8]:
# Web property IDs
webPropertyIds = analytics_v3.management().webproperties().list(accountId='10401800').execute()

# Web propertyId is a weird dataset, it's a bunch of dictionary encapsulated in a list then encapsulated in a dictionary
# You gotta take one layer at a time
webPropertyIds = webPropertyIds['items']

In [9]:
webPropertyIds

[{'id': 'UA-10401800-1',
  'kind': 'analytics#webproperty',
  'selfLink': 'https://www.googleapis.com/analytics/v3/management/accounts/10401800/webproperties/UA-10401800-1',
  'accountId': '10401800',
  'internalWebPropertyId': '22690962',
  'name': 'http://trader_websites.com',
  'websiteUrl': 'http://trader_websites.com',
  'level': 'PREMIUM',
  'profileCount': 64,
  'industryVertical': 'AUTOMOTIVE',
  'defaultProfileId': '20946992',
  'dataRetentionTtl': 'INDEFINITE',
  'dataRetentionResetOnNewActivity': True,
  'permissions': {'effective': ['COLLABORATE', 'EDIT', 'READ_AND_ANALYZE']},
  'created': '2009-08-26T21:16:53.000Z',
  'updated': '2019-09-10T21:08:20.382Z',
  'parentLink': {'type': 'analytics#account',
   'href': 'https://www.googleapis.com/analytics/v3/management/accounts/10401800'},
  'childLink': {'type': 'analytics#profiles',
   'href': 'https://www.googleapis.com/analytics/v3/management/accounts/10401800/webproperties/UA-10401800-1/profiles'}},
 {'id': 'UA-10401800-11'

In [61]:
print(f'WebPropertyIds contains {len(webPropertyIds)} items and each component is {type(webPropertyIds)}')

WebPropertyIds contains 30 items and each component is <class 'list'>


In [62]:
# Create a dataframe with profiles we need

nameId_list =[]
accountId_list = []
webPropertyId_list = []
profileId_list = []

for i in range(len(webPropertyIds)):
    nameId = webPropertyIds[i]['name']
    accountId = webPropertyIds[i]['accountId']
    webPropertyId = webPropertyIds[i]['id']
    profileId = webPropertyIds[i]['defaultProfileId']
    
    nameId_list.append(nameId)
    accountId_list.append(accountId)
    webPropertyId_list.append(webPropertyId)
    profileId_list.append(profileId)
    
# now turn these lists into a dataframe    
profiles = list(zip(nameId_list,accountId_list,webPropertyId_list,profileId_list))
profiles = pd.DataFrame(profiles,columns=['name','accountId','PropertyId','ViewId'])

# #Delete the rows that you don't need

profiles = profiles[profiles['name'].str.contains("Prod")].reset_index(drop=True)
profiles.drop([4,5],inplace=True)
profiles

Unnamed: 0,name,accountId,PropertyId,ViewId
0,autoTRADER - UA - Desktop - Prod,10401800,UA-10401800-37,112082609
1,autoTRADER - UA - iOS - Prod,10401800,UA-10401800-42,113592302
2,autoTRADER - UA - Android - Prod,10401800,UA-10401800-43,113588605
3,autoTRADER - UA - mDOT - Prod,10401800,UA-10401800-44,112044696


### Step 3: Create a report
But first, gotta define the configuration <br>

Dimensions
1. date
2. region
3. fuel
4. make
5. model
6. trim
7. refined keywords
<br>

Metrics
1. total search
2. unique search
3. refined search
4. external search

#### Specifying report query

In [63]:
startdate = '2019-08-11'
enddate = '2019-09-13'

dimensions_ = [{'name':'ga:country'}]
metrics_ = [{'expression':'ga:pageviews'},{'expression':'ga:avgTimeonPage'},{'expression':'ga:TimeonPage'}]

pagesize_ = '100000'

In [64]:
sample_request = {
    'viewId':'112082609',
    'dateRanges':{
        'startDate':startdate,
        'endDate':enddate},
    'dimensions':dimensions_,
    'metrics':metrics_,
    'pageSize':pagesize_,
    ''
}

response = analytics_v4.reports().batchGet(
      body={
        'reportRequests': sample_request
      }).execute()

#Parse the response of API
def parse_response(report):

    """Parses and prints the Analytics Reporting API V4 response"""
    #Initialize results, in list format because two dataframes might return
    result_list = []

    #Initialize empty data container for the two dateranges (if there are two that is)
    data_csv = []
    data_csv2 = []

    #Initialize header rows
    header_row = []

    #Get column headers, metric headers, and dimension headers.
    columnHeader = report.get('columnHeader', {})
    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
    dimensionHeaders = columnHeader.get('dimensions', [])

    #Combine all of those headers into the header_row, which is in a list format
    for dheader in dimensionHeaders:
        header_row.append(dheader)
    for mheader in metricHeaders:
        header_row.append(mheader['name'])

    #Get data from each of the rows, and append them into a list
    rows = report.get('data', {}).get('rows', [])
    for row in rows:
        row_temp = []
        dimensions = row.get('dimensions', [])
        metrics = row.get('metrics', [])
        for d in dimensions:
            row_temp.append(d)
        for m in metrics[0]['values']:
            row_temp.append(m)
        data_csv.append(row_temp)

        #In case of a second date range, do the same thing for the second request
        if len(metrics) == 2:
            row_temp2 = []
            for d in dimensions:
                row_temp2.append(d)
            for m in metrics[1]['values']:
                row_temp2.append(m)
            data_csv2.append(row_temp2)

    #Putting those list formats into pandas dataframe, and append them into the final result
    result_df = pd.DataFrame(data_csv, columns=header_row)
    result_list.append(result_df)
    if data_csv2 != []:
        result_list.append(pandas.DataFrame(data_csv2, columns=header_row))

    return result_list

#### Show results

In [65]:
response_data = response.get('reports', [])[0]
print(parse_response(response_data)[0])

         ga:country ga:pageviews    ga:avgTimeonPage ga:TimeonPage
0         (not set)        46464   57.25120877926876     2107646.0
1       Afghanistan         1129  103.59617834394905       81323.0
2     Åland Islands            8                 0.0           0.0
3           Albania        25594   53.91740655723505     1244899.0
4           Algeria         5909   82.78172336727093      333362.0
..              ...          ...                 ...           ...
216         Vietnam         9273   94.60985810306198      633413.0
217  Western Sahara            8                 0.0           0.0
218           Yemen           90             124.875         999.0
219          Zambia          565   80.09485094850949       29555.0
220        Zimbabwe         1080   87.43426294820718       65838.0

[221 rows x 4 columns]


#### Check to see if the report is sampled

In [66]:
#Check to see if the result is sampled

# Source: https://developers.google.com/analytics/devguides/reporting/core/v4/migration#sampled_data

def SampleDataCheck(report):
    #insert "response_data" as a variable
    
    """Determines if the report contains sampled data.
    Args:
       report (Report): An Analytics Reporting API v4 response report.
    Returns:
        bool: True if the report contains sampled data.
    """
    
    report_data = report.get('data', {})
    sample_sizes = report_data.get('samplesReadCounts', [])
    sample_spaces = report_data.get('samplingSpaceSizes', [])
    
    try:
        print(f'the result uses {round(int(sample_sizes[0])/int(sample_spaces[0])*100,2)}% of the populated data') 
    except:
        print('The result is not sampled')

SampleDataCheck(response_data)

the result uses 12.22% of the populated data


### Step 4: Request an unsampled report if sampled
https://stackoverflow.com/questions/37183383/how-to-detect-sampling-in-google-analytics-api-v4
https://developers.google.com/analytics/devguides/config/mgmt/v3/mgmtReference/management/unsampledReports/insert

In [10]:
# Create a dataframe with profiles we need

nameId_list =[]
accountId_list = []
webPropertyId_list = []
profileId_list = []

for i in range(len(webPropertyIds)):
    nameId = webPropertyIds[i]['name']
    accountId = webPropertyIds[i]['accountId']
    webPropertyId = webPropertyIds[i]['id']
    profileId = webPropertyIds[i]['defaultProfileId']
    
    nameId_list.append(nameId)
    accountId_list.append(accountId)
    webPropertyId_list.append(webPropertyId)
    profileId_list.append(profileId)
    
# now turn these lists into a dataframe    
profiles = list(zip(nameId_list,accountId_list,webPropertyId_list,profileId_list))
profiles = pd.DataFrame(profiles,columns=['name','accountId','PropertyId','ViewId'])

# #Delete the rows that you don't need

profiles = profiles[profiles['name'].str.contains("Prod")].reset_index(drop=True)
profiles.drop([4,5],inplace=True)
profiles

Unnamed: 0,name,accountId,PropertyId,ViewId
0,autoTRADER - UA - Desktop - Prod,10401800,UA-10401800-37,112082609
1,autoTRADER - UA - iOS - Prod,10401800,UA-10401800-42,113592302
2,autoTRADER - UA - Android - Prod,10401800,UA-10401800-43,113588605
3,autoTRADER - UA - mDOT - Prod,10401800,UA-10401800-44,112044696


In [None]:
for index, row in df.iterrows():
    print(row['c1'], row['c2'])

for index, row in profiles.iterrows():
    accountid = '10401800'
    propertyid = profiles['PropertyId'].iloc[i]
    profileid = profiles['ViewId'].iloc[i]
    name_id = profiles['name'].iloc[i]
    title = f'SRP for {name_id}'

    reports = analytics_v3.management().unsampledReports().insert(
    accountId = accountid,
    webPropertyId = propertyid,
    profileId = profileid,
    body={
        'title':title,
        'start-date': '2019-01-01',
        'end-date':'yesterday',
        'metrics':'ga:pageviews',
        'dimensions':'ga:date,ga:'
    }
    )

reports = analytics_v3.management().unsampledReports().insert(
    accountId='10401800',
    webPropertyId='UA-10401800-37',
    profileId='112082609',
    body={
        'title': 'desktop search history',
        'start-date': '2019-10-20',
        'end-date': '2019-10-29',
        'metrics': 'ga:sessions,ga:users',
        'dimensions': 'ga:browser,ga:country',
        'segment':'gaid::cyy75VpBS2SF_Gg2n3bVUw',
        'filters':'ga:country==Canada',
        'sort':'-ga:sessions'
  }
).execute()

In [68]:
from apiclient.errors import HttpError

try:
    reports = analytics_v3.management().unsampledReports().insert(
        accountId='10401800',
        webPropertyId='UA-10401800-37',
        profileId='112082609',
        body={
            'title': 'desktop search history',
            'start-date': '2019-10-20',
            'end-date': '2019-10-29',
            'metrics': 'ga:sessions,ga:users',
            'dimensions': 'ga:browser,ga:country',
            'segment':'gaid::cyy75VpBS2SF_Gg2n3bVUw',
            'filters':'ga:country==Canada',
            'sort':'-ga:sessions'
      }
  ).execute()

except TypeError as error:
  # Handle errors in constructing a query.
  print ('There was an error in constructing your query : %s' % error)

except HttpError as error:
  # Handle API errors.
  print ('There was an API error : %s : %s' %
         (error.resp.status, error.resp.reason))    

# V3_REQUEST = {
#     "ids": "ga:90851825",
#     "start_date": "2014-11-01",
#     "end_date": "2014-11-30",
#     "metrics": "ga:sessions,ga:users",
#     "dimensions": "ga:source,ga:city",
#     "segment": "sessions::condition::ga:city!~not",
#     "sort": "-ga:sessions,ga:source",
#     "filters": "ga:source==google",
#     "max_results": "25",
#     "samplingLevel": "FASTER",

#### Refer to list of unsample reports

In [None]:
try:
    reports = analytics_v3.management().unsampledReports().list(
      accountId='10401800',
      webPropertyId='UA-10401800-37',
      profileId='112082609').execute()

except TypeError as error:
  # Handle errors in constructing a query.
  print ('There was an error in constructing your query : %s' % error)

except HttpError as error:
  # Handle API errors.
  print ('There was an API error : %s : %s' %
         (error.resp.status, error.resp.reason))

In [None]:
for report in reports.get('items', []):
    driveDownloadDetails = report.get('driveDownloadDetails', {})
    cloudStorageDownloadDetails = report.get('cloudStorageDownloadDetails', {})
    
    print ('Account Id            = %s' % report.get('accountId'))
    print ('Property Id           = %s' % report.get('webPropertyId'))
    print ('Report Id             = %s' % report.get('id'))
    print ('Report Title          = %s' % report.get('title'))
    print ('Report Kind           = %s' % report.get('kind'))
    #print ('Report start-date = %s' % report.get('start-date'))
    #print ('Report end-date = %s' % report.get('end-date'))
    #print ('Report metrics        = %s' % report.get('metrics'))
    #print ('Report dimensions = %s' % report.get('dimensions'))
    #print ('Report filters = %s' % report.get('filters'))
    print ('Report Status         = %s\n' % report.get('status'))
    #print ('Report downloadType = %s' % report.get('downloadType'))
    #print ('Drive Document Id = %s' % driveDownloadDetails.get('document Id'))
    #print ('Cloud Bucket Id = %s' % cloudStorageDownloadDetails.get('bucketId'))
    #print ('Cloud Object Id = %s' % cloudStorageDownloadDetails.get('objectId'))
    print ('Report Created = %s' % report.get('created'))
    print ('Report Updated = %s' % report.get('updated'))

#### Set parameters

In [None]:
# from pandas.io.json import json_normalize

# sample_request = {
#       'viewId': '112082609',
#       'dateRanges': {
#           'startDate': '2019-09-11',
#           'endDate': '2019-09-13'
#       },
#       'dimensions': [
#           {'name':'ga:country'}
#       ],
#       'metrics': [
#           {'expression':'ga:pageviews'},
#           {'expression':'ga:avgTimeonPage'},
#           {'expression':'ga:TimeonPage'}
#       ],
#     'pageSize': '100000'
#     }

# response = analytics_v4.reports().batchGet(
#       body={
#         'reportRequests': sample_request
#       }).execute()

#### Action Items
1. I have no problem seeing the list of unsampled reports but I cannot create a unsampled reports - find out why
> Maybe it has to do with authentication : currently I only have readonly, maybe it requires edit
>>../auth/drive.readonly	
>>../auth/analytics.readonly	
>>../auth/analytics.edit	
>>../auth/analytics

2. Create list of account id, property id, and view id<br>
3. Iterate unsampled reports creation through the list<br>
4. Delete unsampled reports that's stored in Google Drive because you don't want to clogg the drive<br>