<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Google Analytics - Get traffic data
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://img.shields.io/badge/-Open%20in%20Naas-success?labelColor=000000&logo="/>
</a>

**Tags:** #gcp #ga #googleanalytics #webanalytic #trafficdata

## Input

### Import library

In [2]:
import numpy as np
import pandas as pd
from google.oauth2 import service_account
from apiclient.discovery import build

#import naas_drivers

### API Keys

## Model

### Function

In [15]:
# Report parameters: edit these instead of the nested request body below.
VIEW_ID = "236707574"
START_DATE = '2021-01-01'
END_DATE = '2021-12-31'
METRICS = ['ga:users', 'ga:bounceRate']
ROW_DIMENSION = 'ga:yearMonth'
PIVOT_DIMENSION = 'ga:channelGrouping'

# Request body passed to reports().batchGet(): the same metrics are requested
# both per row dimension and broken down by the pivot dimension.
body = {'reportRequests': [{
    'viewId': VIEW_ID,
    'dateRanges': [{'startDate': START_DATE, 'endDate': END_DATE}],
    'metrics': [{'expression': metric} for metric in METRICS],
    'dimensions': [{'name': ROW_DIMENSION}],
    'pivots': [{
        'dimensions': [{'name': PIVOT_DIMENSION}],
        'metrics': [{'expression': metric} for metric in METRICS],
    }],
}]}

In [16]:
def format_summary(response):
    """Build a DataFrame of the per-row metric values of a report response.

    Only the first report (``response['reports'][0]``) and the first metrics
    entry of every row (``row['metrics'][0]['values']``) are read.

    Parameters
    ----------
    response : dict
        Decoded response as returned by ``reports().batchGet(...).execute()``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the report's ``data.rows`` and one float column
        per ``metricHeaderEntries`` name. The index is a ``MultiIndex`` named
        after ``columnHeader.dimensions`` when the dimensions can be read,
        otherwise the default index. An empty DataFrame is returned when the
        response cannot be parsed.
    """
    try:
        report = response['reports'][0]
        header = report['columnHeader']
        rows = report['data']['rows']

        # The row index is optional: fall back to the default index when the
        # dimension names/values are missing or cannot be assembled.
        try:
            row_index_names = header['dimensions']
            row_index = [row['dimensions'] for row in rows]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)),
                                                        names=np.array(row_index_names))
        except Exception:
            row_index_named = None

        # Column names come from the metric header, values from each row.
        summary_column_names = [entry['name']
                                for entry in header['metricHeader']['metricHeaderEntries']]
        summary_values = [row['metrics'][0]['values'] for row in rows]

        # Values are cast to float so the frame is numeric rather than object.
        df = pd.DataFrame(data=np.array(summary_values),
                          index=row_index_named,
                          columns=summary_column_names).astype('float')

    except Exception:
        # Best effort: an unparsable response yields an empty frame, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        df = pd.DataFrame()

    return df

def format_pivot(response):
    """Build a DataFrame of the pivot values of a report response.

    Only the first report, the first metrics entry of every row, the first
    pivot value region and the first pivot header are read.

    Parameters
    ----------
    response : dict
        Decoded response as returned by ``reports().batchGet(...).execute()``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the report's ``data.rows``, with float values.
        Columns are a ``MultiIndex`` whose upper level(s) are each pivot
        header entry's ``dimensionValues`` and whose last level is the metric
        name. The index is a ``MultiIndex`` named after
        ``columnHeader.dimensions`` when the dimensions can be read, otherwise
        the default index. An empty DataFrame is returned when the response
        cannot be parsed (for example when it carries no pivot data).
    """
    try:
        report = response['reports'][0]
        header = report['columnHeader']
        rows = report['data']['rows']

        # extract table values
        pivot_values = [row['metrics'][0]['pivotValueRegions'][0]['values'] for row in rows]

        # create column index: dimension value(s) on top, metric name below
        entries = header['metricHeader']['pivotHeaders'][0]['pivotHeaderEntries']
        top_header = [entry['dimensionValues'] for entry in entries]
        column_metrics = [entry['metric']['name'] for entry in entries]
        array = np.concatenate((np.array(top_header),
                                np.array(column_metrics).reshape((len(column_metrics), 1))),
                               axis=1)
        column_index = pd.MultiIndex.from_arrays(np.transpose(array))

        # The row index is optional: fall back to the default index when the
        # dimension names/values are missing or cannot be assembled.
        try:
            row_index_names = header['dimensions']
            row_index = [row['dimensions'] for row in rows]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)),
                                                        names=np.array(row_index_names))
        except Exception:
            row_index_named = None

        # combine into a dataframe
        df = pd.DataFrame(data=np.array(pivot_values),
                          index=row_index_named,
                          columns=column_index).astype('float')
    except Exception:
        # Best effort: an unparsable response yields an empty frame, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        df = pd.DataFrame()
    return df

def format_report(response):
    """Combine the summary and pivot tables of a response side by side.

    The summary frame has single-level columns while the pivot frame has
    multi-level columns, so the summary columns are padded with blank upper
    levels until both have the same depth before concatenating. This works
    for any pivot depth, not only the two-level case.

    Parameters
    ----------
    response : dict
        Decoded response, passed unchanged to ``format_summary`` and
        ``format_pivot``.

    Returns
    -------
    pandas.DataFrame
        Summary columns followed by pivot columns (``pd.concat`` on axis 1).
    """
    summary = format_summary(response)
    pivot = format_pivot(response)

    # Number of blank levels to stack above the summary's metric names;
    # zero when the pivot frame is empty / single-level.
    missing_levels = pivot.columns.nlevels - summary.columns.nlevels
    if missing_levels > 0:
        blank_level = [''] * len(summary.columns)
        summary.columns = pd.MultiIndex.from_arrays(
            [blank_level] * missing_levels + [summary.columns])

    return pd.concat([summary, pivot], axis=1)

In [17]:
# Authenticate with the service-account key file, requesting the read-only
# Analytics scope.
scopes = ['https://www.googleapis.com/auth/analytics.readonly']
credentials = service_account.Credentials.from_service_account_file(
    "naas-335023-93670af3d1df.json",
    scopes=scopes,
)

# Build the Analytics Reporting v4 client and run the request defined in `body`.
service = build('analyticsreporting', 'v4', credentials=credentials)
report_request = service.reports().batchGet(body=body)
response = report_request.execute()


In [20]:
# Show only the pivot part of the response (the cell output is the returned DataFrame).
format_pivot(response)

Unnamed: 0_level_0,Direct,Direct,Organic Search,Organic Search,Referral,Referral,Social,Social
Unnamed: 0_level_1,ga:users,ga:bounceRate,ga:users,ga:bounceRate,ga:users,ga:bounceRate,ga:users,ga:bounceRate
ga:yearMonth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
202102,112.0,61.744966,45.0,43.636364,4.0,80.0,26.0,73.076923
202103,75.0,58.333333,46.0,48.717949,4.0,50.0,2.0,25.0
202104,74.0,71.604938,27.0,62.068966,1.0,100.0,2.0,66.666667
202105,41.0,47.916667,31.0,70.27027,1.0,50.0,2.0,100.0
202106,38.0,51.111111,28.0,68.75,2.0,50.0,3.0,33.333333
202107,21.0,43.333333,17.0,47.368421,7.0,62.5,1.0,100.0
202108,23.0,68.0,26.0,54.83871,8.0,87.5,1.0,0.0
202109,50.0,74.545455,24.0,65.384615,3.0,20.0,2.0,100.0
202110,35.0,58.695652,35.0,56.756757,4.0,80.0,3.0,33.333333
202111,32.0,60.526316,32.0,61.111111,6.0,76.923077,0.0,0.0


In [21]:
# Summary and pivot columns combined into a single DataFrame.
df = format_report(response)

## Output

### Display result

In [22]:
# reset_index() returns a copy with the row index moved into ordinary columns; df itself is unchanged.
df.reset_index()

Unnamed: 0_level_0,ga:yearMonth,Unnamed: 2_level_0,Unnamed: 3_level_0,Direct,Direct,Organic Search,Organic Search,Referral,Referral,Social,Social
Unnamed: 0_level_1,Unnamed: 1_level_1,ga:users,ga:bounceRate,ga:users,ga:bounceRate,ga:users,ga:bounceRate,ga:users,ga:bounceRate,ga:users,ga:bounceRate
0,202102,187.0,59.148936,112.0,61.744966,45.0,43.636364,4.0,80.0,26.0,73.076923
1,202103,127.0,53.608247,75.0,58.333333,46.0,48.717949,4.0,50.0,2.0,25.0
2,202104,104.0,69.298246,74.0,71.604938,27.0,62.068966,1.0,100.0,2.0,66.666667
3,202105,75.0,58.426966,41.0,47.916667,31.0,70.27027,1.0,50.0,2.0,100.0
4,202106,71.0,57.317073,38.0,51.111111,28.0,68.75,2.0,50.0,3.0,33.333333
5,202107,46.0,48.275862,21.0,43.333333,17.0,47.368421,7.0,62.5,1.0,100.0
6,202108,58.0,63.076923,23.0,68.0,26.0,54.83871,8.0,87.5,1.0,0.0
7,202109,79.0,69.318182,50.0,74.545455,24.0,65.384615,3.0,20.0,2.0,100.0
8,202110,77.0,58.241758,35.0,58.695652,35.0,56.756757,4.0,80.0,3.0,33.333333
9,202111,70.0,63.218391,32.0,60.526316,32.0,61.111111,6.0,76.923077,0.0,0.0
