<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Google Analytics - Get traffic data
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://img.shields.io/badge/-Open%20in%20Naas-success?labelColor=000000&logo="/>
</a>

**Tags:** #gcp #ga #googleanalytics #webanalytic #trafficdata

## Input

### Import library

In [1]:
import numpy as np
import pandas as pd
from google.oauth2 import service_account
from apiclient.discovery import build

#import naas_drivers

### API Keys

## Model

### Function

In [2]:
# Metric expressions in the {"expression": ...} form used by the report requests.
metrics = [
    {"expression": expression}
    for expression in ("ga:users", "ga:bounceRate",  "ga:avgTimeOnPage", "ga:uniquePageviews")
]

In [3]:
# Echo the metric-expression dicts built in the previous cell.
metrics

[{'expression': 'ga:users'},
 {'expression': 'ga:bounceRate'},
 {'expression': 'ga:avgTimeOnPage'},
 {'expression': 'ga:uniquePageviews'}]

In [63]:
# Settings shared by every report request below. Edit them here to point the
# notebook at another view or another reporting period.
VIEW_ID = "236707574"
DATE_RANGE = {'startDate': '2021-01-01', 'endDate': '2021-12-31'}


def build_pivot_request(dimension, pivot_dimension, metric_expressions,
                        view_id=VIEW_ID, date_range=DATE_RANGE):
    """Build a ``batchGet`` request body with one dimension and one pivot.

    Parameters
    ----------
    dimension : str
        Name of the row dimension, e.g. ``'ga:year'``.
    pivot_dimension : str
        Name of the dimension the metrics are pivoted on, e.g. ``'ga:country'``.
    metric_expressions : list of dict
        ``{"expression": ...}`` entries. The same list is used for the report
        metrics and for the pivot metrics.
    view_id : str, optional
        View to query; defaults to ``VIEW_ID``.
    date_range : dict, optional
        Mapping with ``startDate`` and ``endDate``; defaults to ``DATE_RANGE``.

    Returns
    -------
    dict
        A body of the form ``{'reportRequests': [ {...} ]}`` holding one request.
    """
    return {'reportRequests': [{'viewId': view_id,
                                # copy so each body owns its own date-range dict
                                'dateRanges': [dict(date_range)],
                                'metrics': metric_expressions,
                                'dimensions': [{'name': dimension}],
                                "pivots": [{"dimensions": [{"name": pivot_dimension}],
                                            "metrics": metric_expressions
                                           }]
                               }]}


# Monthly values of every metric in `metrics`, pivoted by channel grouping.
body_channel = build_pivot_request('ga:yearMonth', 'ga:channelGrouping', metrics)

# Yearly sessions, pivoted by country.
body_country = build_pivot_request('ga:year', 'ga:country',
                                   [{"expression": "ga:sessions"}])

# Yearly page views, pivoted by page path.
body_pages = build_pivot_request('ga:year', 'ga:pagePath',
                                 [{"expression": "ga:pageviews"}])


In [5]:
def format_summary(response):
    """Convert the summary (non-pivot) metric values of a report response to a DataFrame.

    Only the first report (``response['reports'][0]``) and the first metric
    entry of every row (``row['metrics'][0]['values']``) are read.

    Parameters
    ----------
    response : dict
        Report response mapping with a ``'reports'`` list (presumably the
        result of ``reports().batchGet(...).execute()`` -- confirm with caller).

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``data.rows`` and one float column per
        ``metricHeaderEntries`` name. The row index is a MultiIndex built from
        the row dimensions when those can be read, otherwise the default
        index. When the response cannot be parsed at all, an empty DataFrame
        is returned instead of raising.
    """
    try:
        report = response['reports'][0]
        rows = report['data']['rows']

        # Row index: best effort -- fall back to the default index when the
        # report carries no usable dimensions.
        try:
            row_index_names = report['columnHeader']['dimensions']
            row_index = [row['dimensions'] for row in rows]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)),
                                                        names=np.array(row_index_names))
        except Exception:
            row_index_named = None

        # Column names come from the metric header entries.
        header_entries = report['columnHeader']['metricHeader']['metricHeaderEntries']
        summary_column_names = [entry['name'] for entry in header_entries]

        # Table values: the first metric entry of every row.
        summary_values = [row['metrics'][0]['values'] for row in rows]

        # The values are cast to float because the DataFrame would otherwise
        # keep them with a non-numeric dtype.
        df = pd.DataFrame(data=np.array(summary_values),
                          index=row_index_named,
                          columns=summary_column_names).astype('float')

    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit are not swallowed and turned into an empty frame.
    except Exception:
        df = pd.DataFrame()

    return df

def format_pivot(response):
    """Convert the pivot section of a report response to a DataFrame.

    Only the first report, the first pivot header and the first pivot value
    region of every row are read.

    Parameters
    ----------
    response : dict
        Report response mapping with a ``'reports'`` list (presumably the
        result of ``reports().batchGet(...).execute()`` -- confirm with caller).

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``data.rows``, float values, and a column
        MultiIndex whose leading level(s) are the pivot ``dimensionValues``
        and whose last level is the metric name. The row index is a MultiIndex
        built from the row dimensions when those can be read, otherwise the
        default index. When the response cannot be parsed at all, an empty
        DataFrame is returned instead of raising.
    """
    try:
        report = response['reports'][0]
        rows = report['data']['rows']

        # Table values: first pivot value region of the first metric entry.
        pivot_values = [row['metrics'][0]['pivotValueRegions'][0]['values'] for row in rows]

        # Column index: pivot dimension value(s) with the metric name appended
        # as the last level.
        header_entries = (report['columnHeader']['metricHeader']
                          ['pivotHeaders'][0]['pivotHeaderEntries'])
        top_header = [entry['dimensionValues'] for entry in header_entries]
        column_metrics = [entry['metric']['name'] for entry in header_entries]
        header_array = np.concatenate((np.array(top_header),
                                       np.array(column_metrics).reshape((len(column_metrics), 1))),
                                      axis=1)
        column_index = pd.MultiIndex.from_arrays(np.transpose(header_array))

        # Row index: best effort -- fall back to the default index when the
        # report carries no usable dimensions.
        try:
            row_index_names = report['columnHeader']['dimensions']
            row_index = [row['dimensions'] for row in rows]
            row_index_named = pd.MultiIndex.from_arrays(np.transpose(np.array(row_index)),
                                                        names=np.array(row_index_names))
        except Exception:
            row_index_named = None

        # Combine into a DataFrame and cast the values to float.
        df = pd.DataFrame(data=np.array(pivot_values),
                          index=row_index_named,
                          columns=column_index).astype('float')

    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit are not swallowed and turned into an empty frame.
    except Exception:
        df = pd.DataFrame()
    return df

def format_report(response):
    """Return the summary and pivot tables of a response side by side.

    Parameters
    ----------
    response : dict
        Report response mapping, passed unchanged to ``format_summary`` and
        ``format_pivot``.

    Returns
    -------
    pandas.DataFrame
        ``pd.concat([summary, pivot], axis=1)``. When the pivot table has
        multi-level columns, the summary column names are first padded with
        blank (``''``) upper levels so both tables have the same column depth.
    """
    summary = format_summary(response)
    pivot = format_pivot(response)

    # Pad the summary header to the pivot's column depth. For a two-level
    # pivot this adds one blank level; deeper pivots (several pivot
    # dimensions) get one blank level per extra pivot level.
    extra_levels = pivot.columns.nlevels - 1
    if extra_levels > 0:
        blank_level = [''] * len(summary.columns)
        summary.columns = [blank_level] * extra_levels + [summary.columns]

    return pd.concat([summary, pivot], axis=1)

In [64]:
# Load the service-account key file with the read-only Analytics scope, open
# an Analytics Reporting v4 client, and run the country request.
credentials = service_account.Credentials.from_service_account_file(
    "naas-335023-93670af3d1df.json",
    scopes=['https://www.googleapis.com/auth/analytics.readonly'],
)
service = build('analyticsreporting', 'v4', credentials=credentials)
country_request = service.reports().batchGet(body=body_country)
response = country_request.execute()


In [99]:
# Parse the pivot section of the response, then keep only the first element
# of each column key as the column label.
country = format_pivot(response)
country.columns = [column_key[0] for column_key in country.columns]

In [93]:
# Show every column label of `country` except the first one.
country.columns[1:]

Index(['United States', 'France', 'China', 'Pakistan', 'Germany', 'India',
       'United Kingdom', 'Ukraine', 'Philippines'],
      dtype='object')

In [100]:
# Transpose so the former column labels become rows, move them out of the
# index into a regular column, and name the resulting columns.
country = country.T.reset_index()
country.columns = ["country", "session"]

In [101]:
# Display the `country` frame.
country

Unnamed: 0,country,session
0,Canada,685.0
1,United States,197.0
2,France,64.0
3,China,27.0
4,Pakistan,17.0
5,Germany,13.0
6,India,13.0
7,United Kingdom,13.0
8,Ukraine,10.0
9,Philippines,9.0


In [73]:
# Exploratory: rebuild a one-row frame from the first record of `country`,
# using element 0 of each column key as the new column label.
# NOTE(review): key[0] only gives a full name when the column keys are
# tuples/sequences; with plain string labels it is the first character of
# each label -- confirm the state `country` is expected to be in here.
pd.DataFrame({key[0]: value for key, value in country.to_dict("records")[0].items()}, index=[0])

Unnamed: 0,Canada,United States,France,China,Pakistan,Germany,India,United Kingdom,Ukraine,Philippines
0,685.0,197.0,64.0,27.0,17.0,13.0,13.0,13.0,10.0,9.0


In [20]:
# Reduce every column label of `country` to its first element.
# NOTE(review): this is only meaningful while the labels are tuples; applied to
# plain string labels it keeps just their first character -- confirm the state
# `country` is expected to be in when this cell runs.
country.columns = [label[0] for label in country.columns]

In [62]:
# Exploratory: one single-entry dict per column of the first record of
# `country`, nesting the value as {key[0]: {key[1]: value}}.
# NOTE(review): presumes two-part column keys (the saved output shows a
# country and a metric name) -- confirm `country` has such keys at this point.
[{key[0]: {key[1]: value}} for key, value in country.to_dict("records")[0].items()]

[{'Canada': {'ga:users': 501.0}},
 {'Canada': {'ga:sessions': 685.0}},
 {'United States': {'ga:users': 159.0}},
 {'United States': {'ga:sessions': 197.0}},
 {'France': {'ga:users': 53.0}},
 {'France': {'ga:sessions': 64.0}},
 {'China': {'ga:users': 27.0}},
 {'China': {'ga:sessions': 27.0}},
 {'India': {'ga:users': 13.0}},
 {'India': {'ga:sessions': 13.0}},
 {'Germany': {'ga:users': 11.0}},
 {'Germany': {'ga:sessions': 13.0}},
 {'Pakistan': {'ga:users': 10.0}},
 {'Pakistan': {'ga:sessions': 17.0}},
 {'Ukraine': {'ga:users': 10.0}},
 {'Ukraine': {'ga:sessions': 10.0}},
 {'Philippines': {'ga:users': 8.0}},
 {'Philippines': {'ga:sessions': 9.0}},
 {'Australia': {'ga:users': 7.0}},
 {'Australia': {'ga:sessions': 7.0}}]

In [51]:
 # Exploratory: turn the first record of `country` into [c, m, v] rows, then
 # pivot with column 0 as the index and columns 1 and 2 as column levels.
 # The `(c, m)` unpacking requires every column key to be a two-item sequence.
 pd.DataFrame([[c, m, v] for (c, m), v in country.to_dict("records")[0].items()]).pivot(index=[0], columns=[1, 2], values=[2])

Unnamed: 0_level_0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
1,ga:users,ga:sessions,ga:users,ga:sessions,ga:users,ga:sessions,ga:users,ga:sessions,ga:users,ga:sessions,ga:users,ga:users,ga:sessions,ga:sessions,ga:users,ga:sessions,ga:users,ga:sessions
2,501.0,685.0,159.0,197.0,53.0,64.0,27.0,27.0,13.0,13.0,11.0,10.0,17.0,10.0,8.0,9.0,7.0,7.0
0,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Australia,,,,,,,,,,,,,,,,,7.0,7.0
Canada,501.0,685.0,,,,,,,,,,,,,,,,
China,,,,,,,27.0,27.0,,,,,,,,,,
France,,,,,53.0,64.0,,,,,,,,,,,,
Germany,,,,,,,,,,13.0,11.0,,,,,,,
India,,,,,,,,,13.0,13.0,,,,,,,,
Pakistan,,,,,,,,,,,,10.0,17.0,,,,,
Philippines,,,,,,,,,,,,,,,8.0,9.0,,
Ukraine,,,,,,,,,,,,10.0,,10.0,,,,
United States,,,159.0,197.0,,,,,,,,,,,,,,


In [None]:
# Scratch cell: evaluates to an empty dict literal.
{}

## Output

### Display result

In [None]:
# NOTE(review): no notebook-level `df` is assigned in the cells shown here
# (`df` only exists as a local inside the format_* helpers) -- confirm which
# frame is meant to be displayed.
df.reset_index()