In [2]:
from helper_functions import *

def google_analytics(df_conf_req, view_id, key_file_location, scopes):
    
    
    # Initializes an Analytics Reporting API V4 service object.
    try:
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
                key_file_location, scopes)
    except(NameError, IOError, FileNotFoundError) as error:
        print('Could not read configuration file(s)')
        print(error)
        sys.exit(1)   

    # Build the service object.
    analytics = build('analyticsreporting', 'v4', credentials=credentials)
    
    # define empty pandas dataframe
    df_response = pd.DataFrame()
    
    # create lists for metrics and dimensions
    dim_lst = []
    met_lst = []
    start_date = df_conf_req.iat[0,2]
    end_date = df_conf_req.iat[1,2]
    for index, req in df_conf_req.iterrows():
        if not 'nan' in str(req['dimensions']):
            dim_lst.append(dict({'name':req['dimensions']}))
        if not 'nan' in str(req['metrics']):
            met_lst.append(dict({'expression':req['metrics']}))  
    
    met_batches = list()
    
    # split metric list into batches, since with a single API call, a max of 10 metrics can be requested
    i = 0
    while True:
        met_batch = list()
        stop = 0
        for index, met in enumerate(met_lst):
            if len(met_batch) < 10 and i < len(met_lst):
                res = met_batch.append(met_lst[i])
                i=i+1
        if len(met_batch) != 0:
            met_batches.append(met_batch)
        if i == len(met_lst):
            x = True
            break 

    # create empty dataframe for response segment
    
    print('Calling Google Analytics API...')
    
    try:
        # iterate over metric batches
        for index, batch in enumerate(met_batches):
            
            # define request body
            body={
            'reportRequests': [
            {
                'viewId': view_id,
                'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
                'metrics': batch,
                'dimensions': dim_lst,

                'pageSize': 100000,
                'includeEmptyRows': True

            }]
            }    
            # make the call to Google Analytics API
            response = analytics.reports().batchGet(body=body).execute()
            
            df_res_part = pd.DataFrame()
            
            # deconstruct JSON response
            for report in response.get('reports', []):
                columnHeader = report.get('columnHeader', {})
                dimensionHeaders = columnHeader.get('dimensions', [])
                metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])

                # iterate over rows
                for data_row in report.get('data', {}).get('rows', []):
                    dimensions = data_row.get('dimensions', [])
                    dateRangeValues = data_row.get('metrics', [])
                    var_dict = {}

                    #iterate over dimensions
                    for header, dimension in zip(dimensionHeaders, dimensions):
                        if header not in df_res_part:
                            df_res_part[header] = pd.Series()
                            df_res_part.astype({header: 'object'}).dtypes
                        var_dict.update({header : str(dimension)})

                    # iterate over metrics
                    for i, values in enumerate(dateRangeValues):
                        for metricHeader, value in zip(metricHeaders, values.get('values')):
                            if metricHeader.get('name') not in df_res_part.columns:
                                df_res_part[metricHeader.get('name')] = pd.Series()
                            var_dict.update({metricHeader.get('name') : value})
                    df_res_part = df_res_part.append(var_dict, ignore_index=True)
        
            # if iteration is first and main dataframe is empty assign current response segment
            if df_response.empty:
                df_response = df_res_part
            # else do a left join and combine the two
            else:
                df_response = pd.merge(df_response, df_res_part,  how='inner', on=['ga:campaign', 'ga:adcontent', 'ga:channelGrouping', 'ga:keyword', 'ga:date', 'ga:sourceMedium'])
            row_count_part = len(df_res_part.index)
            row_count_full = len(df_response.index)
            print('Batch ' + str(index + 1))
            print(str(row_count_full) + ' row(s) received')
            df_response['ga_viewid'] = view_id

    except(http.HttpError) as error:
        print(' GA API error')
        print(error)  
        sys.exit(1)
            
    return df_response


if __name__ == '__main__':
    print('Starting...')
    try:
        # read configuration from excel
        df_conf_base = pd.read_excel('google_analytics_conf_1.xlsx', sheet_name='base', header=0)
        df_conf_req = pd.read_excel('google_analytics_conf_1.xlsx', sheet_name='parameters', header=0)
        if len(df_conf_base) == 0:
            raise KeyError('No base data provided (view_id)')
        if pd.isna(df_conf_req['dimensions'].iloc[0]):
            raise KeyError('One or more dimensions missing')    
        for index, row in df_conf_req.iterrows():
            if pd.isna(row['metrics']):
                raise KeyError('One or more metrics missing')        
            if pd.isna(row['date_range']) and index < 2:
                raise KeyError('No date range provided')
        db_config(filename = 'database.ini', section='postgresql')
        key_file_location = 'client_secrets.json'
        scopes = ['https://www.googleapis.com/auth/analytics.readonly']
    except(NameError, XLRDError, KeyError) as error:
        print('Error while reading configuration file(s)')
        print(error)
        sys.exit(1)      

    # iterate over view IDs
    for index, row in df_conf_base.iterrows():
        try:
            view_id = str(int(row['view_id']))
            print('View ID: ' + view_id)
        except(KeyError) as error:
            print('Could not read column')
            print(error)
            sys.exit(1)
        

        # call defined methods
        google_analytics_response = google_analytics(df_conf_req, view_id, key_file_location, scopes)


        t_name = 'google_analytics_new'
        pk_name = 'ga_new_pk'
        pk_lst = ['ga_viewID', 'ga_sourceMedium', 'ga_date', 'ga_campaign', 'ga_adcontent', 'ga_channelGrouping', 'ga_keyword']
        do_drop = False
        page_size = 100000
        src_col_name = ''
        is_pln_df = False

        postgre_write_main(google_analytics_response, t_name, pk_name, pk_lst, do_drop, page_size, src_col_name, is_pln_df)
        print('Success')
            
          

        
        

Starting...
View ID: 166655749
Calling Google Analytics API...


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\justinas.maciulis\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-fdfddfb9c1a7>", line 158, in <module>
    google_analytics_response = google_analytics(df_conf_req, view_id, key_file_location, scopes)
  File "<ipython-input-2-fdfddfb9c1a7>", line 101, in google_analytics
    df_res_part = df_res_part.append(var_dict, ignore_index=True)
  File "C:\Users\justinas.maciulis\Anaconda3\lib\site-packages\pandas\core\frame.py", line 6692, in append
    sort=sort)
  File "C:\Users\justinas.maciulis\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py", line 229, in concat
    return op.get_result()
  File "C:\Users\justinas.maciulis\Anaconda3\lib\site-packages\pandas\core\reshape\concat.py", line 426, in get_result
    copy=self.copy)
  File "C:\Users\justinas.maciulis\Anaconda3\lib\site-packages\pandas\core\internals\mana

KeyboardInterrupt: 