In [None]:
"""Hello Analytics Reporting API V4."""
#import everything upwards from home dir
from __future__ import absolute_import
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
from configparser import ConfigParser
import psycopg2
import argparse
import six
import sys
import google.ads.google_ads.client
import pandas as pd
import re
import numpy as np
import time

# Lift pandas' display limits so whole frames are shown when inspecting results.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# OAuth scope requested for the service account (read-only Analytics access).
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Service-account key file, resolved relative to the working directory.
KEY_FILE_LOCATION = 'client_secrets.json'


# Google Analytics view to query. Exactly one assignment is active; the other
# ids are kept as comments for quick switching (the first one used to be a
# live assignment that was immediately overwritten by the second).
#VIEW_ID = '6485278'
VIEW_ID = '166655749'
#VIEW_ID = '83024411'
# session count, source / medium, goals


def initialize_analyticsreporting():
    """Create an authorized Analytics Reporting API V4 client.

    Credentials are loaded from the service-account key file named by
    ``KEY_FILE_LOCATION`` and restricted to ``SCOPES``.

    Returns:
        The ``analyticsreporting`` ``v4`` service object produced by
        ``googleapiclient.discovery.build``.
    """
    service_account = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION, SCOPES)
    return build('analyticsreporting', 'v4', credentials=service_account)

def db_config(filename='database.ini', section='postgresql'):
    """Load one section of an INI file as a plain dictionary.

    Args:
        filename: Path of the INI file to read.
        section: Name of the section whose options are returned.

    Returns:
        A dict mapping each option name in ``section`` to its string value.

    Raises:
        Exception: If ``section`` is not present in the parsed file.
    """
    ini = ConfigParser()
    ini.read(filename)

    # Guard clause: fail early when the requested section is absent.
    if not ini.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return {option: value for option, value in ini.items(section)}

def _batch_metrics(metrics, size):
    """Split ``metrics`` into consecutive chunks of at most ``size`` items."""
    return [metrics[start:start + size] for start in range(0, len(metrics), size)]


def _report_rows(report):
    """Flatten one API report into a list of ``{column name: value}`` dicts."""
    header = report.get('columnHeader', {})
    dimension_names = header.get('dimensions', [])
    metric_names = [
        entry.get('name')
        for entry in header.get('metricHeader', {}).get('metricHeaderEntries', [])
    ]

    records = []
    for data_row in report.get('data', {}).get('rows', []):
        record = {
            name: str(value)
            for name, value in zip(dimension_names, data_row.get('dimensions', []))
        }
        # One entry per requested date range; only a single range is requested,
        # so later entries (if any) would overwrite earlier ones as before.
        for range_values in data_row.get('metrics', []):
            record.update(zip(metric_names, range_values.get('values', [])))
        records.append(record)
    return records


def get_report(analytics, df_conf_base, df_conf_req, view_id=None):
    """Query the Analytics Reporting API V4 and return the result as a DataFrame.

    The configured metrics are requested in batches of nine per call. Every
    batch is paged through completely and the per-batch results are joined on
    the requested dimensions, so the returned frame has one row per dimension
    combination and one column per dimension/metric.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.
        df_conf_base: Unused; accepted for backward compatibility.
        df_conf_req: Request configuration. Column ``dimensions`` and column
            ``metrics`` list the names to request (missing cells are skipped);
            the cells at positions ``[0, 2]`` and ``[1, 2]`` hold the start
            and end date of the report.
        view_id: Analytics view to query. Defaults to the module-level
            ``VIEW_ID``.

    Returns:
        A tuple ``(df_response, dim_met_lst)``: the merged report and the list
        of requested dimension dicts (``{'name': ...}``) followed by the
        requested metric dicts (``{'expression': ...}``).
    """
    view_id = VIEW_ID if view_id is None else view_id
    start_date = df_conf_req.iat[0, 2]
    end_date = df_conf_req.iat[1, 2]

    # Test for real missing values: a substring check for 'nan' would also
    # drop legitimate names that merely contain those letters.
    dim_lst = [{'name': name} for name in df_conf_req['dimensions'] if pd.notna(name)]
    met_lst = [{'expression': expr} for expr in df_conf_req['metrics'] if pd.notna(expr)]
    dim_met_lst = dim_lst + met_lst
    dimension_names = [dim['name'] for dim in dim_lst]

    df_response = pd.DataFrame()
    # Nine metrics per request, as in the original batching.
    # NOTE(review): presumably chosen to stay under the API's per-request
    # metric limit - confirm before raising it.
    for batch in _batch_metrics(met_lst, 9):
        records = []
        page_token = None
        while True:
            request = {
                'viewId': view_id,
                'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
                'metrics': batch,
                'dimensions': dim_lst,
                'pageSize': 10000,
            }
            if page_token:
                request['pageToken'] = page_token
            response = analytics.reports().batchGet(
                body={'reportRequests': [request]}).execute()
            reports = response.get('reports', [])
            for report in reports:
                records.extend(_report_rows(report))
            # Keep paging until the API stops handing out a continuation
            # token; otherwise everything past the first page is lost.
            page_token = next(
                (rep['nextPageToken'] for rep in reports if rep.get('nextPageToken')),
                None)
            if not page_token:
                break

        # Build the frame in one go instead of appending row by row
        # (DataFrame.append is gone in pandas 2.x and was quadratic).
        df_batch = pd.DataFrame(records)
        if df_batch.empty:
            continue
        if df_response.empty:
            df_response = df_batch
            continue

        join_keys = [
            name for name in dimension_names
            if name in df_response.columns and name in df_batch.columns
        ]
        if join_keys:
            # Outer join so rows that only show up in a later batch are kept.
            df_response = pd.merge(df_response, df_batch, how='outer', on=join_keys)
        else:
            # No dimensions requested: place the metric columns side by side.
            df_response = pd.concat([df_response, df_batch], axis=1)

    return df_response, dim_met_lst
    

if __name__ == '__main__':
    # Both configuration sheets live in the same workbook, so open it once;
    # passing a list of sheet names yields a dict of DataFrames keyed by name.
    conf_sheets = pd.read_excel('google_analytics_conf_1.xlsx',
                                sheet_name=['base', 'parameters'], header=0)
    df_conf_base = conf_sheets['base']
    df_conf_req = conf_sheets['parameters']

    analytics = initialize_analyticsreporting()
    google_analytics_response = get_report(analytics, df_conf_base, df_conf_req)
    df_response, dim_met_lst = google_analytics_response

    # postgre_write() reads the database settings itself through db_config(),
    # so the former stand-alone db_config() call (result discarded) is gone.
    postgre_write(df_response, dim_met_lst)

In [153]:
# Scratch cell: lift pandas' display limits so the whole result is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# NOTE(review): `response` is not assigned in any visible cell - presumably
# the tuple returned by get_report(); confirm before relying on this cell.
response

(                                        ga:sourceMedium  \
 0                                     (direct) / (none)   
 1                                     (direct) / (none)   
 2                                     (direct) / (none)   
 3                                     (direct) / (none)   
 4                                     (direct) / (none)   
 5                                     (direct) / (none)   
 6                                     (direct) / (none)   
 7                   15min.lt / 300x600_All_environement   
 8                   15min.lt / 300x600_All_environement   
 9                   15min.lt / 300x600_All_environement   
 10                  15min.lt / 300x600_All_environement   
 11                  15min.lt / 300x600_All_environement   
 12                                  15min.lt / referral   
 13                                  15min.lt / referral   
 14                                  15min.lt / referral   
 15                                  15m

In [None]:
def db_config(filename='database.ini', section='postgresql'):
    """Return the options of one INI-file section as a dictionary.

    Args:
        filename: Path of the INI file to parse.
        section: Section to extract; ``'postgresql'`` unless overridden.

    Returns:
        A dict of ``{option name: string value}`` for ``section``.

    Raises:
        Exception: If the parsed file has no section called ``section``.
    """
    config = ConfigParser()
    config.read(filename)

    if config.has_section(section):
        return dict(config.items(section))

    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

def postgre_write(df_response, dim_lst):
    """Upsert the rows of ``df_response`` into ``google_analytics_temp``.

    The table is created if necessary with the six dimension columns that
    form its primary key. Every column of ``df_response`` is then added as
    ``varchar (150)`` unless it already exists, and each row is inserted; on
    a primary-key conflict the non-key columns are overwritten with the new
    values.

    Column names are derived from the frame's labels by replacing ``:`` with
    ``_`` and lower-casing, which matches how PostgreSQL folds the unquoted
    names used in the ``CREATE TABLE`` statement.

    Errors raised while talking to the database are printed and the open
    transaction is rolled back; they are not re-raised.

    Args:
        df_response: Report frame whose column labels look like ``ga:date``.
        dim_lst: Unused; accepted for backward compatibility.
    """
    # Local import: only the top-level ``psycopg2`` package is imported by the
    # file, and the SQL-composition helpers live in this submodule.
    from psycopg2 import sql

    table = sql.Identifier('google_analytics_temp')
    key_columns = ('ga_sourcemedium', 'ga_date', 'ga_campaign',
                   'ga_adcontent', 'ga_channelgrouping', 'ga_keyword')

    # read connection parameters
    params = db_config()

    # connect to the PostgreSQL server
    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**params)

    try:
        with conn.cursor() as cur:
            cur.execute("CREATE TABLE IF NOT EXISTS google_analytics_temp("
                        "ga_sourceMedium VARCHAR(200), "
                        "ga_date DATE, "
                        "ga_campaign VARCHAR(150), "
                        "ga_adcontent VARCHAR(150), "
                        "ga_channelGrouping VARCHAR(150), "
                        "ga_keyword VARCHAR(150), "
                        "CONSTRAINT table_analytics_pk PRIMARY KEY (ga_sourceMedium, ga_date, ga_campaign, ga_adcontent, ga_channelGrouping, ga_keyword));")

            # The column set is the same for every row, so extend the schema
            # once up front instead of once per row and column.
            columns = [str(label).replace(':', '_').lower()
                       for label in df_response.columns]
            for column in columns:
                cur.execute(
                    sql.SQL("ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} varchar (150);")
                    .format(table, sql.Identifier(column)))
            conn.commit()

            if columns and not df_response.empty:
                updatable = [col for col in columns if col not in key_columns]
                if updatable:
                    assignments = sql.SQL(', ').join(
                        sql.SQL("{} = EXCLUDED.{}").format(
                            sql.Identifier(col), sql.Identifier(col))
                        for col in updatable)
                    on_conflict = sql.SQL("DO UPDATE SET {}").format(assignments)
                else:
                    on_conflict = sql.SQL("DO NOTHING")

                insert = sql.SQL(
                    "INSERT INTO {table} ({cols}) VALUES ({vals}) "
                    "ON CONFLICT ({keys}) {action};"
                ).format(
                    table=table,
                    cols=sql.SQL(', ').join(map(sql.Identifier, columns)),
                    vals=sql.SQL(', ').join(sql.Placeholder() * len(columns)),
                    keys=sql.SQL(', ').join(map(sql.Identifier, key_columns)),
                    action=on_conflict,
                )

                # Values travel as bound parameters so quotes inside campaign
                # or keyword names cannot break (or inject into) the SQL.
                # Missing values become NULL rather than the text 'nan'.
                records = [
                    [None if pd.isna(value) else str(value) for value in row]
                    for row in df_response.itertuples(index=False, name=None)
                ]
                cur.executemany(insert, records)
                conn.commit()
    except Exception as error:
        # Best effort, as before: report the problem instead of raising, but
        # do not leave a half-finished transaction behind.
        conn.rollback()
        print(error)
    finally:
        conn.close()
    print('Database connection closed.')

In [26]:
import json
# NOTE(review): json.load() reads from a file-like object, but `response` is
# a dict here - that is what the AttributeError recorded below reports.
data = json.load(response)

AttributeError: 'dict' object has no attribute 'read'

In [56]:
# NOTE(review): this is a shell command, not Python - executed as a notebook
# cell it fails with the SyntaxError recorded below. Run it from a terminal.
jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10

SyntaxError: invalid syntax (<ipython-input-56-d3bfc748244a>, line 1)

In [181]:
# Scratch: a sample metric entry in the {'expression': ...} shape that
# get_report() builds for each requested metric.
shit = {'expression': 'ga:goal20Completions'}

In [189]:
# Scratch: pull the first (and only) value out of the sample dict.
ass = list(shit.values())[0]

In [190]:
# Scratch: show the extracted metric name.
print(ass)

ga:goal20Completions
