In [28]:
###### import everything upwards from home dir
from __future__ import absolute_import
import psycopg2
import psycopg2.extras
from configparser import ConfigParser
import argparse
import six
import sys
import pandas as pd
import re
import datetime
import numpy as np
import json
import yaml
from xlrd import XLRDError
from facebook_business.api import FacebookAdsApi
from facebook_business.api import FacebookRequestError
from facebook_business.adobjects.user import User
from facebook_business.adobjects.adaccount import AdAccount
from facebook_business.adobjects.campaign import Campaign
from facebook_business.adobjects.adset import AdSet
from facebookads.adobjects.adsinsights import AdsInsights
import threading
from ast import literal_eval
from datetime import datetime, timedelta
import io
import helper_functions
import google.ads.google_ads.client
from googleapiclient.discovery import build
from googleapiclient import http
from oauth2client.service_account import ServiceAccountCredentials

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('display.width', None)

def facebook_marketing_api(account_id, df_conf_req, access_token, period):
    """Request ad-set level insights for one ad account and one period.

    Args:
        account_id: Ad account id passed to ``AdAccount``.
        df_conf_req: Configuration frame with a ``dimensions`` column. The
            names 'publisher_platform', 'platform_position' and
            'action_attribution_windows' are sent as ``breakdowns``; every
            other name is sent as a requested field.
        access_token: Token used to initialise ``FacebookAdsApi``.
        period: Time range of the request; it is sent in its string form.

    Returns:
        A tuple ``(df_response, df_response_action)``. The first frame holds
        one row per insight row with every key except ``action_values``. The
        second holds one row per insight row that carries a non-empty
        ``action_values``, combined with that row's identifying keys. Both
        frames are empty when the API returns no rows.
    """
    breakdown_names = ('publisher_platform', 'platform_position', 'action_attribution_windows')
    core_keys = ('account_id', 'campaign_id', 'adset_id', 'date_start', 'date_stop',
                 'objective', 'publisher_platform', 'platform_position')

    # split the configured dimensions into breakdowns and plain fields
    requested = df_conf_req['dimensions'].tolist()
    breakdown_lst_call = [name for name in requested if name in breakdown_names]
    dim_lst_call = [name for name in requested if name not in breakdown_names]

    param_set_lst = {
        'time_range': f"{period}",
        #'date_preset': f"{period}",
        'level': 'adset',
        'filtering': [],
        'action_attribution_windows': "['1d_view', '7d_view', '28d_view', '1d_click', '7d_click', '28d_click', 'default']",
        'breakdowns': f"{breakdown_lst_call}",
        # plain integer; the previous value `{1}` was a set literal
        'time_increment': 1
    }

    print('Calling Facebook Marketing API...')
    FacebookAdsApi.init(access_token=access_token)
    response = AdAccount(account_id).get_insights(fields = dim_lst_call, params = param_set_lst)

    var_lst = []
    var_lst_action = []

    for row in response:
        row_dict = vars(row)['_data']
        var_dict_core = {key: value for key, value in row_dict.items() if key in core_keys}
        var_lst.append({key: value for key, value in row_dict.items() if key != 'action_values'})

        # Look the key up directly: a substring test on str(keys()) can match
        # a different key and then fail on the indexing that follows.
        action_values = row_dict.get('action_values')
        if action_values:
            var_dict_action = {}
            # NOTE(review): every entry is merged into the same dict, so keys
            # shared between entries keep only the value of the last entry -
            # confirm whether one output row per entry was intended.
            for action in action_values:
                var_dict_action.update(action)
            var_dict_action.update(var_dict_core)
            var_lst_action.append(var_dict_action)

    # DataFrame.append was removed in pandas 2.0; build the frames directly.
    df_response = pd.DataFrame(var_lst)
    df_response_action = pd.DataFrame(var_lst_action)

    # len() reports 0 for an empty response instead of a misleading 1
    print(str(len(var_lst)) + ' row(s) received')
    return df_response, df_response_action

def db_config(filename='database.ini', section='postgresql'):
    """Return the options of one INI section as a dict.

    Args:
        filename: Path of the INI file to read.
        section: Name of the section to return (defaults to 'postgresql').

    Returns:
        Dict mapping each option name of the section to its string value.

    Raises:
        Exception: If the section is not present in what was read from
            ``filename``.
    """
    ini = ConfigParser()
    ini.read(filename)

    # fail early when the requested section is not there
    if not ini.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return {option: value for option, value in ini.items(section)}

def create_table(tab_cr_str, conn, cur):
    """Execute the statement ``tab_cr_str`` on ``cur`` and commit on ``conn``.

    Args:
        tab_cr_str: SQL statement to execute.
        conn: Connection object providing ``commit()``.
        cur: Cursor object providing ``execute()``.
    """
    cur.execute(tab_cr_str)
    conn.commit()
    
def tab_creation_str(t_name, pk_name, pk_lst, dtype_dict):
    """Build a ``CREATE TABLE IF NOT EXISTS`` statement with a named primary key.

    Args:
        t_name: Table name.
        pk_name: Name of the primary-key constraint.
        pk_lst: Column names that form the primary key.
        dtype_dict: Mapping of column name to SQL type, in column order.

    Returns:
        Tuple ``(statement, dtype_dict)``; ``dtype_dict`` is passed through
        unchanged.
    """
    # every column definition carries its own trailing ", " so the
    # constraint clause can follow directly
    column_defs = ''.join(col + ' ' + dtype_dict[col] + ', ' for col in dtype_dict)
    pk_columns = ', '.join(pk_lst)
    statement = ''.join([
        "CREATE TABLE IF NOT EXISTS ", t_name, " ",
        "(", column_defs, "CONSTRAINT ",
        pk_name, " PRIMARY KEY (", pk_columns, "));",
    ])
    return statement, dtype_dict

def init_conn():
    """Open a psycopg2 connection using the settings returned by ``db_config()``.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    return psycopg2.connect(**db_config())

def rplc_nan(df_response):
    """Return ``df_response`` with every NaN cell replaced by ``None``.

    Args:
        df_response: Frame to clean; it is not modified in place.

    Returns:
        The frame produced by ``DataFrame.replace``.
    """
    # `pd.np` was removed in pandas 2.0; use the numpy import directly.
    return df_response.replace({np.nan: None})

def end_conn(conn):
    """Close ``conn`` by calling its ``close()`` method."""
    conn.close()
    
def init_cur(conn):
    """Create and return a new cursor from ``conn``."""
    cur = conn.cursor()
    return cur
    
def add_column(t_name, db_dim, col_dtype, cur):
    """Execute an ``ALTER TABLE ... ADD COLUMN IF NOT EXISTS`` statement.

    Args:
        t_name: Table name.
        db_dim: Name of the column to add.
        col_dtype: SQL type of the column.
        cur: Cursor object providing ``execute()``.
    """
    statement = "ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} {};".format(t_name, db_dim, col_dtype)
    cur.execute(statement)

def drop_table(t_name, do_drop, cur):
    """Execute ``DROP TABLE IF EXISTS`` for ``t_name`` when ``do_drop`` is truthy.

    Args:
        t_name: Table name.
        do_drop: Flag; nothing is executed when it is falsy.
        cur: Cursor object providing ``execute()``.
    """
    if not do_drop:
        return
    cur.execute("DROP TABLE IF EXISTS {};".format(t_name))
    
def add_ts(dataframe):
    """Insert ``last_updated_ts`` and ``creation_ts`` as the first two columns.

    The frame is modified in place and also returned. Both columns receive the
    same timestamp, so rows written in one run carry identical values.

    Args:
        dataframe: Frame to stamp; ``DataFrame.insert`` raises if either
            column already exists.

    Returns:
        The same frame object, with columns ordered
        ``last_updated_ts, creation_ts, <original columns>``.
    """
    # read the clock once so the two stamps cannot differ
    stamp = datetime.now()
    dataframe.insert(0, 'creation_ts', stamp)
    dataframe.insert(0, 'last_updated_ts', stamp)
    return dataframe

def get_pln_no(df_response, src_col_name, is_in_df):
    """Optionally prepend a ``pln_no`` column derived from ``src_col_name``.

    Args:
        df_response: Frame to extend; it is modified in place.
        src_col_name: Column whose values are passed to ``pln_no_reg``.
        is_in_df: When falsy the frame is returned untouched.

    Returns:
        The same frame object.
    """
    if not is_in_df:
        return df_response

    plan_numbers = df_response[src_col_name].apply(pln_no_reg)
    df_response.insert(0, 'pln_no', plan_numbers)
    return df_response
    
def pln_no_reg(campaign_name):
    """Extract the first plan number (e.g. ``PLN-12-345``) from a campaign name.

    Args:
        campaign_name: Text to search. Non-string values (``None``, NaN)
            are treated as containing no plan number.

    Returns:
        The first match of the plan-number pattern, or ``''`` when there is
        none.
    """
    if not isinstance(campaign_name, str):
        return ''
    # Take the match object directly; slicing the repr of the findall() list
    # produced fragments such as "A', 'B" when a name held several matches.
    found = re.search(r'(PLN?[\-]\d{1,4}?[\-]\d{1,4})', campaign_name)
    return found.group(1) if found else ''

def rename_col(col_name):
    """Turn a column name into one usable as an unquoted SQL identifier.

    Dots are replaced by underscores, and a name that then starts with a digit
    is prefixed with an underscore. Both rules are applied, so ``'1d.view'``
    becomes ``'_1d_view'``.

    Args:
        col_name: Original column name.

    Returns:
        The adjusted name; an empty name is returned unchanged.
    """
    renamed = col_name.replace('.', '_')
    # the slice keeps an empty name from raising IndexError
    if renamed[:1].isdigit():
        renamed = '_' + renamed
    return renamed
    
def get_types(df_response):
    """Infer a SQL column type for every column of ``df_response``.

    Each non-missing cell is classified on its own:

    * text starting with ``YYYY-MM-DD hh:mm:ss`` (fraction optional) -> TIMESTAMP
    * text starting with ``YYYY-MM-DD`` -> DATE
    * a string that ``literal_eval`` parses to a float -> REAL
    * a string that ``literal_eval`` parses to an int -> BIGINT
    * anything else -> VARCHAR

    A column receives the type all of its cells agree on. BIGINT mixed with
    REAL gives REAL, DATE mixed with TIMESTAMP gives TIMESTAMP, and any other
    mixture gives VARCHAR. A column without any non-missing cell is VARCHAR.
    The decision therefore no longer depends on row order.

    Args:
        df_response: Frame whose columns are to be typed.

    Returns:
        Dict mapping column name to one of 'TIMESTAMP', 'DATE', 'REAL',
        'BIGINT' or 'VARCHAR', in column order.
    """
    timestamp_re = re.compile(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(\.\d+)?')
    date_re = re.compile(r'\d{4}-\d{2}-\d{2}')

    def classify(value):
        # Classify one non-missing cell.
        text = str(value)
        if timestamp_re.match(text):
            return 'TIMESTAMP'
        if date_re.match(text):
            return 'DATE'
        if not isinstance(value, str):
            return 'VARCHAR'
        try:
            parsed = literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return 'VARCHAR'
        # bool is a subclass of int, but 'True' is not a BIGINT value
        if isinstance(parsed, bool):
            return 'VARCHAR'
        if isinstance(parsed, float):
            return 'REAL'
        if isinstance(parsed, int):
            return 'BIGINT'
        # lists, dicts, None, quoted strings ... are stored as text
        return 'VARCHAR'

    col_dtypes = {}
    for key in df_response.columns:
        kinds = set()
        for value in df_response[key]:
            # missing cells say nothing about the column type
            if pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            kinds.add(classify(value))

        if not kinds:
            dtype = 'VARCHAR'
        elif len(kinds) == 1:
            dtype = next(iter(kinds))
        elif kinds == {'BIGINT', 'REAL'}:
            dtype = 'REAL'
        elif kinds == {'DATE', 'TIMESTAMP'}:
            dtype = 'TIMESTAMP'
        else:
            dtype = 'VARCHAR'
        col_dtypes[key] = dtype
    return col_dtypes
        
def upsert_str(dims, t_name, pk):
    """Build an ``INSERT ... ON CONFLICT`` statement for ``execute_values``.

    On a key conflict every inserted column is overwritten with the incoming
    value, except the key columns themselves and ``creation_ts``, which keeps
    the value stored when the row was first written. Previously only
    ``creation_ts`` was overwritten, so existing rows never received new
    values.

    Args:
        dims: Comma-separated list of the inserted column names.
        t_name: Target table name.
        pk: Comma-separated list of the conflict (primary key) columns.

    Returns:
        The statement text containing a single ``%s`` placeholder for the
        VALUES list. When no column is left to update the conflict action is
        ``DO NOTHING``.
    """
    key_cols = {name.strip() for name in pk.split(',')}
    update_cols = [
        name.strip()
        for name in dims.split(',')
        if name.strip() and name.strip() not in key_cols and name.strip() != 'creation_ts'
    ]
    if update_cols:
        assignments = ', '.join(f"{name} = EXCLUDED.{name}" for name in update_cols)
        action = f"UPDATE SET {assignments}"
    else:
        action = "NOTHING"
    return f"INSERT INTO {t_name} ({dims}) VALUES %s ON CONFLICT ({pk}) DO {action};"

def upsert(conn, cur, upsert_q, df_res_tuple, page_size):
    """Execute ``upsert_q`` for all rows, commit, and close the cursor.

    Args:
        conn: Connection object providing ``commit()``.
        cur: Cursor the statement is executed on; it is closed before
            returning, also when execution fails.
        upsert_q: Statement with a single ``%s`` placeholder for the VALUES
            list.
        df_res_tuple: Sequence of row tuples.
        page_size: Forwarded to ``execute_values`` as ``page_size``
            (previously accepted but ignored).
    """
    try:
        psycopg2.extras.execute_values(cur, upsert_q, df_res_tuple, page_size=page_size)
        conn.commit()
    finally:
        cur.close()

def postgre_write_main(df_response, t_name, pk_name, pk_lst, do_drop, page_size, src_col_name, is_pln_df):
    """Prepare a frame and upsert it into a PostgreSQL table.

    Steps: optionally add the ``pln_no`` column, replace NaN by None, rename
    columns, add the timestamp columns, infer the column types, then open a
    connection, optionally drop the table, create it if needed, and upsert
    all rows.

    Args:
        df_response: Frame to write. ``None`` or an empty frame is skipped
            without opening a connection (so ``do_drop`` has no effect then).
        t_name: Target table name.
        pk_name: Name of the primary-key constraint.
        pk_lst: Column names forming the primary key.
        do_drop: When truthy the table is dropped before it is created.
        page_size: Batch size handed to ``upsert``.
        src_col_name: Column the plan number is extracted from.
        is_pln_df: Whether the ``pln_no`` column should be added.

    Any exception raised while preparing or writing is printed and ends the
    process with exit status 1.
    """
    # An empty frame has no typed columns to build a CREATE TABLE from.
    if df_response is None or df_response.empty:
        print(f'No rows to write to {t_name}; skipping.')
        print('')
        return

    # Bound before the try so the finally clause can always test it, even
    # when a preparation step fails before the connection is opened.
    conn = None
    try:
        df_response = get_pln_no(df_response, src_col_name, is_pln_df)
        df_response = rplc_nan(df_response)
        df_response = df_response.rename(columns=rename_col)
        df_response = add_ts(df_response)
        dtype_dict = get_types(df_response)

        conn = init_conn()
        cur = init_cur(conn)
        drop_table(t_name, do_drop, cur)
        tab_cr_str = tab_creation_str(t_name, pk_name, pk_lst, dtype_dict)[0]
        create_table(tab_cr_str, conn, cur)

        df_res_tuple = list(df_response.itertuples(index=False, name=None))
        upsert_q = upsert_str(','.join(dtype_dict.keys()), t_name, ','.join(pk_lst))
        upsert(conn, cur, upsert_q, df_res_tuple, page_size)
    except Exception as error:
        print('Database error')
        print(error)
        sys.exit(1)
    finally:
        if conn is not None:
            conn.close()
    print('Database connection closed.')
    print('')

def period_split(def_period, def_intv):
    """Split a period into boundary dates spaced ``def_intv`` days apart.

    Args:
        def_period: Mapping whose first value is the start date and whose
            second value is the end date, both formatted ``YYYY-MM-DD``.
        def_intv: Step between boundaries in days; must be positive.

    Returns:
        List of ``YYYY-MM-DD`` strings: the start date, every boundary that
        lies strictly before the end date, and finally the end date.

    Raises:
        ValueError: If ``def_intv`` is not positive (the loop would never
            terminate otherwise).
    """
    if def_intv <= 0:
        raise ValueError('def_intv must be a positive number of days')

    bounds = list(def_period.values())
    start = datetime.strptime(bounds[0], "%Y-%m-%d")
    end = datetime.strptime(bounds[1], "%Y-%m-%d")
    step = timedelta(days=def_intv)

    period_lst = [start.strftime("%Y-%m-%d")]
    curr = start + step
    while curr < end:
        period_lst.append(curr.strftime("%Y-%m-%d"))
        curr += step
    period_lst.append(end.strftime("%Y-%m-%d"))
    return period_lst
    
def facebook_marketing_prep(def_intv, account_id):
    """Load the configuration, fetch insights chunk by chunk and store them.

    The period from the 'parameters' sheet is cut into chunks of ``def_intv``
    days. Each chunk ends one day before the next boundary, so the configured
    end date itself is not requested unless it equals the start date. For
    every chunk the insights are fetched and written to the two target tables.

    When the API asks to reduce the amount of data, the whole run is repeated
    with ``def_intv - 1`` (starting again at the first chunk) and the result
    of that run is returned.

    Args:
        def_intv: Chunk length in days.
        account_id: Ad account id (string).

    Returns:
        Tuple ``(df_response, df_response_action)`` of the last chunk
        processed.

    Configuration, key, name and API errors are printed and end the process
    with exit status 1.
    """
    try:
        print('Starting...')
        try:
            # read configuration from excel
            df_conf_req = pd.read_excel('facebook_marketing_conf_1.xlsx', sheet_name='parameters', header=0)
            # validate first, so an empty cell gives the intended message
            # instead of an uncaught TypeError from eval()
            if pd.isna(df_conf_req['period'].iloc[0]):
                raise KeyError('Period is missing')
            # NOTE(review): eval() executes whatever the workbook cell holds;
            # if the cell is always a plain dict literal, ast.literal_eval is
            # the safe replacement - confirm the cell format before switching.
            def_period = dict(eval(df_conf_req.iat[0,1]))
            # called only to fail early when the database section is missing
            db_config(filename = 'database.ini')
            if df_conf_req['dimensions'].isna().any():
                raise KeyError('One or more dimensions missing')
        except(NameError, XLRDError, KeyError) as error:
            print('Error while reading configuration file(s)')
            print(error)
            sys.exit(1)

        with open("fb_secrets.yaml", 'r') as secrets_file:
            try:
                secrets = yaml.safe_load(secrets_file)
                access_token = str(secrets['access_token'])
            except yaml.YAMLError as error:
                print('Could not read FB secrets')
                print(error)
                sys.exit(1)

        # turn consecutive boundaries into inclusive since/until chunks
        period_lst = period_split(def_period, def_intv)
        per_dct_lst = []
        for since, next_since in zip(period_lst, period_lst[1:]):
            until = (datetime.strptime(next_since, "%Y-%m-%d") - timedelta(days = 1)).strftime("%Y-%m-%d")
            if since > until:
                # happens when both boundaries are the same day
                until = since
            per_dct_lst.append({'since': since, 'until': until})
        print(per_dct_lst)
        print('Account ID: ' + account_id)

        pk_lst = ['account_id', 'campaign_id', 'adset_id', 'date_start',
                  'date_stop', 'objective', 'publisher_platform', 'platform_position']
        do_drop = False
        page_size = 1000

        for period in per_dct_lst:
            df_response, df_response_action = facebook_marketing_api(account_id, df_conf_req, access_token, period)

            postgre_write_main(df_response, 'facebook_marketing_temp_test', 'table_fb_pk_tst',
                               pk_lst, do_drop, page_size, 'campaign_name', True)
            postgre_write_main(df_response_action, 'facebook_marketing_temp_action_test', 'table_fb_pk_action_tst',
                               pk_lst, do_drop, page_size, '', False)

            print('Success')

        return df_response, df_response_action
    except(KeyError) as error:
        print('Key error')
        print(error)
        sys.exit(1)
    except(NameError) as error:
        print('Name Error')
        print(error)
        sys.exit(1)
    except(FacebookRequestError) as error:
        if "Please reduce the amount of data you're asking for" in str(error):
            if def_intv < 2:
                print('day chunk size too large')
                print(error)
                sys.exit(1)
            # hand the retry's frames back; without `return` the caller got None
            return facebook_marketing_prep(def_intv - 1, account_id)
        else:
            print('Facebook marketing API Error')
            print(error)
            sys.exit(1)
            
# Driver: run the export once for every account listed on the 'base' sheet.
df_conf_base = pd.read_excel('facebook_marketing_conf_1.xlsx', sheet_name='base', header=0)
def_intv = 7

for index, row in df_conf_base.iterrows():
    try:
        # Check the current row, and do it before str() turns a missing
        # value into the text 'nan'.
        if pd.isna(row['account_id']):
            raise KeyError('No base data provided (account_id(s))')
        account_id = str(row['account_id'])
        #account_id = str(df_conf_base.iat[0,0])
        start = facebook_marketing_prep(def_intv, account_id)
        # keep the previous frames when the run produced no result
        if start is not None:
            df_response, df_response_action = start
    except(KeyError) as error:
        print(error)
        sys.exit(1)

Starting...
[{'since': '2019-10-01', 'until': '2019-10-07'}, {'since': '2019-10-08', 'until': '2019-10-14'}, {'since': '2019-10-15', 'until': '2019-10-21'}, {'since': '2019-10-22', 'until': '2019-10-28'}, {'since': '2019-10-29', 'until': '2019-10-31'}]
Account ID: act_1212725268803451
Calling Facebook Marketing API...
Starting...
[{'since': '2019-10-01', 'until': '2019-10-06'}, {'since': '2019-10-07', 'until': '2019-10-12'}, {'since': '2019-10-13', 'until': '2019-10-18'}, {'since': '2019-10-19', 'until': '2019-10-24'}, {'since': '2019-10-25', 'until': '2019-10-30'}, {'since': '2019-10-31', 'until': '2019-10-31'}]
Account ID: act_1212725268803451
Calling Facebook Marketing API...
Starting...
[{'since': '2019-10-01', 'until': '2019-10-05'}, {'since': '2019-10-06', 'until': '2019-10-10'}, {'since': '2019-10-11', 'until': '2019-10-15'}, {'since': '2019-10-16', 'until': '2019-10-20'}, {'since': '2019-10-21', 'until': '2019-10-25'}, {'since': '2019-10-26', 'until': '2019-10-30'}, {'since': '

SystemExit: 1

### from ast import literal_eval
import datetime
def get_types(dataframe):
    """Infer a SQL column type for every column of ``dataframe``.

    Works on the frame passed in (the earlier draft read the module-level
    ``df_response`` and ignored its argument). Each non-missing cell is
    classified on its own:

    * text starting with ``YYYY-MM-DD hh:mm:ss`` (fraction optional) -> TIMESTAMP
    * text starting with ``YYYY-MM-DD`` -> DATE
    * a string that ``literal_eval`` parses to a float -> REAL
    * a string that ``literal_eval`` parses to an int -> BIGINT
    * anything else -> VARCHAR

    A column receives the type all of its cells agree on. BIGINT mixed with
    REAL gives REAL, DATE mixed with TIMESTAMP gives TIMESTAMP, and any other
    mixture gives VARCHAR. A column without any non-missing cell is VARCHAR.

    Args:
        dataframe: Frame whose columns are to be typed.

    Returns:
        Dict mapping column name to the inferred type, in column order.
    """
    timestamp_re = re.compile(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(\.\d+)?')
    date_re = re.compile(r'\d{4}-\d{2}-\d{2}')

    def classify(value):
        # Classify one non-missing cell; str() keeps non-string cells from
        # raising inside re.match.
        text = str(value)
        if timestamp_re.match(text):
            return 'TIMESTAMP'
        if date_re.match(text):
            return 'DATE'
        if not isinstance(value, str):
            return 'VARCHAR'
        try:
            parsed = literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return 'VARCHAR'
        if isinstance(parsed, bool):
            return 'VARCHAR'
        if isinstance(parsed, float):
            return 'REAL'
        if isinstance(parsed, int):
            return 'BIGINT'
        return 'VARCHAR'

    col_dtypes = {}
    for key in dataframe.columns:
        kinds = set()
        for value in dataframe[key]:
            # missing cells say nothing about the column type
            if pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            kinds.add(classify(value))

        if not kinds:
            dtype = 'VARCHAR'
        elif len(kinds) == 1:
            dtype = next(iter(kinds))
        elif kinds == {'BIGINT', 'REAL'}:
            dtype = 'REAL'
        elif kinds == {'DATE', 'TIMESTAMP'}:
            dtype = 'TIMESTAMP'
        else:
            dtype = 'VARCHAR'
        col_dtypes[key] = dtype
    return col_dtypes
# Scratch check: print the column types inferred for the module-level df_response.
print(get_types(df_response))

In [None]:
# NOTE(review): prints the API access token in clear text into the notebook
# output. The only assignment of `access_token` visible in this file is a
# local inside facebook_marketing_prep - confirm where a module-level value
# would come from, and clear this cell's output before sharing the notebook.
print(access_token)

In [None]:
#df_response['Position'].str.contains("PLN")
#df_response.filter(like="PLN")
# Print the labels of the columns in which at least one cell equals exactly
# 'PLN' (isin() compares whole values, it does not search inside strings).
print(df_response.columns[df_response.isin(['PLN']).any()])

In [None]:
# Build the 100 calendar days ending today, newest first.
# Relies on `datetime` being the module here (`import datetime`), not the
# `datetime` class.
base = datetime.date.today()
one_day = datetime.timedelta(days=1)
date_list = [base - offset * one_day for offset in range(100)]

In [None]:
# Scratch check: show the generated list of dates.
print(date_list)

In [None]:
# Scratch check: show the date the list was built from.
print(base)