In [None]:
import sys
import os
sys.path.insert(1, os.getcwd())
from datetime import date
from datetime import timedelta
import logging

from data_retrieval.google_analytics_api_retrieval import GoogleAnalyticsApiRetrieval, GoogleAuthenticationMethod
from helpers.file_helper import save_list_to_csv, get_data_path
from helpers.settings_helper import get_google_analytics_view_id_from_settings, get_file_storage_root_folder_from_settings
from helpers.metadata_helper import load_metadata, DataFrequency, DataModule, JobStatus, get_start_date

# OAuth inputs handed to GoogleAnalyticsApiRetrieval: the client-secrets file
# and the token file.
CLIENT_SECRETS_FILE_PATH = "./credentials/credentials.json"
TOKEN_FILE_PATH = "./credentials/token.json"

# Metadata and settings file locations (not referenced directly in the cells
# shown here; presumably consumed by the helper modules -- TODO confirm).
METADATA_FILE_PATH = "./metadata/metadata.json"
SETTINGS_FILE_PATH = "./settings/app_settings.json"

# Logger for this notebook, named after the current module namespace.
ga_data_log = logging.getLogger(__name__)


In [None]:
# Look up the Google Analytics view id via the settings helper and echo it.
view_id = get_google_analytics_view_id_from_settings()
print('google analytics view id ', view_id)
# Look up the storage root folder; it is later passed to get_data_path when
# building the output file paths.
root_dir = get_file_storage_root_folder_from_settings()
print('root_dir to store data is ', root_dir)

In [None]:
# Job 1: Age, Daily Data
# Build the Google Analytics client with OAuth credentials for the configured view.
ga = GoogleAnalyticsApiRetrieval(google_authentication_method = GoogleAuthenticationMethod.OAuth,
                                oauth_credentials_filepath= CLIENT_SECRETS_FILE_PATH,
                                oauth_token_filepath=TOKEN_FILE_PATH,
                                view_id=view_id)

metadata = load_metadata(DataFrequency.Daily, DataModule.Age)
# If the current job is in progress, then don't run another job.
# NOTE(review): start_date is only bound in the else branch below, so any later
# cell that reads start_date raises NameError on a fresh kernel when a job is
# in progress.
if metadata.status == JobStatus.InProgress.value:
    ga_data_log.info("Another job is in progress")
else:
    start_date = get_start_date(metadata.last_data_extraction_date)
    # Request a single day (start == end), matching how the per-module loop
    # calls the same method; previously start_date was computed but never
    # passed to the query.
    data = ga.get_sessions_by_age(start_date, start_date)


In [None]:
# Build the Google Analytics client with OAuth credentials for the configured view.
ga = GoogleAnalyticsApiRetrieval(google_authentication_method = GoogleAuthenticationMethod.OAuth,
                                oauth_credentials_filepath= CLIENT_SECRETS_FILE_PATH,
                                oauth_token_filepath=TOKEN_FILE_PATH,
                                view_id=view_id)

modules = ['age', 'gender', 'landing_page']

# Map each module name to the client method that retrieves its sessions.
session_fetchers = {
    'age': ga.get_sessions_by_age,
    'gender': ga.get_sessions_by_gender,
    'landing_page': ga.get_sessions_by_landing_page,
}

# NOTE(review): start_date must already be defined by an earlier cell; it is
# only assigned there when no other job is in progress.
# while start_date < end_date:
for module in modules:
    print(f'getting {module} data for date {start_date}')
    # Single-day query: the same date is used as both range start and range end.
    data = session_fetchers[module](start_date, start_date)

    # print('data is ', data)
    file_path = get_data_path(root_dir, 'daily', module, start_date)
    print('saving file to ', file_path)
    save_list_to_csv(data, file_path)

# Advance to the next day only after every module has been extracted for the
# current day. Doing this inside the module loop made each module fetch and
# save a different date. Re-running this cell advances start_date again.
start_date = start_date + timedelta(days=1)