In [1]:
import numpy as np
import requests
import os
import time
import pprint
from datetime import datetime, timedelta

https://support.appsflyer.com/hc/en-us/articles/207034346-Pull-APIs-Pulling-AppsFlyer-Reports-by-APIs

- API calls return a max. of 200K rows.
- If a report has exactly 200K rows, then assume rows are missing.
- Make multiple API calls, using from/to parameters that include the time of day.



-------------

# 1. Params

In [2]:
# Root path that the per-month report folders are created under
# (the month folder name is appended to it directly, so keep the trailing slash).
DATA_DIRPATH = './'

In [3]:
# 24 hours * 60 minutes * 60 seconds
SECONDS_IN_A_DAY = 24 * 60 * 60

### token

##### Only the admin can get a Pull API token.  https://hq1.appsflyer.com

In [4]:
# Pull API credentials and endpoint pieces.
# The token is read from the APPSFLYER_API_TOKEN environment variable so the
# real secret never has to be saved inside the notebook; the placeholder value
# is kept as a fallback so the cell still runs when the variable is not set.
token = os.environ.get('APPSFLYER_API_TOKEN', '1abcd2ef-3456-7gi8-j9k-101l1m1n2o13')
api_v = 'v5'
api_url = 'https://hq.appsflyer.com/export/'

### Apps, reports, and in-app events
https://hq1.appsflyer.com/apps/myapps

In [5]:
# App ids to pull reports for.
apps = [
    'com.my.androidapp',
    'id123456789',
    'com.mysecond.androidapp',
    'id987654321',
    'com.myandroidapp.indev',
]

In [6]:
# Report names; each one becomes a path segment of the request URL.
reports = [
    'installs_report',
    'uninstall_events_report',
    'in_app_events_report',
    'geo_by_date_report',
]

In [7]:
# Event names sent as the `event_name` parameter of in_app_events_report requests.
in_app_events = (
    'first_open_custom',
    'login',
    'order',
    'ordercancel',
)

#### look for your events here: 
https://hq1.appsflyer.com/apps/myapps app dashboard > Reports > Export data > Raw data reports > In-App Events



### additional fields

In [8]:
# Extra columns requested (as `additional_fields`) for installs_report.
installs_additional_fields = (
    'install_app_store',
    'match_type',
    'contributor1_match_type',
    'contributor2_match_type',
    'contributor3_match_type',
    'device_category',
    'gp_referrer',
    'gp_click_time',
    'gp_install_begin',
    'amazon_aid',
    'keyword_match_type',
)

In [9]:
# Extra columns requested (as `additional_fields`) for in_app_events_report.
events_additional_fields = (
    'install_app_store',
    'match_type',
    'contributor1_match_type',
    'contributor2_match_type',
    'contributor3_match_type',
    'device_category',
    'gp_referrer',
    'gp_click_time',
    'gp_install_begin',
    'amazon_aid',
    'keyword_match_type',
)

In [10]:
# Extra columns requested (as `additional_fields`) for uninstall_events_report.
uninstall_additional_fields = (
    'gp_referrer',
    'gp_click_time',
    'gp_install_begin',
    'amazon_aid',
    'keyword_match_type',
)

### timezone
https://support.appsflyer.com/hc/en-us/articles/207034346-Pull-API-aggregate-and-raw-data-reporting#raw-data-time-zone-and-currency

In [11]:
# Time zone sent as the `timezone` request parameter and embedded in file names.
# Alternatives kept for quick switching:
#   'Europe/Moscow', 'America/Chicago', 'Europe/Paris', 'Asia/Jakarta'
timezone = 'Asia/Yekaterinburg'

timezone

'Asia/Yekaterinburg'

------------

# 2. Functions

In [12]:
def get_months(stopdate):
    """Return three reference dates, oldest first, one per month to process.

    Parameters
    ----------
    stopdate : datetime.datetime or datetime.date
        The last day to cover; it represents the most recent month.

    Returns
    -------
    tuple
        ``(month_1, month_2, month_3)`` where ``month_3`` is ``stopdate``
        itself, ``month_2`` is the last day of the month before it and
        ``month_1`` is the last day of the month before that. Any time-of-day
        carried by ``stopdate`` is preserved in all three values.
    """
    month_3 = stopdate
    # First day of a month minus one day is the last day of the previous month.
    month_2 = month_3.replace(day=1) - timedelta(days=1)
    month_1 = month_2.replace(day=1) - timedelta(days=1)

    return month_1, month_2, month_3

In [13]:
def make_request(url, params):
    """GET ``url`` with ``params``, retrying once after a pause on failure.

    If the first response is not OK, the function sleeps for 61 seconds
    (presumably to get past a per-minute rate limit -- confirm against the
    API policy) and issues the same request one more time.

    Returns the response of the last attempt, which may itself be a failure;
    callers are expected to check ``.ok``.
    """
    response = requests.get(url, params=params)
    if not response.ok:
        time.sleep(61)
        response = requests.get(url, params=params)
    return response

In [14]:
def save_csv(r, filename):
    """Write the text body of response ``r`` to ``filename``.

    Parameters
    ----------
    r : object with a ``text`` attribute (e.g. a ``requests`` response)
        Its ``text`` is written verbatim.
    filename : str or path-like
        Destination file; it is created or overwritten. The platform's
        default text encoding is used, as before.

    Returns
    -------
    None
    """
    # The context manager closes the file even if write() raises.
    with open(filename, 'w') as r_csv:
        r_csv.write(r.text)

In [15]:
def load_csv(url, params, filename):
    """Request a report and save it to ``filename`` if it looks complete.

    The response obtained from ``make_request(url, params)`` is handled as
    follows:

    * not OK                      -> the response body is printed, nothing saved;
    * fewer than 2 lines          -> treated as an empty file, nothing saved;
    * 200000 lines or more        -> treated as truncated by the 200K-row
                                     limit, nothing saved;
    * otherwise                   -> written to ``filename`` via ``save_csv``.

    In every case the HTTP reason and status code are printed at the end.
    Returns None.
    """
    r = make_request(url, params)

    if not r.ok:
        print(r.text)
    else:
        # Read the body and count its lines once; this equals
        # len(text.split('\n')) without building the list of lines.
        text = r.text
        line_count = text.count('\n') + 1

        if line_count < 2:
            print(text, 'Error: Empty file. Not saved')
        elif line_count >= 200000:
            # Do not echo the body here: it is a 200K-line report and would
            # flood the cell output.
            print('Notice. Report limitations: Up to 200K rows. Not saved')
        else:
            save_csv(r, filename)

    print(f'{r.reason} Status code - {r.status_code}')

In [16]:
def make_url(app, report, api_url=api_url, api_v=api_v):
    """Build the report URL as ``<api_url><app>/<report>/<api_v>``.

    ``api_url`` is used as given, so it is expected to end with a slash.
    """
    path = '/'.join((app, report, api_v))
    return str(api_url + path)

In [17]:
def make_params(report, from_date, to_date, token=token, timezone=timezone, in_app_events=in_app_events):
    """Assemble the query parameters for one report request.

    Always included: ``api_token``, ``from``, ``to`` and ``timezone``.
    Depending on ``report``:

    * ``installs_report``          adds ``additional_fields`` (installs set);
    * ``uninstall_events_report``  adds ``additional_fields`` (uninstall set);
    * ``in_app_events_report``     adds ``additional_fields`` (events set)
                                   and ``event_name``;
    * any other report             adds nothing extra.

    Returns the parameters as a dict.
    """
    params = {'api_token': token, 'from': from_date, 'to': to_date}

    # The dict was just created, so plain assignment is enough here.
    if report == 'installs_report':
        params['additional_fields'] = installs_additional_fields
    elif report == 'uninstall_events_report':
        params['additional_fields'] = uninstall_additional_fields
    elif report == 'in_app_events_report':
        params['additional_fields'] = events_additional_fields
        params['event_name'] = in_app_events

    params['timezone'] = timezone
    return params


In [18]:
def make_filename(app, report, dirpath, from_date, to_date, timezone=timezone):
    """Return the CSV path for one report.

    The result has the form
    ``<dirpath>/<app>_<report>_<from_date>_<to_date>_<timezone>.csv`` where
    every ``/`` in ``timezone`` is replaced by ``_``.
    """
    tz_tag = timezone.replace('/', '_')
    return f'{dirpath}/{app}_{report}_{from_date}_{to_date}_{tz_tag}.csv'

------------------

# 3. Download and Save

In [19]:
# Reports are pulled up to and including yesterday (local time).
stopdate = datetime.now() - timedelta(days=1)
stopdate

datetime.datetime(2020, 6, 1, 13, 55, 56, 318695)

In [20]:
# Three reference dates, oldest first, one per month to process;
# the last element is stopdate itself.
months = get_months(stopdate)

In [21]:
# Make sure a YYYY_MM folder exists under DATA_DIRPATH for every month.
for month in months:
    month_dirname = month.strftime('%Y_%m')
    # exist_ok=True makes the call idempotent without a separate isdir() check.
    os.makedirs(DATA_DIRPATH + month_dirname, exist_ok=True)
    print(f"Check/create directory {month_dirname}")

Check/create directory 2020_04
Check/create directory 2020_05
Check/create directory 2020_06


https://support.appsflyer.com/hc/en-us/articles/207034366-API-Policy

- Installs and uninstalls reports: up to 120 calls per day per account, 24 calls per day per app
- In-app events reports: up to 60 calls per day per account, 12 calls per day per app

In [22]:
# Running number printed in front of each request in the download log.
i = 1
# Timestamp used by the download loop to keep geo_by_date_report requests
# at least 61 seconds apart.
last_geo_report_time = datetime.now()

In [23]:
# Number of requests made so far, per app and overall (key 'all'),
# kept separately for the two quota groups checked by the download loop.
counts = {
    quota_group: {key: 0 for key in ['all'] + apps}
    for quota_group in ('installs_count', 'events_count')
}

In [24]:
# Pass 1: for every app / report / month, work out which days of the month
# have no CSV on disk yet.  Result: download_dict[app][report]['YYYY_MM'] is a
# sorted list of missing day numbers.
download_dict = dict()

for app in apps:
    download_dict[app] = dict()
    for report in reports:
        download_dict[app][report] = dict()
        for month in months:
            
            # Files of this app in the month folder ...
            app_files = [x for x in os.listdir(DATA_DIRPATH + f"{month.strftime('%Y_%m')}") if x.startswith(f'{app}_')]
            # ... narrowed to this report; report[:-6] drops the last six
            # characters of the report name (the trailing "report").
            report_files = [x for x in app_files if report[:-6] in x]
            # Days 1..month.day are the days expected to be present
            # (numpy integer scalars, since they come from np.arange).
            actual_month_days = set(np.arange(1, month.day+1))
            
            downloaded_days = np.empty(0, dtype=int)
            for report_file in report_files:
                # The file name contains the from- and to-date as YYYY-MM-DD;
                # splitting on 'YYYY-MM-' leaves the two day numbers at the
                # start of pieces 1 and 2.
                report_days = [int(x[:2]) for x in report_file.split(month.strftime('%Y-%m-'))[1:3]]
                # Every day in the inclusive from..to range counts as downloaded.
                downloaded_days = np.append(downloaded_days, np.arange(report_days[0], report_days[1]+1))
                                 
            download_dict[app][report][month.strftime('%Y_%m')] = list(actual_month_days - set(downloaded_days))
            download_dict[app][report][month.strftime('%Y_%m')].sort()
                                               

# Pass 2: invert the structure to download['YYYY_MM'][day][app] -> [reports],
# keeping only days and apps that still have something to download.
download = dict()

for month in months:
    download[month.strftime('%Y_%m')] = dict()
    actual_month_days = set(np.arange(1, month.day+1))
    
    for day in actual_month_days:
        download[month.strftime('%Y_%m')][day] = dict()
        
        for app in apps:
            download[month.strftime('%Y_%m')][day][app] = list()
            
            for report in reports:
                
                if day in download_dict[app][report][month.strftime('%Y_%m')]:
                    download[month.strftime('%Y_%m')][day][app].append(report)
            # Drop apps with nothing missing on this day ...
            if not len(download[month.strftime('%Y_%m')][day][app]):
                download[month.strftime('%Y_%m')][day].pop(app)
        # ... and days on which no app is missing anything.
        if not len(download[month.strftime('%Y_%m')][day]):
            download[month.strftime('%Y_%m')].pop(day)
                 

# From here on download_dict has the month -> day -> app -> [reports] layout.
download_dict = download
                                               
pprint.pprint(download_dict)

{'2020_04': {},
 '2020_05': {27: {'com.myandroidapp.indev': ['installs_report',
                                             'uninstall_events_report',
                                             'in_app_events_report']},
             28: {'com.myandroidapp.indev': ['installs_report',
                                             'uninstall_events_report',
                                             'in_app_events_report']},
             29: {'com.myandroidapp.indev': ['installs_report',
                                             'uninstall_events_report',
                                             'in_app_events_report']},
             30: {'com.myandroidapp.indev': ['installs_report',
                                             'uninstall_events_report',
                                             'in_app_events_report']},
             31: {'com.myandroidapp.indev': ['installs_report',
                                             'uninstall_events_report',
                    

----------------

In [25]:
%%time

# Download every missing (month, day, app, report) combination as a one-day
# CSV, while staying inside the daily call quotas tracked in `counts`.
for month in download_dict.keys():
    # `month` is a 'YYYY_MM' key: parse it and build the folder path once per
    # month instead of once per request.
    year_number, month_number = (int(part) for part in month.split('_'))
    dirpath = f'{DATA_DIRPATH}{month}'

    for day in download_dict[month].keys():
        # Each request covers exactly one day, so from-date == to-date.
        one_day = datetime(year=year_number, month=month_number, day=day).strftime('%Y-%m-%d')

        for app in download_dict[month][day].keys():

            for report in download_dict[month][day][app]:
                is_installs_like = report in ('installs_report', 'uninstall_events_report')
                is_events = report == 'in_app_events_report'

                # Skip the request once the per-app or per-account daily quota
                # of its report group has been used up (24/120 and 12/60).
                if is_installs_like and (counts['installs_count'][app] == 24 or counts['installs_count']['all'] == 120):
                    continue
                if is_events and (counts['events_count'][app] == 12 or counts['events_count']['all'] == 60):
                    continue

                if is_installs_like:
                    counts['installs_count']['all'] += 1
                    counts['installs_count'][app] += 1
                elif is_events:
                    counts['events_count']['all'] += 1
                    counts['events_count'][app] += 1
                else:  # geo_by_date_report
                    # Keep at least 61 seconds between two geo reports.
                    # total_seconds() is used because timedelta.seconds wraps
                    # at one day and would misreport long (or zero) gaps.
                    seconds_passed = (datetime.now() - last_geo_report_time).total_seconds()
                    if seconds_passed < 61:
                        time.sleep(61 - seconds_passed)
                    last_geo_report_time = datetime.now()

                url = make_url(app, report)
                params = make_params(report, from_date=one_day, to_date=one_day)
                filename = make_filename(app, report, dirpath=dirpath, from_date=one_day, to_date=one_day)

                print(f'{i:5}. {app:30} {report:25} {month}_{day:02}')
                load_csv(url, params, filename)
                i += 1
        

    1. com.myandroidapp.indev         installs_report           2020_05_27
OK Status code - 200
    2. com.myandroidapp.indev         uninstall_events_report   2020_05_27
OK Status code - 200
    3. com.myandroidapp.indev         in_app_events_report      2020_05_27
OK Status code - 200
    4. com.myandroidapp.indev         installs_report           2020_05_28
OK Status code - 200
    5. com.myandroidapp.indev         uninstall_events_report   2020_05_28
OK Status code - 200
    6. com.myandroidapp.indev         in_app_events_report      2020_05_28
OK Status code - 200
    7. com.myandroidapp.indev         installs_report           2020_05_29
OK Status code - 200
    8. com.myandroidapp.indev         uninstall_events_report   2020_05_29
OK Status code - 200
    9. com.myandroidapp.indev         in_app_events_report      2020_05_29
OK Status code - 200
   10. com.myandroidapp.indev         installs_report           2020_05_30
OK Status code - 200
   11. com.myandroidapp.indev         un