## Import Libraries and Dependencies

In [None]:
import requests
import json
import pandas as pd
# from snowflake.sqlalchemy import URL
import unidecode
import zipfile
import io
import gzip
import os
import shutil

## Input API Key and Secret

In [None]:
# Credentials used by make_request(), which passes them to requests as
# auth=(KEY, SECRET). The values below are placeholders: replace them with
# real credentials before running, and avoid committing real values.
KEY = "API_KEY"
SECRET = "API_SECRET"

## make_request() function for connecting to Amplitude API

In [None]:
def make_request(endpoint, params=()):
    """
    Send a GET request to an Amplitude API (v2) endpoint.

    Example Endpoints: https://amplitude.zendesk.com/hc/en-us/articles/205469748-Dashboard-Rest-API-Export-Amplitude-Dashboard-Data

    Args:
      endpoint -- <str> path appended to 'https://amplitude.com/api/2/'
      params -- query parameters forwarded to requests.get (Default: empty tuple)

    Return:
      The raw response body (bytes) when the status code is 200; otherwise
      the response object itself, so the caller can inspect the failure.
    """
    url = 'https://amplitude.com/api/2/' + endpoint
    response = requests.get(url, params=params, auth=(KEY, SECRET))
    # Note the two possible return types: callers must handle both.
    return response.content if response.status_code == 200 else response

### Sample direct uses of make_request()

In [None]:
# Sample use of make_request
# Query window for time-ranged endpoints. The values follow the 'YYYYMMDDTHH'
# format that extract_data() documents for its start/end arguments.
params = (
    ('start', '20180921T07'),
    ('end', '20180921T08')
)


# 'events/list' is requested without params. The commented-out lines below are
# alternative endpoints to try; some of them take `params`.
data = make_request('events/list')
# data = make_request('sessions/average', params)
# data = make_request('annotations')
# data = make_request('export', params)

In [None]:
# Parse the JSON body returned by make_request() and show its top-level keys.
# NOTE: make_request() returns the response object instead of bytes when the
# status code is not 200, in which case json.loads() raises here.
out = json.loads(data)
out.keys()

In [None]:
# Tabulate the 'data' entry of the parsed response as a DataFrame.
sample_df = pd.DataFrame(out['data'])

## Helper functions for Raw Data Extraction

In [None]:
# Function to check and clear current exported data
def clear_raw_data(data_path='.', project_dir='180337'):
    """
    Delete a previously extracted export directory, if one exists.

    Kwargs:
      data_path -- <str> directory that is searched for the export folder (Default: '.')
      project_dir -- <str> name of the export folder to remove (Default: '180337',
                     the folder name extract_data() reads from)

    Does nothing when `project_dir` is not listed in `data_path`.
    """
    if project_dir in os.listdir(data_path):
        shutil.rmtree(os.path.join(data_path, project_dir))
            

In [None]:
def load_zip(zip_fp):
    """
    Loads a gzip-compressed, newline-delimited JSON file into a pandas DataFrame

    Args:
      zip_fp -- <str> path to the gzip file; each non-blank line must be one JSON document

    Return:
      df -- <pd.DataFrame> one row per parsed JSON line (empty if the file has no records)
    """
    # Decode the stream as UTF-8 text and parse each line as-is. Going through
    # str(bytes) and stripping backslashes would corrupt JSON escapes
    # (\" \\ \uXXXX) and any non-ASCII characters.
    with gzip.open(zip_fp, 'rt', encoding='utf-8') as fin:
        parsed_data = [json.loads(line) for line in fin if line.strip()]
    return pd.DataFrame(parsed_data)

In [None]:
def load_zip_dir(zip_dir):
    """
    Loads all contents of a zip directory into a pandas DataFrame (via load_zip())

    Args:
      zip_dir -- <str> directory whose entries are each passed to load_zip()

    Return:
      <pd.DataFrame> concatenation of the per-file DataFrames, or an empty
      DataFrame when the directory has no entries
    """
    # List the directory once (not on every iteration) and sort the names so
    # the row order of the result is reproducible across runs and platforms.
    file_names = sorted(os.listdir(zip_dir))
    total = len(file_names)

    dfs = []
    for n, zip_fp in enumerate(file_names, start=1):
        loaded_percent = round(100. * n / total, 2)
        print(f'Parsing data: {loaded_percent}% complete ...', end="\r")
        dfs.append(load_zip(os.path.join(zip_dir, zip_fp)))

    print('Successfully parsed raw data. Concatenating and returning DF')
    if not dfs:
        # pd.concat() raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(dfs)

## Main Raw data extraction function

In [None]:
def extract_data(start='20180921T07', end='20180921T08', clear_data=True):
    """
    Extracts raw data, decompresses, and loads into pandas DataFrame

    Kwargs:
      start -- <str> start date in 'YYYYMMDDTHH' format
      end -- <str> end date in 'YYYYMMDDTHH' format
      clear_data -- <bool> whether or not to clear the loaded data before extracting more (Default: True)

    Return:
      parsed_data -- <pd.DataFrame> pandas DataFrame of parsed raw data

    Raises:
      RuntimeError -- if the export request did not return the archive bytes
                      (make_request() returns the response object instead of
                      bytes when the status code is not 200)
    """
    if clear_data:
        print('Clearing workspace ...')
        clear_raw_data()

    params = (
        ('start', start),
        ('end', end)
    )
    print('Exporting data from API ...')
    data = make_request('export', params)
    if not isinstance(data, bytes):
        # Fail with the response in the message rather than an opaque
        # TypeError from io.BytesIO further down.
        raise RuntimeError(f'Amplitude export request failed: {data!r}')

    # The archive is unpacked into the current working directory. It is
    # expected to contain the '180337' folder that is read back below.
    with zipfile.ZipFile(io.BytesIO(data)) as z:
        z.extractall()
    print('Successfully exported raw data. \nParsing raw data ...')
    parsed_data = load_zip_dir('180337')
    return parsed_data
    

In [None]:
# Run the export with extract_data()'s default time window and preview the
# first rows. NOTE: this rebinds `out`, which earlier held the parsed
# 'events/list' response.
out = extract_data()
out.head()