In [None]:
import requests
import base64
import time
import os
import gzip
import pandas as pd
import zipfile

Connect and download from Amplitude

In [None]:
def download_from_amplitude(
    api_key,
    secret_key,
    date_start,
    date_end,
    file_name='amplitude_data',
    url="https://amplitude.com/api/2/export",
    timeout=(10, 600),
):
    """
    Download raw Amplitude export data for a period and save it as a zip file.

    The archive is written to ``f"{file_name}_{date_start}_{date_end}.zip"``,
    relative to the current working directory (``file_name`` may include a
    directory path). The response body is streamed to disk in chunks so large
    exports are never held in memory in full, and the file only appears under
    its final name once the download has finished.

    Args:
        api_key (str): Amplitude API key.
        secret_key (str): Amplitude secret key.
        date_start (str): Start date in the format YYYYMMDD.
        date_end (str): End date in the format YYYYMMDD.
            NOTE(review): Amplitude's Export API documentation describes hourly
            timestamps (YYYYMMDDTHH) -- confirm which format the server accepts.
        file_name (str, optional): Prefix of the zip file name. Defaults to 'amplitude_data'.
        url (str, optional): Amplitude server url. Defaults to "https://amplitude.com/api/2/export".
        timeout (float or tuple, optional): Timeout passed to ``requests.get``,
            either a single number of seconds or a ``(connect, read)`` tuple.
            Defaults to ``(10, 600)``.

    Returns:
        str: 'Download completed' when the archive was saved. 'Download failed'
        when the server answers with a non-200 status or the request itself
        fails (connection error, timeout, ...); the reason is printed.
    """
    credentials = base64.b64encode(f"{api_key}:{secret_key}".encode())
    headers = {"Authorization": f"Basic {credentials.decode()}"}
    params = {"start": date_start, "end": date_end}

    zip_path = f"{file_name}_{date_start}_{date_end}.zip"
    # Download into a temporary sibling file so an interrupted transfer never
    # leaves a truncated archive under the final name.
    partial_path = zip_path + ".part"

    try:
        with requests.get(url, params=params, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print('Error:', response.text)
                return 'Download failed'
            print('Connected to Amplitude server')
            print('Downloading data...')
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
    except requests.exceptions.RequestException as exc:
        print('Error:', exc)
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return 'Download failed'

    os.replace(partial_path, zip_path)
    print('Amplitude data downloaded and saved to zip file:', zip_path)
    return 'Download completed'

In [None]:
# Example usage:
# Credentials are read from the environment so real keys are never stored in
# the notebook; the placeholders are only used when the variables are unset.
api_key = os.environ.get("AMPLITUDE_API_KEY", "YOUR_API_KEY")
secret_key = os.environ.get("AMPLITUDE_SECRET_KEY", "YOUR_SECRET_KEY")
start_date = "20230817"
end_date = "20230818"

result = download_from_amplitude(api_key, secret_key, start_date, end_date)
print(result)  # Print the status message returned by the function

Extract the zip file and convert the JSON files to a pandas DataFrame

In [None]:
def unzip_json_to_df(zip_file, folder_name='amplitude_data'):
    """
    Extract a zip archive and load its gzipped JSON-lines files into one DataFrame.

    Every member of the archive is extracted into ``folder_name``. Only files
    that come from this archive and end in ``.gz`` are parsed, so leftovers from
    an earlier extraction into the same folder are not mixed into the result.
    Files are read in archive order and stacked vertically; each file keeps its
    own row index, so index labels may repeat across files.

    Args:
        zip_file (str): Path of the zip file.
        folder_name (str, optional): Folder where the data will be extracted. Defaults to 'amplitude_data'.

    Returns:
        pd.DataFrame: DataFrame with the rows of all ``.gz`` files in the
        archive, or an empty DataFrame when the archive contains none.
    """
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # extract() returns the normalized on-disk path of each member, which
        # lets us read back exactly what this archive produced.
        extracted_paths = [zip_ref.extract(member, folder_name) for member in zip_ref.infolist()]
    print('\nAmplitude data extracted to the folder:', folder_name)

    print('Creating Dataframe...')
    # Collect the per-file frames and concatenate once; growing a DataFrame
    # inside the loop re-copies all previous rows on every iteration.
    frames = [
        pd.read_json(path, lines=True, compression='gzip')
        for path in extracted_paths
        if path.endswith('.gz') and os.path.isfile(path)
    ]
    df_func_combined = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    print('\nDataframe created (rows, columns):', df_func_combined.shape)
    return df_func_combined

In [None]:
# Load each archive into its own DataFrame, extracting into separate folders.
# 'amplitude_data.zip' does not follow the '<file_name>_<start>_<end>.zip'
# naming used by download_from_amplitude, so it must already exist on disk.
df = unzip_json_to_df(
    zip_file='amplitude_data_20230817_20230818.zip',
    folder_name='amplitude_data1',
)
df2 = unzip_json_to_df(
    zip_file='amplitude_data.zip',
    folder_name='amplitude_data2',
)

Concatenate the DataFrames and save the result to a CSV file

In [None]:
def concat_df_to_csv(dataframes, csv='amplitude_data.csv'):
    """
    Concatenate a list of DataFrames vertically and save the result to a csv file.

    The csv file is written without the index column. A relative ``csv`` name
    is resolved against the current working directory.

    Args:
        dataframes (list): List of pandas DataFrames.
        csv (str, optional): Name (or path) of the csv file. Defaults to 'amplitude_data.csv'.

    Returns:
        pd.DataFrame: Concatenated DataFrame (the same data that was written to ``csv``).

    Raises:
        ValueError: Raised by ``pd.concat`` when ``dataframes`` is empty.
    """
    concatenated_df = pd.concat(dataframes, axis=0)
    concatenated_df.to_csv(csv, index=False)
    # An f-string keeps the message identical for str names and also works for path-like objects.
    print(f'Data saved to csv file: {csv}')
    return concatenated_df

In [None]:
# Example usage: stack both extracts and write them to a single csv file.
frames_to_merge = [df, df2]
concat_df_to_csv(frames_to_merge, csv='amplitude_data.csv')