# Load your data

The code below will download all the provided data. Sit back and relax while Google Colab takes care of it.

In [None]:
%%capture

# install and load necessary Libraries
!pip install pyarrow
!pip install gdown

import gdown
import pandas as pd

In [None]:
# Google Drive file id of each dataset, keyed by the short name that is
# used for the downloaded file and for the dataframe lookup.
data = dict(
    bank="1dzL_SWBkBs5xrUxuGQTm04oe3USgkL9u",   # banking data
    sales="1QK-VgSU3AxXUw330KjYFUj8S9hzKJsG6",  # sales data
    mcc="1JN0bR84sgZ_o4wjKPBUmz45NeEEkVgt7",    # mcc description
)

In [None]:
# Download every file from Google Drive, saving each one as '<name>.parquet'
# (a relative path, so it lands in the current working directory).
for name, file_id in data.items():
    output_path = gdown.download(f'https://drive.google.com/uc?id={file_id}', name + '.parquet', quiet=False)
    # Some gdown releases signal a failed download by returning None instead of
    # raising. Stop here with a clear message rather than letting a later cell
    # fail on a missing or incomplete parquet file.
    if output_path is None:
        raise RuntimeError(f"Download failed for '{name}' (Google Drive file id: {file_id})")

In [None]:
# Load each downloaded parquet file into a pandas dataframe and keep the
# dataframes in a dictionary keyed by dataset name.
df = {key: pd.read_parquet(key + '.parquet') for key in data}

# View your data

You can access each dataframe by its key (`'sales'`, `'bank'`, or `'mcc'`) in the `df` dictionary, as shown below.

In [None]:
# Look up the 'sales' dataframe; as the last expression in the cell it is shown as the output.
df['sales']

In [None]:
# Look up the 'bank' dataframe; as the last expression in the cell it is shown as the output.
df['bank']

In [None]:
# Look up the 'mcc' dataframe; as the last expression in the cell it is shown as the output.
df['mcc']

# Explore your data

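You can experiment with your data to gain insights into customer behavior. As a starting point, the cells below compute the maximum and mean `value` for each `document_id`, first in the banking data and then in the sales data.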

In [None]:
# Largest and average `value` for each `document_id` in the banking data.
summary = (
    df['bank']
    .groupby(by='document_id')['value']
    .agg(['max', 'mean'])
)
summary

In [None]:
# Largest and average `value` for each `document_id` in the sales data.
# Note: this rebinds `summary`, replacing the result of the previous cell.
summary = (
    df['sales']
    .groupby(by='document_id')['value']
    .agg(['max', 'mean'])
)
summary