In [5]:
import import_ipynb
import polars as pl
import pandas as pd
import miceforest as mf
import coingecko_api as ca

In [6]:
def clean_xrp_bsv(scraped: dict) -> dict:
    """Build the combined 'xrp' and 'bsv' entries of *scraped*.

    For each of the two projects, the ``<name>_bit`` and ``<name>_mes``
    dataframes are inner-joined on ``date``. An ``average_transaction_fees``
    column is then derived as ``total_fees / transactions_count`` and the
    ``total_fees`` column is removed.

    The result is stored under the bare project name, so *scraped* is
    modified in place; the same dict is also returned for convenience.
    """
    # The derived column is identical for both projects, so build it once.
    avg_fee = (pl.col("total_fees") / pl.col("transactions_count")).alias("average_transaction_fees")

    for project in ('xrp', 'bsv'):
        joined = scraped[f'{project}_bit'].join(scraped[f'{project}_mes'], on='date', how='inner')
        # Add the per-transaction fee, then discard the raw total it came from.
        scraped[project] = joined.with_columns(avg_fee).drop('total_fees')

    return scraped

In [7]:
def merge_api_scraped(filled_api: dict, filled_scraped: dict, names: list) -> dict:
    """Join API and scraped dataframes project by project.

    Steps:
      1. Pass the 'xrp_mes', 'bsv_mes' and 'xlm' scraped frames through
         ``ca.fill_date`` (intended to add rows for missing dates, filled
         with nulls).
      2. Run ``clean_xrp_bsv`` to create the combined 'xrp' and 'bsv' frames.
      3. Inner-join ``filled_api[name]`` with ``filled_scraped[name]`` on
         ``date`` for every entry of *names*.

    Note that steps 1 and 2 write back into *filled_scraped*, so the caller's
    dict is modified. Returns a new dict mapping each name to its joined frame.
    """
    # These frames need their date gaps filled before any joining happens.
    for key in ('xrp_mes', 'bsv_mes', 'xlm'):
        filled_scraped[key] = ca.fill_date(filled_scraped[key])

    # Populates filled_scraped['xrp'] and filled_scraped['bsv'] in place.
    clean_xrp_bsv(filled_scraped)

    return {
        name: filled_api[name].join(filled_scraped[name], on='date', how='inner')
        for name in names
    }

In [8]:
def mf_impute(df: pd.DataFrame) -> pd.DataFrame:
    """Impute the missing values of *df* with miceforest (MICE).

    A kernel holding 3 datasets is built with a fixed random state (123),
    the MICE procedure is run for 3 iterations, and the completed data is
    returned. ``complete_data`` is called without arguments, so the library's
    default dataset selection applies.
    """
    kernel_options = {
        "datasets": 3,
        "save_all_iterations": True,
        "random_state": 123,  # fixed seed
    }
    imputer = mf.ImputationKernel(df, **kernel_options)

    # 3 MICE iterations on each of the datasets.
    imputer.mice(3)

    return imputer.complete_data()

In [10]:
def impute_crypto(dfs: dict) -> dict:
    """Impute missing values in every dataframe of *dfs*.

    Args:
        dfs: Mapping of name -> Polars dataframe. Each frame must contain a
            ``date`` column; all other columns are passed to ``mf_impute``.

    Returns:
        A new dict with the same keys, whose values are Polars dataframes
        made of the original ``date`` column followed by the imputed columns.
        The input dict is not modified.
    """
    imputed = {}
    for key, polars_df in dfs.items():
        # miceforest works on pandas, so convert for the imputation step.
        pandas_df = polars_df.to_pandas()

        # The date column is set aside so it is not fed to the imputer.
        date_series = pandas_df['date']
        features = pandas_df.drop(columns='date')
        df_imputed = mf_impute(features)

        # Re-attach the date column in front and convert back to Polars.
        imputed[key] = pl.from_pandas(pd.concat([date_series, df_imputed], axis=1))

    # Return the imputed frames. The previous version converted and returned
    # the un-imputed pandas frames, discarding the imputation results.
    return imputed