In [1]:
from joblib import Memory
import pandas as pd

# Configure joblib's on-disk cache.
# 'cache_directory' is a relative path, so it is resolved against the
# notebook's current working directory.
cache_dir = 'cache_directory'
# verbose=0 keeps joblib from logging cache activity into the cell output.
memory = Memory(cache_dir, verbose=0)

In [2]:
@memory.cache
def data_processing():
    """Load the three indicator CSVs and merge them into one tidy dataframe.

    Each CSV is read in wide form (a 'country' column plus one column per
    year), gaps are forward-filled *within each country* along the year
    axis, and the frame is melted to long form. The three long frames are
    then outer-merged on ('country', 'year').

    Returns:
        pandas.DataFrame with columns 'country', 'year', 'life_expectancy',
        'population' and 'gni_per_capita'. Rows missing from one source are
        kept (outer merge) with NaN in that source's column.

    Notes:
        * Leading gaps (years before a country's first observation) stay
          NaN, because there is no earlier value to carry forward.
        * Values are returned as read from the CSVs; suffixed strings such
          as '3.28M' are not converted to numbers here.
        * The result is cached by joblib. The function takes no arguments,
          so edits to the CSV files themselves do not invalidate the cache.
    """
    # Kept nested on purpose: joblib inspects this function's source to
    # detect code changes, and a nested helper is part of that source.
    def load_tidy(csv_path, value_name):
        """Read one wide CSV, fill gaps per country, return it in long form."""
        wide = pd.read_csv(csv_path)
        # Fill along the year columns (axis=1). The default axis=0 would copy
        # values down from the previous *country's* row, which mixes data
        # between unrelated countries.
        wide = wide.set_index('country').ffill(axis=1).reset_index()
        return pd.melt(wide, id_vars=['country'], var_name='year', value_name=value_name)

    sources = [
        ('./life_expectancy_years.csv', 'life_expectancy'),
        ('./population_total.csv', 'population'),
        ('./ny_gnp_pcap_cn.csv', 'gni_per_capita'),
    ]

    # Merge left to right in the listed order, keeping every
    # (country, year) pair that appears in any source.
    merged_df = None
    for csv_path, value_name in sources:
        tidy = load_tidy(csv_path, value_name)
        if merged_df is None:
            merged_df = tidy
        else:
            merged_df = pd.merge(merged_df, tidy, on=['country', 'year'], how='outer')

    return merged_df

# Run the pipeline. joblib stores the result on disk, so later calls load it
# from the cache instead of re-reading and re-merging the CSVs.
# NOTE: the function takes no arguments, so changes to the CSV files are not
# part of the cache key; call data_processing.clear() to force a recompute.
result_df = data_processing()




In [3]:
# Preview the first five rows of the merged dataframe
result_df.head()

Unnamed: 0,country,year,life_expectancy,population,gni_per_capita
0,Afghanistan,1800,28.2,3.28M,
1,Angola,1800,27.0,1.57M,
2,Albania,1800,35.4,400k,
3,Andorra,1800,35.4,2650,
4,United Arab Emirates,1800,30.7,40.2k,
