In [3]:
# import important libraries

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt 
import seaborn as sns 
import pickle
from joblib import Parallel, delayed

from numpy.linalg import LinAlgError 
from functools import reduce 

from sklearn.neighbors import KernelDensity, KNeighborsRegressor

In [None]:
# create Simulated Experiment

class Simulated_Experiment:
    def __init__(self, T, N, L, nan_rate=0.4):
        """ 
        T: Number of days
        N: Number of stocks/companies
        L: Number of characteristics
        nan_rate: Rate that controls the masking rate for the simulated data
        
        """
        self.T = T 
        self.N = N 
        self.L = L  

        self.nan_rate = nan_rate 


    def generate_masked_data(self):
        """
        Simulate a (T, N, L) panel, rank-transform it and mask part of it.

        For every day a mean vector and a covariance matrix are drawn, and N
        multivariate-normal observations of the L characteristics are sampled
        from them. The raw panel is passed through percentile_rank_panel, and
        each day's ranked cross-section is then masked with simulate_nan
        using self.nan_rate.

        Sets self.raw_chars, self.rank_chars and self.masked_rank_chars, and
        returns the masked rank panel.
        """
        n_days, n_stocks, n_chars = self.T, self.N, self.L
        simulated = np.zeros((n_days, n_stocks, n_chars))

        for day in range(n_days):
            # Reseed the global NumPy RNG with the day index for reproducibility.
            np.random.seed(day)
            # Random mean vector (mu) for this day's cross-section.
            mean_vec = np.random.normal(size=n_chars)
            # Random covariance (Sigma): A @ A.T is symmetric and positive
            # semi-definite, as multivariate_normal requires.
            factor = np.random.rand(n_chars, n_chars)
            cov = factor @ factor.T
            simulated[day] = np.random.multivariate_normal(mean_vec, cov, n_stocks)

        # Raw characteristics.
        self.raw_chars = simulated

        # Convert the raw characteristics into rank characteristics.
        ranked = percentile_rank_panel(simulated)
        self.rank_chars = ranked

        # Simulate missing data one day at a time, keeping the 'X' entry of
        # each simulate_nan result.
        masked_by_day = []
        for day in range(n_days):
            day_result = simulate_nan(ranked[day], nan_rate=self.nan_rate)
            masked_by_day.append(day_result['X'])
        masked = np.array(masked_by_day)

        self.masked_rank_chars = masked
        return masked
    
    def evaluate_imputation_performance(self, method='xs', params=""):
        """ 
        Function can call any available imputation method 
        method could be 'em', 'xs', 'b_xs', 'xs-median', 'forward_filling'
        """

        # create an object of Imputer and just use of the defaults
        # then measure performance of imputation

        impute_model = Imputer(self.masked_rank_chars)

        # put params as input of each function
        if method == 'em':
            rank_imputed_chars = impute_model.impute_with_em(params)
        
        elif method == 'xs':
            rank_imputed_chars = impute_model.impute_with_xs(params)

        else: # Default 
            print("Method not recognied")
            print("Using default cross-sectional model")

            rank_imputed_chars = impute_model.impute_with_xs()
        
        self.rank_imputed_chars = impute_model.rank_imputed_chars 
        self.missing_mask_overall = impute_model.missing_mask_overall

        # calculate metrics for the imputation model
        # evaluate performance
        metrics = self.evaluate_imputations(self.rank_chars)

        return metrics
    
    