### TemporalDataFrame Class

In [10]:
class TemporalDataFrame(object):
    """
    Wrapper around a long-format data frame (one row per member per time step)
    that can re-express it in wide format (one row per member) and as 3D
    arrays indexed [member][time step][column].

    Assumes every member has the same number of rows and that each member's
    rows are already in time order; neither is verified here.
    """

    def __init__(self, df, lagged_obs=False, id_col=None, target_col=None, drop_ID=False):
        """
        df: long-format pandas DataFrame
        lagged_obs: if True the target column is also kept as a feature (bool)
        id_col: name of the member ID column (defaults to the first column)
        target_col: name of the target column (defaults to the last column)
        drop_ID: accepted for backward compatibility; not used by the constructor
        """
        if id_col is None:
            id_col = df.columns[0]  # assumption: id column is the first column

        if target_col is None:
            target_col = df.columns[-1]  # assumption: target column is the last column

        self.df = df
        self.col_names = df.columns.tolist()
        self.values = df.values
        self.id_col = id_col  # ID as in Member ID, not to be confused with index
        self.target_col = target_col
        self.target_cols = [id_col, target_col]  # ID + Target
        if lagged_obs:
            self.feature_cols = self.col_names
        else:
            self.feature_cols = [n for n in self.col_names if n != self.target_col]  # ID + Features

        # calculate n time steps (rows per member in long format)
        # based on assumption that all members have the same amount of time steps
        ids = df[self.id_col].unique()
        if len(ids):
            self.time_steps_per_id = int((df[self.id_col] == ids[0]).sum())
        else:
            # empty frame: nothing to count (avoids IndexError on ids[0])
            self.time_steps_per_id = 0
        print(self.time_steps_per_id, 'time steps per member')
        self.unique_ids = len(ids)
        print(self.unique_ids, "unique members")

    def name_time_steps(self, col_name, n_time_steps):
        """
        col_name: column name
        n_time_steps: number of time steps (int)

        Returns list wherein col_name is repeated n_time_steps times
        with a zero-indexed time step suffix

        [x_0, x_1, x_2,...]
        """
        return ['%s_%d' % (col_name, step) for step in range(n_time_steps)]

    def sort_by_time_step(self, *kwargs):
        """
        kwargs: positional lists of time stepped column names, one list per
                column, all of the same length (despite its name this is a
                var-positional parameter, call with *[[str]])

        Returns flat list sorted by time step rather than column name
        i.e.
            input: *[['a_0', 'a_1'],['b_0','b_1']]
            output: [a_0, b_0, a_1, b_1]
        """
        # zip groups the k-th name of every column together, i.e. one tuple per time step
        return [name for step_names in zip(*kwargs) for name in step_names]

    def temporal_col_names(self, drop_ID=False, X_only=False, y_only=False, n_time_steps=None):
        """
        drop_ID: leave the ID column out of the result (bool)
        X_only: use the feature columns only (bool, takes precedence over y_only)
        y_only: use the ID + target columns only (bool)
        n_time_steps: number of time steps (int), defaults to time_steps_per_id

        Returns list of column names time-stepped and dovetailed
        """
        if n_time_steps is None:
            n_time_steps = self.time_steps_per_id

        if X_only:
            col_list = self.feature_cols
        elif y_only:
            col_list = self.target_cols
        else:
            col_list = self.col_names

        if drop_ID:
            col_list = [n for n in col_list if n != self.id_col]

        return self.sort_by_time_step(*[self.name_time_steps(c, n_time_steps) for c in col_list])

    def long_to_wide(self, show_ID=False, X_only=False, y_only=False):
        """
        show_ID: Whether to include the ID as the first column (bool)
        X_only: Whether we want features only (bool)
        y_only: Whether we want targets only (bool)

        Takes in long data frame
        Returns wide data frame (1 row per ID, IDs in order of first appearance)
        With correct temporal column names
        with optional ID column
        """
        from pandas import DataFrame

        if X_only:
            df = self.df[self.feature_cols].copy()
        elif y_only:
            df = self.df[self.target_cols].copy()
        else:
            df = self.df.copy()

        ids = df[self.id_col].unique()

        # flatten each member's rows (minus the ID) into a single wide row
        wide_values = [
            df[df[self.id_col] == member].drop(self.id_col, axis=1).values.reshape(-1)
            for member in ids
        ]

        temporal_col_list = self.temporal_col_names(drop_ID=True,  # must always be True
                                                    X_only=X_only,
                                                    y_only=y_only)

        wide_df = DataFrame(wide_values, columns=temporal_col_list)

        if show_ID:
            wide_df.index = ids
            wide_df.index.name = self.id_col
            wide_df.reset_index(inplace=True)

        return wide_df

    def X_3D(self, X_wide=None, offset_by=0):
        """
        X_wide: wide feature frame without ID column (defaults to long_to_wide(X_only=True))
        offset_by: number of time steps already removed from X_wide (int)

        Return RNN friendly 3D matrix for features
        [member][time step][feature]
        """
        if X_wide is None:
            X_wide = self.long_to_wide(X_only=True, show_ID=False)

        n_time_steps = self.time_steps_per_id - offset_by

        # use the row count of the frame actually given so subsets of members work too
        return X_wide.values.reshape(X_wide.shape[0], n_time_steps, -1)

    def y_3D(self, y_wide=None, offset_by=0):
        """
        y_wide: wide target frame without ID column (defaults to long_to_wide(y_only=True))
        offset_by: number of time steps already removed from y_wide (int)

        Return RNN friendly 3D matrix for targets
        [member][time step][target]
        """
        if y_wide is None:
            y_wide = self.long_to_wide(y_only=True, show_ID=False)

        n_time_steps = self.time_steps_per_id - offset_by

        return y_wide.values.reshape(y_wide.shape[0], n_time_steps, 1)

    def offset(self, offset_by):
        """
        offset_by: number of time steps to shift the targets by (int, >= 0)

        Return offset X and y in wide format:
        X loses its last offset_by time steps, y loses its first offset_by
        time steps, so that X at step t lines up with y at step t + offset_by.

        Raises ValueError if offset_by is negative.
        """
        if offset_by < 0:
            raise ValueError("offset_by must be non-negative, got %r" % (offset_by,))

        X = self.long_to_wide(X_only=True, show_ID=False)
        y = self.long_to_wide(y_only=True, show_ID=False)

        if self.time_steps_per_id:
            n_features_per_time_step = X.shape[1] // self.time_steps_per_id
        else:
            n_features_per_time_step = 0

        # slice by an explicit column count: a negative stop of -0 would
        # select nothing when offset_by is 0
        n_keep = max(X.shape[1] - offset_by * n_features_per_time_step, 0)

        X = X.iloc[:, :n_keep].copy()
        y = y.iloc[:, offset_by:].copy()

        return X, y

    def offset_3D(self, offset_by):
        """
        offset_by: number of time steps to shift the targets by (int, >= 0)

        Return offset X and y in 3D array format
        """
        X, y = self.offset(offset_by=offset_by)

        X_3D_ = self.X_3D(X, offset_by)
        y_3D_ = self.y_3D(y, offset_by)

        return X_3D_, y_3D_
        