In [46]:
import pandas as pd
import numpy as np


class ScoreCalculator:
    """
    Compute diagnostics relating your predictions, the example predictions and
    the validation data.

    Typical use::

        calc = ScoreCalculator(validation_data, example_preds)
        diagnostics = calc.compute_numerai_diagnostics(my_predictions)

    ``validation_data`` must contain the columns 'era', 'target', 'rank_target'
    and the feature columns (any column whose name contains 'feature').
    ``example_preds`` must contain 'prediction' and 'rank_example_prediction'.

    Note: ``validation_data`` is stored by reference, not copied.
    ``add_predictions_to_validation_df`` and ``merge_pred_valid_df`` add
    prediction columns to that same frame.

    Primarily based on Numerai's example script. Not thoroughly tested.
    """

    # Divisor applied to the per-era covariance in compute_mmc_stats. The value
    # is taken as-is from the example script this class is based on.
    _MMC_COVARIANCE_SCALE = 0.29 ** 2

    # Number of eras the running peak looks back over in compute_max_drawdown.
    _DRAWDOWN_WINDOW = 100

    def __init__(self, validation_data, example_preds):
        self.validation_data = validation_data
        self._rank_normalized_validation_targets = validation_data['rank_target']
        self._feature_col_names = [
            column_name for column_name in self.validation_data.columns
            if 'feature' in column_name
        ]
        self.example_predictions = example_preds
        # The column name is fixed by convention with the data loader.
        self._rank_normalized_example_predictions = example_preds['rank_example_prediction']

    # ------------------------------------------------------------------ helpers

    def _require_columns(self, *column_names) -> None:
        """Raise a KeyError naming any of ``column_names`` missing from validation_data."""
        missing = [name for name in column_names if name not in self.validation_data.columns]
        if missing:
            raise KeyError(
                f"validation_data is missing column(s) {missing}; "
                "call add_predictions_to_validation_df() first"
            )

    def _compute_corr(self, a: pd.Series, b: pd.Series) -> float:
        """
        Return the Pearson correlation of ``a`` and ``b`` via np.corrcoef.

        The inputs are compared positionally and are not ranked here; pass
        already-ranked values to obtain a rank correlation.
        """
        return np.corrcoef(a, b)[0, 1]

    @staticmethod
    def _sharpe_of(per_era_corr: pd.Series) -> float:
        """Mean of the per-era correlations divided by their standard deviation."""
        return per_era_corr.mean() / per_era_corr.std()

    @classmethod
    def _max_drawdown_of(cls, per_era_corr: pd.Series) -> float:
        """Negated largest gap between the running peak and the cumulative product."""
        cumulative = (per_era_corr + 1).cumprod()
        running_peak = cumulative.rolling(window=cls._DRAWDOWN_WINDOW, min_periods=1).max()
        return -(running_peak - cumulative).max()

    def rank_noramalize_series(self, col: pd.Series) -> pd.Series:
        """
        Rank ``col`` (ties broken by order of appearance) and scale to (0, 1).

        Each element becomes ``(rank - 0.5) / len(col)``. The result keeps the
        index of ``col``.
        """
        return (col.rank(method="first") - 0.5) / len(col)

    # --------------------------------------------------------- simple correlations

    def score(self, df: pd.DataFrame) -> float:
        """
        Correlation between the 'rank_target' and 'rank_prediction' columns of ``df``.

        Utility for scoring an arbitrary slice (for example one era) of the
        validation data.
        """
        return self._compute_corr(df['rank_target'], df['rank_prediction'])

    def richards_dependence(self, df, target_col, era_col, prediction_col) -> float:
        """
        Measure how dependent each prediction's accuracy is on its era score.

        Currently unused. Example call:
            richards_dependence(df, 'target', 'era', 'prediction')
        Source: Numerai forum user richai,
        https://forum.numer.ai/t/independence-and-sharpe/2560 (May 3, 2021).

        ``df`` is not modified.

        Returns the correlation between ``1 - squared rank error`` of every row
        and the prediction/target correlation of the era that row belongs to.
        """
        by_era = df.groupby(era_col)

        # Per-era Pearson correlation between predictions and targets.
        scores_by_era = by_era[[prediction_col, target_col]].apply(
            lambda era_df: era_df[prediction_col].corr(era_df[target_col])
        )

        # Both sides are ranked within their era so that the "error" is comparable.
        ranked_predictions = by_era[prediction_col].rank(pct=True)
        ranked_targets = by_era[target_col].rank(pct=True)

        era_score = df[era_col].map(scores_by_era)
        one_minus_error = 1 - (ranked_targets - ranked_predictions) ** 2

        return one_minus_error.corr(era_score)

    def compute_validation_corr(self) -> float:
        """
        Correlation of 'rank_prediction' with 'rank_target' over all validation rows.

        Raises KeyError if add_predictions_to_validation_df() has not been called.
        """
        self._require_columns('rank_target', 'rank_prediction')
        return self._compute_corr(
            self.validation_data['rank_target'],
            self.validation_data['rank_prediction'],
        )

    # ------------------------------------------------------------ per-era metrics

    def create_per_era_grouper(self) -> pd.Series:
        """
        Return a Series indexed by era holding the correlation between
        'rank_target' and 'rank_prediction' within that era.

        Raises KeyError if add_predictions_to_validation_df() has not been called.
        """
        self._require_columns('era', 'rank_target', 'rank_prediction')
        # Selecting the two columns keeps the grouping column out of the frames
        # handed to the lambda.
        by_era = self.validation_data.groupby("era")[['rank_target', 'rank_prediction']]
        return by_era.apply(
            lambda era_df: self._compute_corr(era_df['rank_target'], era_df['rank_prediction'])
        )

    def compute_validation_std(self) -> float:
        """Standard deviation of the per-era correlations."""
        return self.create_per_era_grouper().std()

    def compute_validation_per_era_mean_corr(self) -> float:
        """Mean of the per-era correlations."""
        return self.create_per_era_grouper().mean()

    def compute_validation_sharpe(self) -> float:
        """
        Sharpe of the per-era correlations:
        sharpe = mean corr per era / std dev of corr per era.
        """
        return self._sharpe_of(self.create_per_era_grouper())

    def compute_max_drawdown(self) -> float:
        """
        Max drawdown of the per-era correlations, as in Numerai's example script.

        A running product of (1 + corr) is kept across eras. The result is the
        negated largest gap between the rolling maximum of that product and its
        current value. It is an estimate of risk; 0 means the cumulative value
        never fell below its peak.
        """
        return self._max_drawdown_of(self.create_per_era_grouper())

    # ------------------------------------------------------------ feature metrics

    def compute_feature_exposure(self) -> float:
        """
        Largest absolute correlation between the 'prediction' column and any
        single feature column.

        Raises KeyError if there is no 'prediction' column yet, and ValueError
        if the validation data has no feature columns.
        """
        self._require_columns('prediction')
        if not self._feature_col_names:
            raise ValueError("validation_data has no feature columns to measure exposure against")

        predictions = self.validation_data['prediction']
        feature_exposures = [
            self._compute_corr(self.validation_data[col], predictions)
            for col in self._feature_col_names
        ]
        return np.max(np.abs(feature_exposures))

    def compute_feature_neutral_corr_mean(self, pred: pd.Series) -> float:
        """
        Mean per-era correlation after ``pred`` is neutralized to all features.

        ``pred`` is rank-normalized and aligned to the validation data by index.
        Within every era both the neutralized predictions and the targets are
        rank-normalized before being correlated. Works on a private copy, so
        validation_data is left untouched.
        """
        feature_cols = list(self._feature_col_names)
        working = self.validation_data[feature_cols + ['era', 'target']].copy()
        working['prediction'] = self.rank_noramalize_series(pred)
        working['neutral_sub'] = self.neutralize(working, ['prediction'], feature_cols)['prediction']

        per_era_scores = working.groupby("era")[['neutral_sub', 'target']].apply(
            lambda era_df: self._compute_corr(
                self.rank_noramalize_series(era_df['neutral_sub']),
                self.rank_noramalize_series(era_df['target']),
            )
        )
        return per_era_scores.mean()

    def neutralize(self, df, columns, by, proportion=1.0):
        """
        Remove from ``df[columns]`` the part linearly explained by ``df[by]``.

        A constant column is appended to the exposures so the result is also
        neutral to an intercept. ``proportion`` scales how much of the explained
        part is removed. Returns a DataFrame with the same columns, each divided
        by its standard deviation.
        """
        scores = df.loc[:, columns]
        score_values = scores.to_numpy(dtype=float)
        exposures = df[by].values

        # Constant column built from the overall mean as an explicit scalar, so
        # the result does not depend on how np.mean reduces a DataFrame.
        constant = np.full((len(exposures), 1), score_values.mean())
        exposures = np.hstack((exposures, constant))

        explained = exposures.dot(np.linalg.pinv(exposures).dot(score_values))
        scores = scores - proportion * explained
        return scores / scores.std()

    # ---------------------------------------------------------------- MMC metrics

    def compute_mmc_stats(self, pred: pd.Series) -> tuple:
        """
        Compute MMC statistics, using the example predictions as a stand-in for
        the meta model. Adapted from Numerai's example script.

        ``pred`` is rank-normalized and aligned to the validation data by index.
        Works on a private copy, so validation_data is left untouched.

        Returns (val_mmc_mean, corr_plus_mmc_sharpe).
        """
        working = self.validation_data[['era', 'target']].copy()
        working['prediction'] = self.rank_noramalize_series(pred)
        # Only the raw example prediction column is needed, not the whole frame.
        working['ExamplePreds'] = self.example_predictions['prediction']

        mmc_scores = []
        corr_scores = []
        for _, era_df in working.groupby("era"):
            ranked_pred = self.rank_noramalize_series(era_df['prediction'])
            ranked_example = self.rank_noramalize_series(era_df['ExamplePreds'])
            neutral = self.neutralize_series(ranked_pred, ranked_example)
            mmc_scores.append(
                np.cov(neutral, era_df['target'])[0, 1] / self._MMC_COVARIANCE_SCALE
            )
            corr_scores.append(self._compute_corr(ranked_pred, era_df['target']))

        val_mmc_mean = np.mean(mmc_scores)
        corr_plus_mmcs = [corr + mmc for corr, mmc in zip(corr_scores, mmc_scores)]
        corr_plus_mmc_sharpe = np.mean(corr_plus_mmcs) / np.std(corr_plus_mmcs)
        return val_mmc_mean, corr_plus_mmc_sharpe

    def neutralize_series(self, series, by, proportion=1.0):
        """
        Remove from ``series`` the part linearly explained by ``by``.

        A constant column is appended to the exposure so the result is also
        neutral to an intercept. Returns a Series carrying the index of
        ``series``.
        """
        scores = series.values.reshape(-1, 1)
        exposures = by.values.reshape(-1, 1)

        # Constant column: centers the series and guarantees zero correlation
        # with the exposure after the least-squares fit is subtracted.
        constant = np.full((len(exposures), 1), np.mean(series))
        exposures = np.hstack((exposures, constant))

        coefficients = np.linalg.lstsq(exposures, scores, rcond=None)[0]
        corrected_scores = scores - proportion * exposures.dot(coefficients)
        return pd.Series(corrected_scores.ravel(), index=series.index)

    def compute_corr_with_example_preds(self, tournament_pred: pd.DataFrame) -> float:
        """
        Rank correlation between your predictions and the example predictions.

        tournament_pred: pd.DataFrame with a 'rank_prediction' column.

        Rows are matched by index label when the two indexes differ but share
        labels. If they share no labels the values are compared positionally,
        which requires equal lengths.
        """
        mine = tournament_pred['rank_prediction']
        example = self._rank_normalized_example_predictions

        if not mine.index.equals(example.index):
            shared = mine.index.intersection(example.index)
            if len(shared) > 0:
                mine = mine.loc[shared]
                example = example.loc[shared]

        return self._compute_corr(mine, example)

    # ------------------------------------------------- prediction bookkeeping

    def merge_pred_valid_df(self, pred: pd.Series) -> pd.DataFrame:
        """
        Store the rank-normalized ``pred`` in validation_data['prediction'].

        Returns validation_data itself (not a copy).
        """
        self.validation_data['prediction'] = self.rank_noramalize_series(pred)
        return self.validation_data

    def compute_per_era_corr(self, pred: pd.Series) -> list:
        """
        Return a list of (era, corr) tuples, one per era in order of first
        appearance, where corr is the correlation between 'target' and the
        rank-normalized ``pred`` inside that era.

        Side effect: overwrites validation_data['prediction'] through
        merge_pred_valid_df.
        """
        pred_valid_df = self.merge_pred_valid_df(pred)
        return [
            (
                era,
                self._compute_corr(
                    era_df['target'].to_numpy(),
                    era_df['prediction'].to_numpy(),
                ),
            )
            for era, era_df in pred_valid_df.groupby('era', sort=False)
        ]

    def add_predictions_to_validation_df(self, tournament_preds: pd.DataFrame) -> None:
        """
        Copy your predictions for the validation rows into validation_data.

        tournament_preds: pd.DataFrame indexed by id with a 'prediction' column.
        It must contain every id of the validation data (KeyError otherwise).

        Adds/overwrites two columns on validation_data:
        'prediction' and 'rank_prediction' (rank divided by the number of rows).
        """
        valid_ids = self.validation_data.index
        self.validation_data['prediction'] = tournament_preds.loc[valid_ids, 'prediction']
        row_count = self.validation_data.shape[0]
        self.validation_data['rank_prediction'] = self.validation_data['prediction'].rank() / row_count

    # this is the main method that you call on your validation predictions:
    #   my_score_calculator = ScoreCalculator(validation_data, example_preds)
    #   scores = my_score_calculator.compute_numerai_diagnostics(my_predictions)
    def compute_numerai_diagnostics(self, preds: pd.DataFrame):
        """
        Return a one-row DataFrame of validation diagnostics.

        preds: your model's predictions, indexed by id, with the columns
            'prediction' and 'rank_prediction'. Must cover every validation id.

        Columns returned: valid_sharpe, valid_corr, valid_SD, feature_exposure,
        max_drawdown and 'corr_with_example_preds ' (the trailing space in that
        name is kept for backward compatibility). Feature-neutral corr and MMC
        are not included.
        """
        self.add_predictions_to_validation_df(preds)

        # Computed once and shared by the three per-era statistics.
        per_era_corr = self.create_per_era_grouper()

        return pd.DataFrame({
            'valid_sharpe': [self._sharpe_of(per_era_corr)],
            'valid_corr': [self.compute_validation_corr()],
            'valid_SD': [per_era_corr.std()],
            'feature_exposure': [self.compute_feature_exposure()],
            'max_drawdown': [self._max_drawdown_of(per_era_corr)],
            'corr_with_example_preds ': [self.compute_corr_with_example_preds(preds)],
        })



## Class to ping and clean the Numerai data

In [36]:
class NumeraiDataLoader:
    """
    Pings and cleans the data from Numerai.

    Both loaders accept an optional source (URL or local path, anything
    pd.read_csv understands). When omitted, the public Numerai dataset URLs
    below are used.
    """

    TOURNAMENT_DATA_URL = 'https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_tournament_data.csv.xz'
    EXAMPLE_PREDICTIONS_URL = "https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_example_predictions_data.csv.xz"

    # Feature values are mapped to small integers so they can be stored as uint8.
    _FEATURE_LEVELS = {0.0: 0, 0.25: 1, 0.5: 2, 0.75: 3, 1.0: 4}

    def ping_validation_data(self, tournament_data_url=None) -> pd.DataFrame:
        """
        Load the tournament data and extract the validation rows.

        tournament_data_url: optional URL or path of the tournament CSV;
            defaults to TOURNAMENT_DATA_URL.

        Returns a DataFrame indexed by 'id' in which
          * only rows with data_type == "validation" are kept and the
            'data_type' column is dropped,
          * every column whose name starts with 'feature' is cast to uint8
            using the mapping 0.0->0, 0.25->1, 0.5->2, 0.75->3, 1.0->4
            (any other feature value makes the cast fail),
          * 'era' is converted from strings such as 'era121' to the integer 121,
          * 'rank_target' holds the rank of 'target' (ties broken by order of
            appearance) divided by the number of validation rows.

        Adapted from: https://www.kaggle.com/code1110/numerai-tournament | May 3, 2021
        """
        source = self.TOURNAMENT_DATA_URL if tournament_data_url is None else tournament_data_url
        tournament_df = pd.read_csv(source)

        is_validation = tournament_df["data_type"] == "validation"
        valid_df = tournament_df[is_validation].reset_index(drop=True)

        feature_cols = valid_df.columns[valid_df.columns.str.startswith('feature')]
        for col in feature_cols:
            # Casting the features to small ints reduces memory use.
            valid_df[col] = valid_df[col].map(self._FEATURE_LEVELS).astype(np.uint8)

        # Strip the leading word 'era' from the era labels.
        valid_df["era"] = valid_df["era"].apply(lambda label: int(label[3:]))
        valid_df = valid_df.drop(columns=["data_type"])

        total_valid_rows = valid_df.shape[0]
        valid_df['rank_target'] = valid_df['target'].rank(method='first') / total_valid_rows
        return valid_df.set_index('id')

    def ping_example_predictions(self, example_predictions_url=None) -> pd.DataFrame:
        """
        Load Numerai's example predictions.

        example_predictions_url: optional URL or path of the example
            predictions CSV; defaults to EXAMPLE_PREDICTIONS_URL.

        Returns a DataFrame indexed by the first CSV column (the id) with the
        columns 'prediction' (the example model's prediction) and
        'rank_example_prediction' (rank of 'prediction', ties broken by order
        of appearance, divided by the number of rows), e.g.

                              prediction  rank_example_prediction
            id
            n0003aa52cab36c2        0.49                 0.097334
            n000920ed083903f        0.49                 0.097335
            n0038e640522c4a6        0.53                 0.969455
        """
        source = self.EXAMPLE_PREDICTIONS_URL if example_predictions_url is None else example_predictions_url
        example_preds = pd.read_csv(source, index_col=0)
        total_example_prediction_rows = example_preds.shape[0]
        example_preds['rank_example_prediction'] = (
            example_preds['prediction'].rank(method='first') / total_example_prediction_rows
        )
        return example_preds


### Tester: load the validation data and the example predictions

In [37]:
# Smoke test for the loader: fetch each frame, then show its first rows and shape.
myLoader = NumeraiDataLoader()

validation_data = myLoader.ping_validation_data()
print(validation_data.head(), validation_data.shape, sep="\n")

example_preds = myLoader.ping_example_predictions()
print(example_preds.head(), example_preds.shape, sep="\n")

                  era  feature_intelligence1  ...  target  rank_target
id                                            ...                     
n0003aa52cab36c2  121                      1  ...    0.25     0.049935
n000920ed083903f  121                      3  ...    0.50     0.249755
n0038e640522c4a6  121                      4  ...    1.00     0.950058
n004ac94a87dc54b  121                      3  ...    0.50     0.249762
n0052fe97ea0c05f  121                      1  ...    0.75     0.750223

[5 rows x 313 columns]
(137779, 313)
                  prediction  rank_example_prediction
id                                                   
n0003aa52cab36c2        0.49                 0.097334
n000920ed083903f        0.49                 0.097335
n0038e640522c4a6        0.53                 0.969455
n004ac94a87dc54b        0.51                 0.656894
n0052fe97ea0c05f        0.50                 0.332613
(1714763, 2)


In [38]:

# Dummy "model": the element-wise cosine of the example predictions frame.
# np.cos is applied to every column, including 'rank_example_prediction'.
tournament_preds = np.cos(example_preds)
tournament_preds_rows = len(tournament_preds)

# Rank of the dummy predictions (ties broken by order of appearance), scaled by the row count.
tournament_preds = tournament_preds.assign(
    rank_prediction=tournament_preds['prediction'].rank(method='first') / tournament_preds_rows
)

tournament_preds

Unnamed: 0_level_0,prediction,rank_example_prediction,rank_prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
n0003aa52cab36c2,0.882333,0.995267,0.667388
n000920ed083903f,0.882333,0.995267,0.667389
n0038e640522c4a6,0.862807,0.565749,0.005497
n004ac94a87dc54b,0.872745,0.791893,0.120487
n0052fe97ea0c05f,0.877583,0.945192,0.343107
...,...,...,...
nffa71c03a5a27a2,0.877583,0.791894,0.667387
nffd391982772aeb,0.877583,0.791893,0.667388
nffda15ceb83f190,0.872745,0.637527,0.343106
nffda7325b750430,0.872745,0.637526,0.343107


## Test ScoreCalculator

In [47]:
# Build the calculator from the frames loaded above.
# The individual metrics (corr with example preds, validation corr, validation
# std, feature exposure, sharpe, max drawdown) were each checked by hand after
# calling calc.add_predictions_to_validation_df(tournament_preds).
calc = ScoreCalculator(validation_data=validation_data, example_preds=example_preds)

# One-row frame with every diagnostic for the dummy predictions.
d = calc.compute_numerai_diagnostics(preds=tournament_preds)
d


Unnamed: 0,valid_sharpe,valid_corr,valid_SD,feature_exposure,max_drawdown,corr_with_example_preds
0,-0.998927,-0.021054,0.026961,0.246548,-0.513604,-0.881295


In [None]:
# The bare name is not defined at notebook level (it is a method of
# ScoreCalculator), so it is referenced through the class to keep this cell
# from raising NameError.
ScoreCalculator.compute_numerai_diagnostics