In [1]:
import numpy as np
import pandas as pd
from data_loader import load_names_from_web, holdout_split, year_split
from sklearn.metrics import mean_squared_log_error, mean_absolute_error
from scipy.stats import kendalltau
import warnings
warnings.filterwarnings('ignore')

### Load data

In [2]:
# Load the national names data.
# NOTE(review): `use_existing_files=True` presumably reuses previously downloaded
# files instead of hitting the web again -- confirm in data_loader.
dfraw = load_names_from_web(
    category='national',
    hide_pre_1937=True,
    use_existing_files=True,
)

# year_split is applied twice: first to carve `holdout` off the full data,
# then to carve `test` off what remains.
traintestval, holdout = year_split(dfraw)
trainval, test = year_split(traintestval)

In [3]:
def get_all_known_names(data):
    '''
    Return one row per distinct (state, name, M/F) combination present in `data`.

    The result has exactly the columns ['state', 'name', 'M/F'], ordered by
    those keys (a side effect of the groupby), with a fresh integer index.
    '''
    key_columns = ['state', 'name', 'M/F']

    # the group sizes themselves are irrelevant; grouping is only used to
    # enumerate the distinct key combinations
    group_sizes = data.groupby(key_columns).size()
    return group_sizes.reset_index()[key_columns]

In [4]:
def select_top_names(data, first_year_to_predict, cutoff=10):
    '''
    Keep only the names that reached the popularity threshold at least once.

    A (state, name, M/F) combination qualifies if, in some year strictly before
    `first_year_to_predict`, its count was at or above `cutoff`. Years from
    `first_year_to_predict` onward are ignored when deciding who qualifies so
    that the selection does not leak future information.

    Returns every row of `data` (all years, including future ones) belonging to
    a qualifying combination.

    Possible variation: apply the cutoff to the most recent known year only.
    '''
    key_columns = ['state', 'name', 'M/F']

    seen_before_prediction = data['year'] < first_year_to_predict
    popular_enough = data['count'] >= cutoff

    qualifying = data.loc[seen_before_prediction & popular_enough, key_columns].drop_duplicates()

    return qualifying.merge(data, how='left', on=key_columns)

### Evaluation function: Score y_pred vs. y_true

In [5]:
def evaluate(predictor, data_held_out, first_year_to_predict, metric='msle'):
    '''
    Print per-year scores for `predictor` and display the top-5 names per sex.

    The predictor is only handed rows from before `first_year_to_predict`. It is
    then scored on every year from `first_year_to_predict` through the latest
    year present in `data_held_out`, over all names known from the pre-prediction
    rows. A name with no observed row in a scored year is given a count of 2.

    metric: 'msle', 'rank', 'rank_mae', 'kendalltau', or 'all' (print everything).

    Nothing is returned; results are printed and displayed.
    '''
    join_keys = ['state', 'name', 'M/F', 'year']

    def wanted(metric_name):
        # a metric is reported when asked for by name or when everything is requested
        return metric in (metric_name, 'all')

    def top_names(frame, sex, column):
        # names of one sex ordered from highest to lowest value of `column`
        subset = frame.loc[frame['M/F'] == sex, ['name', column]]
        return subset.sort_values(by=column, ascending=False).reset_index(drop=True)

    final_year = data_held_out['year'].max()
    years_to_predict = range(first_year_to_predict, final_year + 1)

    # only allow the model to see data from before the first year to predict
    past = data_held_out[data_held_out['year'] < first_year_to_predict]

    predictions = predictor.predict(past, years_to_predict)
    known_names = get_all_known_names(past)

    for target_year in years_to_predict:

        print(f'Predictions for {target_year}:')

        targets = known_names.copy()
        targets['year'] = target_year

        observed = targets.merge(data_held_out, how='left', on=join_keys)

        # missing values are filled with 2, same as FiveThirtyEight did;
        # reasoning: a missing count could be 0 to 4, so the average is 2
        observed['y'] = observed['count'].fillna(2)

        score_df = observed.merge(predictions, how='left', on=join_keys, suffixes=('_true', '_pred'))

        y_true = score_df['y_true']
        y_pred = score_df['y_pred']

        if wanted('msle'):
            loss = mean_squared_log_error(y_true, y_pred)
            print(f'MSLE loss: {loss}')

        # the remaining metrics compare rank positions rather than raw counts
        y_true = y_true.rank()
        y_pred = y_pred.rank()

        if wanted('rank'):
            n = len(y_true)
            score = np.sum(np.abs(y_true - y_pred)) / (n * (n - 1))
            print(f'Rank score: {score}')

        if wanted('rank_mae'):
            loss = mean_absolute_error(y_true, y_pred)
            print(f'Rank MAE loss: {loss}')

        if wanted('kendalltau'):
            tau, _ = kendalltau(y_true, y_pred)
            print(f'Tau: {tau}')

        # side-by-side columns: F true, F predicted, M true, M predicted
        leaderboards = [
            top_names(score_df, 'F', 'y_true'),
            top_names(score_df, 'F', 'y_pred'),
            top_names(score_df, 'M', 'y_true'),
            top_names(score_df, 'M', 'y_pred'),
        ]
        top = pd.concat(leaderboards, axis=1, ignore_index=True)
        display(top.head(5))

### Create a dummy predictor (assumes name counts will stay the same as previous year)

In [6]:
class DummyPredictor():
    '''
    Baseline predictor that repeats information from the last known year.

    Strategies:
      'naive' -- every name keeps the count it had in the year before the first
                 predicted year (2 if it had no row that year).
      'mean'  -- every name is given the mean count of that same prior year.

    The same prediction is emitted for every requested year.
    '''

    _STRATEGIES = ('naive', 'mean')

    def __init__(self, strategy='naive'):
        # fail fast: an unknown strategy would otherwise yield predictions with
        # no 'y' column and only blow up later inside the scorer
        if strategy not in self._STRATEGIES:
            raise ValueError(f'Unknown strategy {strategy!r}; expected one of {self._STRATEGIES}')
        self.strategy = strategy

    def predict(self, historical_data, years_to_predict):
        '''
        Build predictions for each year in `years_to_predict`.

        historical_data: frame with 'state', 'name', 'M/F', 'year', 'count' columns.
        years_to_predict: iterable of years; the first one determines which
            year counts as "the previous year".

        Returns one frame holding a block of rows per requested year, with the
        prediction in column 'y'. An empty `years_to_predict` yields an empty
        frame.
        '''
        key_columns = ['state', 'name', 'M/F']

        # materialise so that generators and other non-indexable iterables work
        years = list(years_to_predict)
        if not years:
            return pd.DataFrame(columns=key_columns + ['year', 'y'])

        all_known_names = get_all_known_names(historical_data)
        previous_year_data = historical_data[historical_data['year'] == years[0] - 1].drop(columns=['year'])

        # the prediction does not depend on the target year, so compute it once
        if self.strategy == 'naive':
            template = all_known_names.merge(previous_year_data, how='left', on=key_columns)
            template['y'] = template['count'].fillna(2)
        elif self.strategy == 'mean':
            template = all_known_names.copy()
            template['y'] = previous_year_data['count'].mean()
        else:
            # only reachable if `strategy` was reassigned after construction
            raise ValueError(f'Unknown strategy {self.strategy!r}; expected one of {self._STRATEGIES}')

        # 'year' goes right after the key columns, matching the original column order
        year_position = len(all_known_names.columns)

        predictions = []
        for year_to_predict in years:
            prediction = template.copy()
            prediction.insert(year_position, 'year', year_to_predict)
            predictions.append(prediction)

        return pd.concat(predictions, ignore_index=True)

### Classify whether a name has peaked yet

In [7]:
def classify(df):
    '''
    Annotate every row with its name's peak year and whether the peak has been reached.

    For each (state, name, M/F) group the peak is the year of the row with the
    highest 'count' (the first such row on ties).

    Adds two columns:
      peak_year  -- the group's peak year
      has_peaked -- 1 if the row's year is at or after the peak year, else 0

    Returns a new frame with a fresh integer index; the input is not modified.
    '''
    key_columns = ['state', 'name', 'M/F']

    # idxmax returns index *labels*. With a fresh RangeIndex each label selects
    # exactly one row, even when the caller's index contains duplicates.
    df = df.reset_index(drop=True)

    peak_rows = df.groupby(key_columns)['count'].idxmax()
    peaks = df.loc[peak_rows, key_columns + ['year']].rename(columns={'year': 'peak_year'})

    df = df.merge(peaks, how='left', on=key_columns)

    # vectorised comparison instead of a Python-level row-wise apply
    df['has_peaked'] = (df['year'] >= df['peak_year']).astype(int)
    return df

### Load actor and character name data

In [8]:
def load_movie_data(path='Data/cleaned_cast_and_characters.csv'):
    '''
    Read the cast/character CSV and add an integer 'release_year' column.

    path: location of the CSV; defaults to the file this notebook has always used.

    'release_year' is taken from the first four characters of 'release_date'.
    Missing or unparseable dates become 0.
    '''
    movies = pd.read_csv(path)

    # astype(str) tolerates non-string columns; anything that is not a number
    # (including the 'nan' produced for missing dates) is coerced and mapped to 0
    year_text = movies['release_date'].astype(str).str[:4]
    movies['release_year'] = pd.to_numeric(year_text, errors='coerce').fillna(0).astype(int)
    return movies

# Module-level frame; MyPredictor.preprocess reads this global `movies` directly.
movies = load_movie_data()

### Create our prediction model

In [9]:
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingRegressor

class MyPredictor():
    '''
    Gradient-boosted regressor that predicts a name's count for the next year
    from per-name features built by `preprocess`.

    Depends on two names defined elsewhere in this notebook: the `classify`
    function and the module-level `movies` DataFrame.
    '''

    def __init__(self):
        '''Build the (unfitted) column-selection + regressor pipeline.'''

        cols_to_keep = ['this_year', 'M/F', 'sum', 'median_age', 'thisyear_count', 'diff', 'thisyear_count_opp', 'diff2', 'shift', 'pct_change', 'accel', 'first_letter_1_pct', 'first_letter_2_pct', 'first_letter_3_pct', 'after_peak', 'years_since_peak', 'actor_recency_score', 'character_recency_score']
        categorical_features = ['M/F', 'after_peak']
        max_leaf_nodes = 63 # 31
        max_iter = 200 # 100
        loss = 'absolute_error' # abs better than default

        # boolean mask aligned position-by-position with cols_to_keep
        categorical_mask = [f in categorical_features for f in cols_to_keep]

        self.pipe = make_pipeline(
            ColumnTransformer(
                transformers=[
                    ('cols_to_keep', 'passthrough', cols_to_keep),
                ], remainder='drop'),
            HistGradientBoostingRegressor(
                random_state=0,
                categorical_features=categorical_mask,
                max_leaf_nodes=max_leaf_nodes,
                max_iter=max_iter,
                loss=loss
            )
        )

        self.gender_encoding = {'M': 0, 'F': 1}

    def preprocess(self, df, latest_known_year):
        '''
        Turn yearly count rows into one feature row per (state, name, M/F).

        df: frame with 'state', 'name', 'M/F', 'year', 'count' columns; it is
            copied, not modified.
        latest_known_year: the most recent year the features may draw on;
            "this year" features are taken from the rows of that year.

        Returns the feature frame with 'M/F' encoded via `self.gender_encoding`.
        '''

        # find median age of people with name,
        # total born with that name,
        # and latest year's count

        df = df.copy()

        df = classify(df)
        # 1 when the name's peak year lies strictly before the latest known year
        df['after_peak'] = (df['peak_year'] < latest_known_year).astype(int)

        df = df.sort_values(by='year')
        df['cumsum'] = df.groupby(['state', 'name', 'M/F'])['count'].cumsum()
        df['sum'] = df.groupby(['state', 'name', 'M/F'])['count'].transform('sum')

        df['diff'] = df.groupby(['state', 'name', 'M/F'])['count'].diff()
        df['diff2'] = df.groupby(['state', 'name', 'M/F'])['count'].diff(2)
        df['shift'] = df.groupby(['state', 'name', 'M/F'])['count'].shift()
        df['pct_change'] = df.groupby(['state', 'name', 'M/F'])['count'].pct_change()
        df['accel'] = df.groupby(['state', 'name', 'M/F'])['diff'].diff()

        percentage_of_total_per_year = {}
        percentage_change_per_year = {}

        def first_letters(df, n, percentage_of_total_per_year, percentage_change_per_year):
            # adds the lower-cased first n letters as a column and records, per
            # year/state/sex, the share of births going to each such prefix
            df['first_letter_'+str(n)] = df['name'].str[0:n].str.lower()
            total_names_per_year = df.groupby(['year', 'state', 'M/F'])['count'].sum()
            letter_names_per_year = df.groupby(['year', 'state', 'M/F', 'first_letter_'+str(n)])['count'].sum()
            percentage_of_total_per_year[n] = (letter_names_per_year / total_names_per_year).rename('first_letter_'+str(n)+'_pct')
            percentage_change_per_year[n] = percentage_of_total_per_year[n].groupby(['state', 'M/F', 'first_letter_'+str(n)]).pct_change().rename('first_letter_'+str(n)+'_pct_change')
            return df

        for n in range(1, 4):
            df = first_letters(df, n, percentage_of_total_per_year, percentage_change_per_year)

        # rows are sorted by year, so the first row per name whose running total
        # reaches half of the overall total marks the median birth year
        medians = df[df['cumsum'] >= df['sum']/2]
        medians = medians.drop_duplicates(subset=['state', 'name', 'M/F'], keep='first')
        medians['median_age'] = latest_known_year - medians['year']

        medians = medians.drop(['count', 'cumsum', 'diff', 'shift', 'pct_change', 'accel', 'diff2'], axis=1)

        thisyear = df[df['year'] == latest_known_year][['state', 'name', 'M/F', 'count', 'diff', 'shift', 'pct_change', 'accel', 'diff2']].rename(columns={'count': 'thisyear_count'})

        # look up the same name's count for the opposite sex: flip the sex,
        # join against the per-sex counts, then flip back
        sex_counts = thisyear.groupby(['state', 'name', 'M/F'])['thisyear_count'].sum()
        thisyear_swapped = thisyear.copy()
        thisyear_swapped['M/F'] = thisyear_swapped['M/F'].replace({'M': 'F', 'F': 'M'})
        thisyear_swapped = thisyear_swapped.merge(sex_counts, how='left', on=['state', 'name', 'M/F'], suffixes=('', '_opp'))
        thisyear_swapped['M/F'] = thisyear_swapped['M/F'].replace({'M': 'F', 'F': 'M'})
        thisyear_swapped['thisyear_count_opp'] = thisyear_swapped['thisyear_count_opp'].fillna(0)
        thisyear = thisyear_swapped

        df = medians.merge(thisyear, how='left', on=['state', 'name', 'M/F']).rename(columns={'year': 'median_year'})

        # this is sort of a rough assumption that if a row didn't exist for this year, not only is the count 0, but so is the diff, shift and pct_change. not always true if the prior year had a count, but often true
        df[['thisyear_count', 'diff', 'shift', 'pct_change', 'accel', 'diff2']] = df[['thisyear_count', 'diff', 'shift', 'pct_change', 'accel', 'diff2']].fillna(0)

        df['after_peak'] = df['after_peak'].fillna(1)
        df['years_since_peak'] = latest_known_year - df['peak_year']

        df['this_year'] = latest_known_year # adding this does seem to improve accuracy. we need to know how far along we are in time

        for n in range(1, 4):
            df = df.merge(percentage_of_total_per_year[n], how='left', left_on=['this_year', 'state', 'M/F', 'first_letter_'+str(n)], right_on=['year', 'state', 'M/F', 'first_letter_'+str(n)])
            df = df.merge(percentage_change_per_year[n], how='left', left_on=['this_year', 'state', 'M/F', 'first_letter_'+str(n)], right_on=['year', 'state', 'M/F', 'first_letter_'+str(n)])
            df['first_letter_'+str(n)+'_pct'] = df['first_letter_'+str(n)+'_pct'].fillna(0)
            df['first_letter_'+str(n)+'_pct_change'] = df['first_letter_'+str(n)+'_pct_change'].fillna(0)

        # change M/F to 0/1 so it works with various models
        # (even HistGradientBoostingRegressor, which accepts categorical values,
        # still needs those values to be numbers not strings)
        df['M/F'] = df['M/F'].map(self.gender_encoding)

        # MOVIES
        # explicit copy: columns are added below and must not be written into a
        # slice of the shared module-level `movies` frame
        known_movies = movies[movies['release_year'] <= latest_known_year].copy() # avoid data leakage
        known_movies['years_since_release'] = latest_known_year - known_movies['release_year'] + 1
        # each movie's weight halves with every additional year since release
        known_movies['recency_score'] = 0.5 ** known_movies['years_since_release']
        actor_name_scores = known_movies.groupby('actor_first_name')['recency_score'].sum().reset_index().rename(columns={'actor_first_name': 'name', 'recency_score': 'actor_recency_score'})
        character_name_scores = known_movies.groupby('character_first_name')['recency_score'].sum().reset_index().rename(columns={'character_first_name': 'name', 'recency_score': 'character_recency_score'})
        df = df.merge(actor_name_scores, how='left', on='name')
        df = df.merge(character_name_scores, how='left', on='name')
        df['actor_recency_score'] = df['actor_recency_score'].fillna(0)
        df['character_recency_score'] = df['character_recency_score'].fillna(0)

        return df

    def fit(self, historical_data, first_year_to_predict, years_to_fit=1, weight_decay=0.9):
        '''
        Fit the pipeline on the last `years_to_fit` years before `first_year_to_predict`.

        For each target year the features come from all strictly earlier rows
        and the label is that year's count (0 when the name has no row).
        Training rows are weighted by weight_decay ** (years before
        `first_year_to_predict`), so older target years count less.

        Raises ValueError if `years_to_fit` is less than 1.
        '''
        if years_to_fit < 1:
            raise ValueError('years_to_fit must be at least 1')

        # first things first, we don't want to know about future data
        historical_data = historical_data[historical_data['year'] < first_year_to_predict]
        # at this point the data we don't want to know should be inaccessible

        # collect per-year pieces and concatenate once at the end
        X_parts = []
        y_parts = []

        # each year_to_fit is the year that's essentially our y for that loop
        for year_to_fit in range(first_year_to_predict - years_to_fit, first_year_to_predict):

            # now we "know" even less for X
            X = historical_data[historical_data['year'] < year_to_fit]
            y = historical_data[historical_data['year'] == year_to_fit]

            X = self.preprocess(X, latest_known_year=year_to_fit - 1)
            y = y[['state', 'name', 'M/F', 'count']].rename(columns={'count': 'y'})
            y['M/F'] = y['M/F'].map(self.gender_encoding)

            data = X.merge(y, how='left', on=['state', 'name', 'M/F'])
            data['y'] = data['y'].fillna(0)

            X = data.drop(columns=['y'])
            y = data['y']
            X['sample_weight'] = weight_decay ** (first_year_to_predict - year_to_fit)

            X_parts.append(X)
            y_parts.append(y)

        X_all = pd.concat(X_parts, ignore_index=True)
        y_all = pd.concat(y_parts, ignore_index=True)

        sample_weights = X_all['sample_weight']
        X_all = X_all.drop(columns=['sample_weight'])

        self.pipe.fit(X_all, y_all, **{'histgradientboostingregressor__sample_weight': sample_weights})
        # this seems like a silly way to pass params to individual steps of the pipeline, but it's true. See: https://stackoverflow.com/questions/36205850/sklearn-pipeline-applying-sample-weights-after-applying-a-polynomial-feature-t

    def predict(self, historical_data, years_to_predict):
        '''
        Predict counts year by year, feeding each year's predictions back in.

        After a year is predicted, its predictions are appended to the history
        as if they were observed counts, so later years build on earlier
        predictions. Final predictions below 4.5 are replaced by 2.

        Returns the stacked feature frames for all years, with the prediction in
        'y', the target year in 'year', and 'M/F' mapped back to 'M'/'F'.
        '''

        decode_gender = {v: k for k, v in self.gender_encoding.items()}

        predictions = []

        for year_to_predict in years_to_predict:

            df = self.preprocess(historical_data, latest_known_year=year_to_predict - 1)

            df['y'] = self.pipe.predict(df)

            df['year'] = year_to_predict

            # if we want to simplify, do the following;
            # but for now, might be useful to see all data displayed.
            # df = df[['state', 'year', 'name', 'M/F', 'y']]

            predictions.append(df)

            assumed_new_year_of_historical_data = df[['state', 'year', 'name', 'M/F', 'y']].rename(columns={'y': 'count'})
            assumed_new_year_of_historical_data['M/F'] = assumed_new_year_of_historical_data['M/F'].map(decode_gender)
            historical_data = pd.concat([historical_data, assumed_new_year_of_historical_data], ignore_index=True)

        predictions = pd.concat(predictions, ignore_index=True)

        # we have to reverse the mapping to send our predictions
        # (at least the way we currently have it set up)
        predictions['M/F'] = predictions['M/F'].map(decode_gender)

        predictions.loc[predictions['y'] < 4.5, 'y'] = 2

        return predictions

### Get scores for dummy model

In [10]:
# Baseline run: score the "same as last year" predictor on the holdout data.
first_year_to_predict = 2013
cutoff = 100

data_to_eval = select_top_names(
    holdout,
    first_year_to_predict=first_year_to_predict,
    cutoff=cutoff,
)

evaluate(
    predictor=DummyPredictor(strategy='naive'),
    data_held_out=data_to_eval,
    first_year_to_predict=first_year_to_predict,
    metric='all',
)

Predictions for 2013:
MSLE loss: 0.08223718946872904
Rank score: 0.027165611041145767
Rank MAE loss: 186.60058224163026
Tau: 0.9257442677714379


Unnamed: 0,0,1,2,3,4,5,6,7
0,Sophia,21244.0,Sophia,22325.0,Noah,18276.0,Jacob,19094.0
1,Emma,20958.0,Emma,20964.0,Jacob,18185.0,Mason,18987.0
2,Olivia,18446.0,Isabella,19126.0,Liam,18150.0,Ethan,17687.0
3,Isabella,17663.0,Olivia,17328.0,Mason,17721.0,Noah,17366.0
4,Ava,15265.0,Ava,15545.0,William,16654.0,William,16904.0


Predictions for 2014:
MSLE loss: 0.11678471442069187
Rank score: 0.03559105599212376
Rank MAE loss: 244.4749636098981
Tau: 0.9025481322343588


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,20958.0,Sophia,22325.0,Noah,19329.0,Jacob,19094.0
1,Olivia,19829.0,Emma,20964.0,Liam,18478.0,Mason,18987.0
2,Sophia,18636.0,Isabella,19126.0,Mason,17212.0,Ethan,17687.0
3,Isabella,17125.0,Olivia,17328.0,Jacob,16904.0,Noah,17366.0
4,Ava,15721.0,Ava,15545.0,William,16839.0,William,16904.0


Predictions for 2015:
MSLE loss: 0.1597163216092688
Rank score: 0.043047821753874706
Rank MAE loss: 295.69548762736537
Tau: 0.8810481150454055


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,20478.0,Sophia,22325.0,Noah,19666.0,Jacob,19094.0
1,Olivia,19717.0,Emma,20964.0,Liam,18400.0,Mason,18987.0
2,Sophia,17445.0,Isabella,19126.0,Mason,16657.0,Ethan,17687.0
3,Ava,16399.0,Olivia,17328.0,Jacob,15979.0,Noah,17366.0
4,Isabella,15646.0,Ava,15545.0,William,15935.0,William,16904.0


Predictions for 2016:
MSLE loss: 0.22471799079602142
Rank score: 0.05074982999586989
Rank MAE loss: 348.60058224163026
Tau: 0.8588455930516347


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,19536.0,Sophia,22325.0,Noah,19171.0,Jacob,19094.0
1,Olivia,19397.0,Emma,20964.0,Liam,18259.0,Mason,18987.0
2,Ava,16338.0,Isabella,19126.0,William,15798.0,Ethan,17687.0
3,Sophia,16171.0,Olivia,17328.0,Mason,15282.0,Noah,17366.0
4,Isabella,14846.0,Ava,15545.0,James,14905.0,William,16904.0


Predictions for 2017:
MSLE loss: 0.29673044717737596
Rank score: 0.05796626957007656
Rank MAE loss: 398.1703056768559
Tau: 0.8394971066188995


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,19857.0,Sophia,22325.0,Liam,18849.0,Jacob,19094.0
1,Olivia,18763.0,Emma,20964.0,Noah,18473.0,Mason,18987.0
2,Ava,16005.0,Isabella,19126.0,William,15023.0,Ethan,17687.0
3,Isabella,15231.0,Olivia,17328.0,James,14346.0,Noah,17366.0
4,Sophia,14925.0,Ava,15545.0,Logan,14042.0,William,16904.0


Predictions for 2018:
MSLE loss: 0.3625212319106479
Rank score: 0.06386382886385111
Rank MAE loss: 438.6806404657933
Tau: 0.8230872112987722


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,18801.0,Sophia,22325.0,Liam,19957.0,Jacob,19094.0
1,Olivia,18039.0,Emma,20964.0,Noah,18403.0,Mason,18987.0
2,Ava,15010.0,Isabella,19126.0,William,14633.0,Ethan,17687.0
3,Isabella,14548.0,Olivia,17328.0,James,13629.0,Noah,17366.0
4,Sophia,13991.0,Ava,15545.0,Oliver,13487.0,William,16904.0


Predictions for 2019:
MSLE loss: 0.436664592019942
Rank score: 0.06951466231320472
Rank MAE loss: 477.4962154294032
Tau: 0.8071419430827314


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,18549.0,Sophia,22325.0,Liam,20601.0,Jacob,19094.0
1,Emma,17194.0,Emma,20964.0,Noah,19145.0,Mason,18987.0
2,Ava,14528.0,Isabella,19126.0,Oliver,13953.0,Ethan,17687.0
3,Sophia,13787.0,Olivia,17328.0,William,13629.0,Noah,17366.0
4,Isabella,13376.0,Ava,15545.0,Elijah,13393.0,William,16904.0


Predictions for 2020:
MSLE loss: 0.5326464911192653
Rank score: 0.0750478437924282
Rank MAE loss: 515.5036390101892
Tau: 0.7919159082477174


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,17664.0,Sophia,22325.0,Liam,19828.0,Jacob,19094.0
1,Emma,15680.0,Emma,20964.0,Noah,18407.0,Mason,18987.0
2,Ava,13179.0,Isabella,19126.0,Oliver,14261.0,Ethan,17687.0
3,Charlotte,13083.0,Olivia,17328.0,Elijah,13172.0,Noah,17366.0
4,Sophia,13070.0,Ava,15545.0,William,12643.0,William,16904.0


Predictions for 2021:
MSLE loss: 0.6229610804255129
Rank score: 0.08124008397536514
Rank MAE loss: 558.0381368267831
Tau: 0.7748132798803677


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,17798.0,Sophia,22325.0,Liam,20365.0,Jacob,19094.0
1,Emma,15510.0,Emma,20964.0,Noah,18849.0,Mason,18987.0
2,Charlotte,13336.0,Isabella,19126.0,Oliver,14683.0,Ethan,17687.0
3,Amelia,13007.0,Olivia,17328.0,Elijah,12774.0,Noah,17366.0
4,Ava,12830.0,Ava,15545.0,James,12429.0,William,16904.0


Predictions for 2022:
MSLE loss: 0.7121785823166749
Rank score: 0.08621808462507864
Rank MAE loss: 592.2320232896652
Tau: 0.7610733881152087


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,16573.0,Sophia,22325.0,Liam,20456.0,Jacob,19094.0
1,Emma,14435.0,Emma,20964.0,Noah,18621.0,Mason,18987.0
2,Charlotte,12891.0,Isabella,19126.0,Oliver,15076.0,Ethan,17687.0
3,Amelia,12333.0,Olivia,17328.0,James,12028.0,Noah,17366.0
4,Sophia,12310.0,Ava,15545.0,Elijah,11979.0,William,16904.0


### Get scores for our best model

In [11]:
first_year_to_predict = 2013
cutoff = 100

# NOTE: this rebinds the module-level `movies` frame that MyPredictor.preprocess
# reads, so movies released from `first_year_to_predict` onward are gone for the
# rest of the session. Re-run the cell that loads `movies` before evaluating a
# later `first_year_to_predict`.
movies = movies[movies['release_year'] <= first_year_to_predict - 1]

data_to_fit = select_top_names(
    traintestval,
    first_year_to_predict=first_year_to_predict,
    cutoff=cutoff,
)
data_to_eval = select_top_names(
    holdout,
    first_year_to_predict=first_year_to_predict,
    cutoff=cutoff,
)

my_predictor = MyPredictor()
my_predictor.fit(
    historical_data=data_to_fit,
    first_year_to_predict=first_year_to_predict,
    years_to_fit=30,
)

evaluate(
    predictor=my_predictor,
    data_held_out=data_to_eval,
    first_year_to_predict=first_year_to_predict,
    metric='all',
)

Predictions for 2013:
MSLE loss: 0.07171282574972478
Rank score: 0.025286697211254157
Rank MAE loss: 173.6943231441048
Tau: 0.9293805440453246


Unnamed: 0,0,1,2,3,4,5,6,7
0,Sophia,21244.0,Emma,20975.010064,Noah,18276.0,Jacob,19655.52919
1,Emma,20958.0,Sophia,20164.833858,Jacob,18185.0,Mason,19274.746307
2,Olivia,18446.0,Isabella,20156.88511,Liam,18150.0,Liam,15862.453096
3,Isabella,17663.0,Ava,13982.837335,Mason,17721.0,Ethan,14962.63482
4,Ava,15265.0,Olivia,13825.663223,William,16654.0,Jayden,14102.641506


Predictions for 2014:
MSLE loss: 0.1038895432324433
Rank score: 0.03238830320726645
Rank MAE loss: 222.47525473071326
Tau: 0.9098227523928353


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,20958.0,Isabella,20380.173581,Noah,19329.0,Jacob,19756.670064
1,Olivia,19829.0,Emma,20195.887472,Liam,18478.0,Mason,19448.384778
2,Sophia,18636.0,Sophia,19447.239138,Mason,17212.0,Jackson,15802.146078
3,Isabella,17125.0,Emily,13639.583505,Jacob,16904.0,Christopher,14530.654802
4,Ava,15721.0,Mia,13408.560123,William,16839.0,Joseph,14526.660044


Predictions for 2015:
MSLE loss: 0.1388572746775794
Rank score: 0.03848545974647611
Rank MAE loss: 264.3566229985444
Tau: 0.8916996191502585


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,20478.0,Sophia,20173.9359,Noah,19666.0,Jacob,19667.29881
1,Olivia,19717.0,Isabella,19866.358644,Liam,18400.0,Mason,19104.81753
2,Sophia,17445.0,Emma,19790.804898,Mason,16657.0,Jackson,15802.095627
3,Ava,16399.0,Mia,14873.564153,Jacob,15979.0,Christopher,15297.046594
4,Isabella,15646.0,Ava,14053.888496,William,15935.0,Joseph,14610.002418


Predictions for 2016:
MSLE loss: 0.1784742743206004
Rank score: 0.04437064354483352
Rank MAE loss: 304.7819505094614
Tau: 0.8749969140743702


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,19536.0,Emma,19763.234808,Noah,19171.0,Jacob,19746.426337
1,Olivia,19397.0,Isabella,19695.950054,Liam,18259.0,Mason,19099.129148
2,Ava,16338.0,Sophia,19680.934612,William,15798.0,Jackson,14639.448138
3,Sophia,16171.0,Mia,15253.387397,Mason,15282.0,Christopher,14578.801426
4,Isabella,14846.0,Olivia,13930.73466,James,14905.0,Jayden,14302.165778


Predictions for 2017:
MSLE loss: 0.2214466766697961
Rank score: 0.049592594028865844
Rank MAE loss: 340.65152838427946
Tau: 0.859431208917134


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,19857.0,Emma,19767.178632,Liam,18849.0,Jacob,19679.205547
1,Olivia,18763.0,Isabella,19720.746866,Noah,18473.0,Mason,18887.58111
2,Ava,16005.0,Sophia,19700.954894,William,15023.0,Jayden,14471.271371
3,Isabella,15231.0,Mia,14383.508213,James,14346.0,Michael,14299.666201
4,Sophia,14925.0,Olivia,13906.061668,Logan,14042.0,Aiden,14057.147171


Predictions for 2018:
MSLE loss: 0.26042608458520244
Rank score: 0.0544470728244928
Rank MAE loss: 373.99694323144104
Tau: 0.8459488571945593


Unnamed: 0,0,1,2,3,4,5,6,7
0,Emma,18801.0,Emma,19739.731967,Liam,19957.0,Jacob,19680.948128
1,Olivia,18039.0,Sophia,19723.907059,Noah,18403.0,Mason,19099.129148
2,Ava,15010.0,Isabella,19720.194151,William,14633.0,Aiden,14152.436744
3,Isabella,14548.0,Olivia,13772.190026,James,13629.0,Jayden,14149.976853
4,Sophia,13991.0,Ava,13635.224252,Oliver,13487.0,Michael,13860.483189


Predictions for 2019:
MSLE loss: 0.3094261117062854
Rank score: 0.05969856344655852
Rank MAE loss: 410.0694323144105
Tau: 0.8314042478822724


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,18549.0,Emma,19740.251506,Liam,20601.0,Jacob,19680.087126
1,Emma,17194.0,Sophia,19696.285467,Noah,19145.0,Mason,19235.747656
2,Ava,14528.0,Isabella,19693.512723,Oliver,13953.0,Aiden,14182.035445
3,Sophia,13787.0,Olivia,13774.938009,William,13629.0,Jayden,14146.595384
4,Isabella,13376.0,Abigail,13769.819154,Elijah,13393.0,Joseph,13865.750503


Predictions for 2020:
MSLE loss: 0.36604384540377155
Rank score: 0.06458980848285115
Rank MAE loss: 443.6673944687045
Tau: 0.8171233283792322


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,17664.0,Emma,19739.99613,Liam,19828.0,Jacob,19681.231033
1,Emma,15680.0,Sophia,19696.333862,Noah,18407.0,Mason,19099.701911
2,Ava,13179.0,Isabella,19692.825519,Oliver,14261.0,Aiden,14019.753635
3,Charlotte,13083.0,Olivia,13776.827656,Elijah,13172.0,Jayden,13994.529058
4,Sophia,13070.0,Mia,13644.917358,William,12643.0,Jackson,13846.020062


Predictions for 2021:
MSLE loss: 0.43454661510088594
Rank score: 0.07028755862202249
Rank MAE loss: 482.80524017467246
Tau: 0.8018339215732352


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,17798.0,Emma,19740.031426,Liam,20365.0,Jacob,19681.231033
1,Emma,15510.0,Sophia,19692.485222,Noah,18849.0,Mason,19098.668118
2,Charlotte,13336.0,Isabella,19692.305979,Oliver,14683.0,Jayden,14009.773904
3,Amelia,13007.0,Olivia,13774.418393,Elijah,12774.0,James,13841.847118
4,Ava,12830.0,Mia,13770.281234,James,12429.0,Matthew,13807.551291


Predictions for 2022:
MSLE loss: 0.49964373174327265
Rank score: 0.07470154182991619
Rank MAE loss: 513.1248908296943
Tau: 0.7886319745112321


Unnamed: 0,0,1,2,3,4,5,6,7
0,Olivia,16573.0,Emma,19740.031426,Liam,20456.0,Jacob,19680.56954
1,Emma,14435.0,Sophia,19692.485222,Noah,18621.0,Mason,19019.222997
2,Charlotte,12891.0,Isabella,19692.392608,Oliver,15076.0,Jayden,14007.333791
3,Amelia,12333.0,Olivia,13774.418393,James,12028.0,Matthew,13805.882334
4,Sophia,12310.0,Abigail,13632.163573,Elijah,11979.0,James,13776.137824
