In [1]:
import numpy as np
import pandas as pd
from data_loader import load_names_from_web, holdout_split
from sklearn.metrics import mean_squared_log_error

In [2]:
dfraw = load_names_from_web(category='national', hide_pre_1937=True, use_existing_files=True)
traintestval, holdout = holdout_split(dfraw)
trainval, test = holdout_split(traintestval)

In [3]:
test

Unnamed: 0,state,year,name,M/F,count
0,US,2000,Madison,F,19968
1,US,2000,Olivia,F,12854
2,US,2000,Sydney,F,10244
3,US,2000,Jennifer,F,9389
4,US,2000,Amanda,F,8557
...,...,...,...,...,...
212633,US,2019,Zaheen,M,5
212634,US,2019,Zahi,M,5
212635,US,2019,Zaymar,M,5
212636,US,2019,Zeo,M,5


In [4]:
def get_all_known_names(data):

    names = data.groupby(['state', 'name', 'M/F']).size().reset_index()
    names = names[['state', 'name', 'M/F']]
    return names

In [5]:
def select_top_names(data, first_year_to_predict, cutoff=10):
    '''
    Select names that have had at least one year at or above the cutoff count?
    Could also try to redo this to do the cutoff for the most recent known year's data
    '''

    data_noleak = data[data['year'] < first_year_to_predict]
    names = data_noleak[data_noleak['count'] >= cutoff][['state', 'name', 'M/F']].drop_duplicates()
    # display(names)
    data = names.merge(data, how='left', on=['state', 'name', 'M/F'])
    # display(data)

    return data

In [19]:
def evaluate(predictor, data_held_out, first_year_to_predict, metric='msle'):

    # display('data_held_out:')
    # display(data_held_out)

    most_recent_year = data_held_out['year'].max()
    
    years_to_predict = range(first_year_to_predict, most_recent_year+1)

    # only allow the model to see data from before the year to predict
    historical_data = data_held_out[data_held_out['year'] < first_year_to_predict]

    # display('historical_data:')
    # display(historical_data)

    # get our model's predictions
    predictions = predictor.predict(historical_data, years_to_predict)

    all_known_names = get_all_known_names(historical_data)

    for year_to_predict in years_to_predict:

        print(f'Predictions for {year_to_predict}:')

        names_to_predict = all_known_names.copy()
        names_to_predict['year'] = year_to_predict
        # display(names_to_predict)

        observed = names_to_predict.merge(data_held_out, how='left', on=['state', 'name', 'M/F', 'year'])

        # for now, fill in missing values with 2, same as FiveThirtyEight did;
        # reasoning: missing values could be 0 to 4, so average is 2
        observed['y'] = observed['count'].fillna(2)
        # observed = observed.rename(columns={'count': 'count_true'})
        # display(observed)

        score_df = observed.merge(predictions, how='left', on=['state', 'name', 'M/F', 'year'], suffixes=('_true', '_pred'))
        # display(score_df)

        y_true = score_df['y_true']
        y_pred = score_df['y_pred']

        if metric == 'msle':
            loss = mean_squared_log_error(y_true, y_pred)
            print(f'Loss: {loss}')

        if metric == 'rank':
            y_true = y_true.rank()
            y_pred = y_pred.rank()
            score = np.sum(np.abs(y_true-y_pred))/(len(y_true)*(len(y_true)-1))
            print(f'Score: {score}')
        
        top_F_true = score_df[score_df['M/F'] == 'F'].sort_values(by='y_true', ascending=False)
        display(top.head(5))

In [20]:
class DummyPredictor():

    def __init__(self, strategy='naive'):
        self.strategy = strategy
    
    def predict(self, historical_data, years_to_predict):

        all_known_names = get_all_known_names(historical_data)

        predictions = []
        previous_year_data = historical_data[historical_data['year'] == years_to_predict[0] - 1].drop(columns=['year'])

        for year_to_predict in years_to_predict:

            prediction = all_known_names.copy()
            prediction['year'] = year_to_predict

            if self.strategy == 'naive':
                prediction = prediction.merge(previous_year_data, how='left', on=['state', 'name', 'M/F'])
                prediction['y'] = prediction['count'].fillna(2)
                # display(prediction)
            elif self.strategy == 'mean':
                prediction['y'] = previous_year_data['count'].mean()
                # display(prediction)

            predictions.append(prediction)

        predictions = pd.concat(predictions, ignore_index=True)
        # display(predictions)

        return predictions

In [21]:
first_year_to_predict = 2003
cutoff = 100
# data_to_fit = select_top_names(trainval, first_year_to_predict=first_year_to_predict, cutoff=0)
data_to_eval = select_top_names(test, first_year_to_predict=first_year_to_predict, cutoff=cutoff)
evaluate(predictor=DummyPredictor(strategy='naive'), data_held_out=data_to_eval, first_year_to_predict=first_year_to_predict, metric='rank')

Predictions for 2003:
Score: 0.03204052376706701


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2003,27126.0,27126.0,28256.0,28256.0
276,US,Ethan,M,2003,21361.0,21361.0,22112.0,22112.0
527,US,Madison,F,2003,20201.0,20201.0,21773.0,21773.0
58,US,Anthony,M,2003,20149.0,20149.0,19590.0,19590.0
212,US,David,M,2003,18631.0,18631.0,18697.0,18697.0


Predictions for 2004:
Score: 0.040194518743844804


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2004,25465.0,25465.0,28256.0,28256.0
276,US,Ethan,M,2004,22208.0,22208.0,22112.0,22112.0
527,US,Madison,F,2004,20628.0,20628.0,21773.0,21773.0
58,US,Anthony,M,2004,19612.0,19612.0,19590.0,19590.0
212,US,David,M,2004,18400.0,18400.0,18697.0,18697.0


Predictions for 2005:
Score: 0.05132340343700285


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2005,23826.0,23826.0,28256.0,28256.0
276,US,Ethan,M,2005,21317.0,21317.0,22112.0,22112.0
527,US,Madison,F,2005,19577.0,19577.0,21773.0,21773.0
58,US,Anthony,M,2005,19383.0,19383.0,19590.0,19590.0
212,US,David,M,2005,18133.0,18133.0,18697.0,18697.0


Predictions for 2006:
Score: 0.058113785889524074


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2006,22647.0,22647.0,28256.0,28256.0
276,US,Ethan,M,2006,20521.0,20521.0,22112.0,22112.0
58,US,Anthony,M,2006,19411.0,19411.0,19590.0,19590.0
527,US,Madison,F,2006,18632.0,18632.0,21773.0,21773.0
212,US,David,M,2006,17653.0,17653.0,18697.0,18697.0


Predictions for 2007:
Score: 0.06422673263287938


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2007,22014.0,22014.0,28256.0,28256.0
276,US,Ethan,M,2007,21034.0,21034.0,22112.0,22112.0
58,US,Anthony,M,2007,19633.0,19633.0,19590.0,19590.0
527,US,Madison,F,2007,17965.0,17965.0,21773.0,21773.0
212,US,David,M,2007,17556.0,17556.0,18697.0,18697.0


Predictions for 2008:
Score: 0.07240549407644388


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
577,US,Michael,M,2008,20642.0,20642.0,28256.0,28256.0
276,US,Ethan,M,2008,20220.0,20220.0,22112.0,22112.0
58,US,Anthony,M,2008,18409.0,18409.0,19590.0,19590.0
613,US,Olivia,F,2008,17086.0,17086.0,14630.0,14630.0
527,US,Madison,F,2008,17043.0,17043.0,21773.0,21773.0


Predictions for 2009:
Score: 0.080627961049632


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
276,US,Ethan,M,2009,19849.0,19849.0,22112.0,22112.0
577,US,Michael,M,2009,18956.0,18956.0,28256.0,28256.0
613,US,Olivia,F,2009,17441.0,17441.0,14630.0,14630.0
58,US,Anthony,M,2009,16321.0,16321.0,19590.0,19590.0
212,US,David,M,2009,15450.0,15450.0,18697.0,18697.0


Predictions for 2010:
Score: 0.08712843015564996


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
276,US,Ethan,M,2010,18011.0,18011.0,22112.0,22112.0
577,US,Michael,M,2010,17374.0,17374.0,28256.0,28256.0
613,US,Olivia,F,2010,17033.0,17033.0,14630.0,14630.0
58,US,Anthony,M,2010,15503.0,15503.0,19590.0,19590.0
212,US,David,M,2010,14215.0,14215.0,18697.0,18697.0


Predictions for 2011:
Score: 0.09472299435324558


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2011,17331.0,17331.0,14630.0,14630.0
577,US,Michael,M,2011,16815.0,16815.0,28256.0,28256.0
276,US,Ethan,M,2011,16693.0,16693.0,22112.0,22112.0
58,US,Anthony,M,2011,14283.0,14283.0,19590.0,19590.0
212,US,David,M,2011,13244.0,13244.0,18697.0,18697.0


Predictions for 2012:
Score: 0.10091606790091082


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
276,US,Ethan,M,2012,17687.0,17687.0,22112.0,22112.0
613,US,Olivia,F,2012,17328.0,17328.0,14630.0,14630.0
577,US,Michael,M,2012,16202.0,16202.0,28256.0,28256.0
58,US,Anthony,M,2012,13215.0,13215.0,19590.0,19590.0
212,US,David,M,2012,12553.0,12553.0,18697.0,18697.0


Predictions for 2013:
Score: 0.10854122596924297


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2013,18446.0,18446.0,14630.0,14630.0
276,US,Ethan,M,2013,16254.0,16254.0,22112.0,22112.0
577,US,Michael,M,2013,15530.0,15530.0,28256.0,28256.0
212,US,David,M,2013,12381.0,12381.0,18697.0,18697.0
514,US,Logan,M,2013,12360.0,12360.0,12974.0,12974.0


Predictions for 2014:
Score: 0.11577303427096262


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2014,19829.0,19829.0,14630.0,14630.0
276,US,Ethan,M,2014,15737.0,15737.0,22112.0,22112.0
577,US,Michael,M,2014,15487.0,15487.0,28256.0,28256.0
514,US,Logan,M,2014,13661.0,13661.0,12974.0,12974.0
212,US,David,M,2014,12243.0,12243.0,18697.0,18697.0


Predictions for 2015:
Score: 0.1199877041776659


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2015,19717.0,19717.0,14630.0,14630.0
276,US,Ethan,M,2015,15095.0,15095.0,22112.0,22112.0
577,US,Michael,M,2015,14474.0,14474.0,28256.0,28256.0
514,US,Logan,M,2015,12934.0,12934.0,12974.0,12974.0
212,US,David,M,2015,11821.0,11821.0,18697.0,18697.0


Predictions for 2016:
Score: 0.1251303881633771


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2016,19397.0,19397.0,14630.0,14630.0
577,US,Michael,M,2016,14120.0,14120.0,28256.0,28256.0
276,US,Ethan,M,2016,13843.0,13843.0,22112.0,22112.0
514,US,Logan,M,2016,11268.0,11268.0,12974.0,12974.0
212,US,David,M,2016,11142.0,11142.0,18697.0,18697.0


Predictions for 2017:
Score: 0.13105540112935088


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2017,18763.0,18763.0,14630.0,14630.0
514,US,Logan,M,2017,14042.0,14042.0,12974.0,12974.0
577,US,Michael,M,2017,12700.0,12700.0,28256.0,28256.0
276,US,Ethan,M,2017,12471.0,12471.0,22112.0,22112.0
279,US,Evelyn,F,2017,10741.0,10741.0,3372.0,3372.0


Predictions for 2018:
Score: 0.13403029084573115


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2018,18039.0,18039.0,14630.0,14630.0
514,US,Logan,M,2018,12427.0,12427.0,12974.0,12974.0
276,US,Ethan,M,2018,11918.0,11918.0,22112.0,22112.0
577,US,Michael,M,2018,11729.0,11729.0,28256.0,28256.0
279,US,Evelyn,F,2018,10427.0,10427.0,3372.0,3372.0


Predictions for 2019:
Score: 0.13911033023898184


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2019,18549.0,18549.0,14630.0,14630.0
276,US,Ethan,M,2019,11291.0,11291.0,22112.0,22112.0
577,US,Michael,M,2019,10589.0,10589.0,28256.0,28256.0
514,US,Logan,M,2019,10551.0,10551.0,12974.0,12974.0
279,US,Evelyn,F,2019,10433.0,10433.0,3372.0,3372.0


Predictions for 2020:
Score: 0.1412052819589401


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2020,17664.0,17664.0,14630.0,14630.0
577,US,Michael,M,2020,9805.0,9805.0,28256.0,28256.0
276,US,Ethan,M,2020,9533.0,9533.0,22112.0,22112.0
279,US,Evelyn,F,2020,9503.0,9503.0,3372.0,3372.0
514,US,Logan,M,2020,9140.0,9140.0,12974.0,12974.0


Predictions for 2021:
Score: 0.1478150148890171


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2021,17798.0,17798.0,14630.0,14630.0
279,US,Evelyn,F,2021,9475.0,9475.0,3372.0,3372.0
560,US,Mateo,M,2021,9159.0,9159.0,766.0,766.0
577,US,Michael,M,2021,9103.0,9103.0,28256.0,28256.0
276,US,Ethan,M,2021,8842.0,8842.0,22112.0,22112.0


Predictions for 2022:
Score: 0.15419310851208895


Unnamed: 0,state,name,M/F,year,count_true,y_true,count_pred,y_pred
613,US,Olivia,F,2022,16573.0,16573.0,14630.0,14630.0
560,US,Mateo,M,2022,10321.0,10321.0,766.0,766.0
279,US,Evelyn,F,2022,9289.0,9289.0,3372.0,3372.0
577,US,Michael,M,2022,8829.0,8829.0,28256.0,28256.0
620,US,Owen,M,2022,8546.0,8546.0,4190.0,4190.0


In [17]:
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingRegressor

class MyPredictor():

    def __init__(self):
        
        # params
        cols_to_keep = ['this_year', 'M/F', 'sum', 'median_age', 'thisyear_count']
        # cols_to_keep = ['this_year', 'M/F', 'sum_scaled', 'median_age_scaled', 'thisyear_count_scaled']
        categorical_features = ['M/F']
        max_leaf_nodes = 16 # 16
        max_iter = 100 # 100
        loss = 'absolute_error' # abs better than default

        categorical_features = [True if f in categorical_features else False for f in cols_to_keep]
        # print(categorical_features)
        
        self.pipe = make_pipeline(
            ColumnTransformer(
                transformers=[
                    # ('category_encoder', LabelEncoder(), categorical_features),
                    ('cols_to_keep', 'passthrough', cols_to_keep),
                ], remainder='drop'),
            HistGradientBoostingRegressor(
                random_state=0,
                categorical_features=categorical_features,
                max_leaf_nodes=max_leaf_nodes,
                max_iter=max_iter,
                loss=loss
            )
        )

        self.gender_encoding = {'M': 0, 'F': 1}

        self.scaler = preprocessing.StandardScaler()

    def preprocess(self, df, this_year, latest_known_year):

        # find median age of people with name, 
        # total born with that name,
        # and latest year's count

        df = df.copy()
        df = df.sort_values(by='year')
        df['cumsum'] = df.groupby(['state', 'name', 'M/F'])['count'].cumsum()
        df['sum'] = df.groupby(['state', 'name', 'M/F'])['count'].transform('sum')
        # display(df[(df['name'] == 'Millie') & (df['M/F'] == 'M')])

        medians = df[df['cumsum'] >= df['sum']/2]
        medians = medians.drop_duplicates(subset=['state', 'name', 'M/F'], keep='first')
        medians['median_age'] = latest_known_year - medians['year']
        # display(medians[medians['name'] == 'Madison'])

        thisyear = df[df['year'] == latest_known_year][['state', 'name', 'M/F', 'count']].rename(columns={'count': 'thisyear_count'})
        df2 = medians.merge(thisyear, how='left', on=['state', 'name', 'M/F']).rename(columns={'year': 'median_year'})
        df2['thisyear_count'] = df2['thisyear_count'].fillna(0) # might want to shift this to 2 and fill in 2s for missing years? or maybe not
        # display(df2)
        # display(df2.groupby(['state','name','M/F']).ngroups)

        # change M/F to 0/1 so it works with various models
        # (even HistGradientBoostingRegressor, which accepts categorical values,
        # still needs those values to be numbers not strings)
        df2['M/F'] = df2['M/F'].map(self.gender_encoding)

        df2['this_year'] = this_year # adding this does seem to improve accuracy. we need to know how far along we are in time

        '''
        # SCALING
        scaler = preprocessing.MinMaxScaler((0, 1000000))
        columns_scaled = ['sum', 'median_age', 'thisyear_count']
        scaled = scaler.fit_transform(df2[columns_scaled])
        scaled = pd.DataFrame(scaled, columns=[f'{column}_scaled' for column in columns_scaled])
        df3 = pd.concat([df2, scaled], axis=1)
        '''

        return df2
    
    def fit(self, historical_data, first_year_to_predict, years_to_fit=1, weight_decay=0.9):
        # first things first, we don't want to know about future data
        historical_data = historical_data[historical_data['year'] < first_year_to_predict]
        # at this point the data we don't want to know should be inaccessible

        X_all = pd.DataFrame()
        y_all = pd.Series()

        # each year_to_fit is the year that's essentially our y for that loop
        for year_to_fit in range(first_year_to_predict - years_to_fit, first_year_to_predict):

            # now we "know" even less for X
            X = historical_data[historical_data['year'] < year_to_fit]
            y = historical_data[historical_data['year'] == year_to_fit]

            X = self.preprocess(X, this_year=year_to_fit, latest_known_year=year_to_fit - 1)
            y = y[['state', 'name', 'M/F', 'count']].rename(columns={'count': 'y'})
            y['M/F'] = y['M/F'].map(self.gender_encoding)

            data = X.merge(y, how='left', on=['state', 'name', 'M/F'])
            data['y'] = data['y'].fillna(0)
            # display(data)

            X = data.drop(columns=['y'])
            y = data['y']
            X['sample_weight'] = weight_decay ** (first_year_to_predict - year_to_fit)

            X_all = pd.concat([X_all, X], ignore_index=True)
            y_all = pd.concat([y_all, y], ignore_index=True)
        
        temp = X_all.copy()
        temp['y'] = y_all
        display(temp)

        sample_weights = X_all['sample_weight']
        X_all = X_all.drop(columns=['sample_weight'])

        self.pipe.fit(X_all, y_all, **{'histgradientboostingregressor__sample_weight': sample_weights})
        # this seems like a silly way to pass params to individual steps of the pipeline, but it's true. See: https://stackoverflow.com/questions/36205850/sklearn-pipeline-applying-sample-weights-after-applying-a-polynomial-feature-t

    def predict(self, historical_data, years_to_predict):

        # all_known_names = get_all_known_names(historical_data)

        predictions = []

        # display('historical_data in predict:')
        # display(historical_data)
        # display('years_to_predict:')
        # display(years_to_predict)

        for year_to_predict in years_to_predict:

            # display('historical_data in predict loop:')
            # display(historical_data)

            df = self.preprocess(historical_data, this_year=year_to_predict - 1, latest_known_year=year_to_predict - 1)
            # df = self.preprocess(historical_data, years_to_predict[0] - 1)

            df['y'] = self.pipe.predict(df)

            df['year'] = year_to_predict
            # display(df)

            # if we want to simply, do the following; 
            # but for now, might be useful to see all data displayed.
            # df = df[['state', 'year', 'name', 'M/F', 'y']]

            predictions.append(df)

            assumed_new_year_of_historical_data = df[['state', 'year', 'name', 'M/F', 'y']].rename(columns={'y': 'count'})
            assumed_new_year_of_historical_data['M/F'] = assumed_new_year_of_historical_data['M/F'].map({v: k for k, v in self.gender_encoding.items()})
            historical_data = pd.concat([historical_data, assumed_new_year_of_historical_data], ignore_index=True)

        predictions = pd.concat(predictions, ignore_index=True)

        # we have to reverse the mapping to send our predictions
        # (at least the way we currently have it set up)
        predictions['M/F'] = predictions['M/F'].map({v: k for k, v in self.gender_encoding.items()})

        predictions.loc[predictions['y'] < 4.5, 'y'] = 2

        display(predictions)
        # display(predictions[predictions['y'] < 4.5])

        return predictions

In [18]:
first_year_to_predict = 2003
cutoff = 100
my_predictor = MyPredictor()
data_to_fit = select_top_names(trainval, first_year_to_predict=first_year_to_predict, cutoff=cutoff)
data_to_eval = select_top_names(test, first_year_to_predict=first_year_to_predict, cutoff=cutoff)
my_predictor.fit(historical_data=data_to_fit, first_year_to_predict=first_year_to_predict, years_to_fit=30)
evaluate(predictor=my_predictor, data_held_out=data_to_eval, first_year_to_predict=first_year_to_predict, metric='rank')

Unnamed: 0,state,name,M/F,median_year,count,cumsum,sum,median_age,thisyear_count,this_year,sample_weight,y
0,US,Cloe,1,1940,8,26,42,32,0.0,1973,0.042391,0.0
1,US,Harlie,1,1940,5,5,5,32,0.0,1973,0.042391,0.0
2,US,Gracey,1,1941,7,7,12,31,0.0,1973,0.042391,0.0
3,US,Rowan,1,1942,7,7,7,30,0.0,1973,0.042391,0.0
4,US,Melvyn,0,1942,282,1613,2811,30,7.0,1973,0.042391,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...
93592,US,Adamari,1,2001,130,240,240,0,130.0,2002,0.900000,168.0
93593,US,Maximus,0,2001,892,1114,1114,0,892.0,2002,0.900000,912.0
93594,US,Jersey,1,2001,61,88,88,0,61.0,2002,0.900000,110.0
93595,US,Dasani,1,2001,94,161,161,0,94.0,2002,0.900000,105.0


Unnamed: 0,state,name,M/F,median_year,count,cumsum,sum,median_age,thisyear_count,this_year,y,year
0,US,Gertrude,F,1945,664.000000,8864.000000,17407.000000,57,11.000000,2002,10.251178,2003
1,US,Flossie,F,1946,108.000000,1532.000000,2988.000000,56,0.000000,2002,2.000000,2003
2,US,Eula,F,1946,380.000000,5131.000000,9796.000000,56,0.000000,2002,2.000000,2003
3,US,Olive,F,1946,123.000000,1847.000000,3510.000000,56,43.000000,2002,39.656542,2003
4,US,Marlys,F,1946,243.000000,2902.000000,5565.000000,56,0.000000,2002,2.000000,2003
...,...,...,...,...,...,...,...,...,...,...,...,...
16575,US,Charlize,F,2012,357.763853,3541.410384,6487.752083,9,316.131570,2021,316.131570,2022
16576,US,Anaiya,F,2012,206.899037,2108.331872,3846.785794,9,180.028358,2021,180.028358,2022
16577,US,Marimar,F,2012,108.303749,1125.394863,2041.263159,9,101.427713,2021,101.427713,2022
16578,US,Kadejah,F,2012,97.748212,986.569086,1894.271570,9,101.427713,2021,101.427713,2022


Predictions for 2003:
Score: 0.03228673158394667
Predictions for 2004:
Score: 0.0396044940939261
Predictions for 2005:
Score: 0.049479030086886595
Predictions for 2006:
Score: 0.056214052201884586
Predictions for 2007:
Score: 0.06275531313555124
Predictions for 2008:
Score: 0.0711992214588323
Predictions for 2009:
Score: 0.07874862327581686
Predictions for 2010:
Score: 0.08545305152007832
Predictions for 2011:
Score: 0.09271982424549688
Predictions for 2012:
Score: 0.09962384107503948
Predictions for 2013:
Score: 0.10860387056170347
Predictions for 2014:
Score: 0.11510579651870888
Predictions for 2015:
Score: 0.1200722015349382
Predictions for 2016:
Score: 0.12627984359247799
Predictions for 2017:
Score: 0.13131763430709253
Predictions for 2018:
Score: 0.13457660996602624
Predictions for 2019:
Score: 0.13880147782964167
Predictions for 2020:
Score: 0.14133931224978585
Predictions for 2021:
Score: 0.14496250065558294
Predictions for 2022:
Score: 0.15193936003449823
