In [385]:
import pandas as pd
import numpy as np

In [386]:
# Location of the input CSV; main() hands this value to get_data().
# NOTE(review): this is a machine-specific absolute path — consider deriving it
# from a configurable data directory so the notebook runs elsewhere.
path_in = "/Users/timholdsworth/code/scaling-science/Data/100_most_impactful_papers.csv"
def get_data(path_in):
    """Read the CSV file at ``path_in`` and return it as a pandas DataFrame."""
    return pd.read_csv(path_in)

In [387]:
def get_start_columns(df):
    """Return, for every row of ``df``, the negative offset where its data starts.

    The offset is ``-(total_years_pub + 3)``: the row's number of published
    years plus the three extra columns at the end, negated so that it counts
    back from the end. The result is a Series aligned with ``df``'s index.
    """
    trailing_extra_columns = 3
    years_published = df['total_years_pub']
    return -(years_published + trailing_extra_columns)

In [388]:
def calc_decay_scores(df, start_col, index, decay_constant=35):
    """Apply exponential time decay to the scores of a single paper.

    The paper's scores are taken from column ``index`` of ``df``, starting at
    row position ``start_col[index]`` and stopping before the last three rows.
    The score ``t`` rows after the first selected row is multiplied by
    ``exp(-t / decay_constant)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one column per paper; its last three rows are never read.
    start_col : pandas.Series
        Start row position per paper, looked up as ``start_col[index]``.
    index : int
        Positional index of the paper's column in ``df``.
    decay_constant : float, optional
        Time constant of the decay, measured in rows. Defaults to 35, the
        value that used to be hard-coded. Must be non-zero.

    Returns
    -------
    pandas.Series
        The decayed scores on a fresh ``0..n-1`` index.
    """
    # Slice out the undecayed scores for this paper.
    first_row = start_col[index]
    impact_scores = df.iloc[first_row:-3, index].reset_index(drop=True)

    # Float offsets guarantee a true division below regardless of the
    # interpreter's integer-division rules, and give a float64 result even
    # when no rows were selected.
    elapsed = np.arange(len(impact_scores), dtype=float)
    decay_series = pd.Series(np.exp(-elapsed / decay_constant))

    # Element-wise product of the decay coefficients and the raw scores.
    return decay_series.multiply(impact_scores)
    

In [400]:
def update_df_with_decay_scores(df, start_cols):
    """Replace each paper's scores with their time-decayed values.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per paper. Must contain a ``title`` column and a
        ``total_years_pub`` column. The code assumes the remaining columns are
        labelled with consecutive years (as integer-like strings) followed by
        three trailing columns — TODO confirm against the input CSV.
    start_cols : pandas.Series
        Per-paper start positions, passed unchanged to ``calc_decay_scores``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``title`` with the decayed scores written in
        place of the originals and ``DataFrame.round(3)`` applied. The frame
        passed in is not modified.
    """
    # Distance from the end of the transposed index to each paper's first
    # score row: its years of data plus the three trailing rows. Converted to
    # a plain list so the loop below indexes it by position.
    rows_from_end = (df.loc[:, 'total_years_pub'] + 3).tolist()

    # One column per paper, one row per original column.
    papers = df.set_index('title').transpose()
    row_labels = papers.index.tolist()

    for position, title in enumerate(papers.columns):
        decay_score = calc_decay_scores(papers, start_cols, position)

        # Label of the first row holding this paper's scores; subsequent
        # labels are produced by counting up from it. int() is kept inside
        # the comprehension so a paper with no score rows never tries to
        # convert a non-year label.
        first_label = row_labels[-rows_from_end[position]]
        year_labels = [str(int(first_label) + offset)
                       for offset in range(len(decay_score))]

        # Single-column frame named after the paper and indexed by year label,
        # so that update() aligns it with the right rows and column.
        decay_frame = decay_score.to_frame(name=title)
        decay_frame.index = pd.Index(year_labels, name='years')

        papers.update(decay_frame)

    return papers.round(3).T
    

In [406]:
def main():
    """Run the pipeline: load the CSV at ``path_in`` and return the decayed frame."""
    papers = get_data(path_in)
    return update_df_with_decay_scores(papers, get_start_columns(papers))

# Run the whole pipeline once; the resulting frame is what gets exported by
# the write_to_csv(df_decay) call further down.
df_decay = main()
# df_decay  (uncomment to display the resulting frame)

In [407]:
# Destination CSV used by write_to_csv().
# NOTE(review): machine-specific absolute path — consider deriving it from a
# configurable data directory.
path_out = '/Users/timholdsworth/code/scaling-science/Data/100_most_impactful_papers_decayed1.csv'
def write_to_csv(df, path=None):
    """Write ``df`` to a CSV file, labelling the index column ``title``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to export.
    path : str or path-like, optional
        Destination file. When omitted, the module-level ``path_out`` is used,
        which is the previous behaviour.
    """
    destination = path_out if path is None else path
    df.to_csv(destination, index_label='title')

In [408]:
# Export the decayed frame produced by main() to the CSV at path_out.
write_to_csv(df_decay)