In [1]:
import pandas as pd

### Removing Unecessary Column

In [2]:
df = pd.read_csv('csv_files/p/p.csv')

### Restructing into New CSV File

In [3]:
min_age = df['Age'].min()
min_age

22

In [4]:
max_age = df['Age'].max()
max_age

40

In [5]:
players = df['Player'].unique()
ages = list(range(22, 41))
new_df = pd.DataFrame(columns=['Player'] + list(ages))
new_df['Player'] = players
new_df.set_index('Player', inplace=True)

for player in players:
    player_data = df[df['Player'] == player]
    for _, row in player_data.iterrows():
        age = row['Age']
        av = row['AV']
        new_df.at[player, age] = av

new_df = new_df.fillna('N/A')

print(new_df)

                      22   23   24   25   26   27   28   29   30   31   32  \
Player                                                                       
Dustin Colquitt      N/A    1    3    3    2    3    2    3    3    2    2   
Matt Dodge           N/A    2  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A   
Lac Edwards          N/A  N/A    1    3    3    3  N/A    1  N/A  N/A  N/A   
Brandon Fields       N/A    2    2    3    3    4    4    4    2  N/A  N/A   
Reggie Hodges        N/A  N/A  N/A  N/A    1  N/A    3  N/A    1  N/A  N/A   
Sterling Hofrichter  N/A  N/A    1  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A   
Kevin Huber          N/A  N/A    2    1    2    3    2    3    2    3    2   
Eddie Johnson          1  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A   
Donnie Jones         N/A  N/A  N/A    3    2    4    5    3    3    2    3   
Drew Kaser           N/A    2    3  N/A  N/A  N/A  N/A  N/A  N/A  N/A  N/A   
Sam Koch             N/A  N/A    2    2    3    2    2    3    3

In [6]:
new_df.to_csv('csv_files/p/new_p.csv')
df = pd.read_csv('csv_files/p/new_p.csv')

### Only Including Players w/ 3 Consecutive Years of AVs

In [7]:
df.set_index('Player', inplace=True)
filtered_df = df.dropna(thresh=3)
filtered_df.reset_index(inplace=True)
filtered_df.to_csv('csv_files/p/filtered_p.csv', index=False)

df = pd.read_csv('csv_files/p/new_p.csv')

df.set_index('Player', inplace=True)

def has_consecutive_av(player_av):
    consecutive_count = 0
    for av in player_av:
        if not pd.isna(av):
            consecutive_count += 1
            if consecutive_count == 3:
                return True
        else:
            consecutive_count = 0
    return False

players_with_consecutive_av = df[df.apply(has_consecutive_av, axis=1)]
players_with_consecutive_av.reset_index(inplace=True)

players_with_consecutive_av.to_csv('csv_files/p/final_p.csv', index=False)

### Prime Age of P
- Finding the three year stretch where each player has the highest average AV.
- Finding the average age (the middle age) of that player during that three year stretch.
- That will be their "prime" playing age.

In [8]:
df = pd.read_csv('csv_files/p/final_p.csv')
df.set_index('Player', inplace=True)

def calculate_average_av(player_av):
    max_avg_av = 0
    prime_age = None
    for i in range(len(player_av) - 2):
        three_year_avg = sum(player_av[i:i+3]) / 3
        if three_year_avg > max_avg_av:
            max_avg_av = three_year_avg
            prime_age = i + 22  # Adding 22 to get the actual age

    return prime_age

prime_ages = []
for player in df.index:
    player_av = df.loc[player, '22':'40'].tolist()
    prime_age = calculate_average_av(player_av)
    prime_ages.append({'Player': player, 'Prime Age': prime_age})
    
prime_age_df = pd.DataFrame(prime_ages)
prime_age_df.to_csv('csv_files/c/prime_ages_p.csv', index=False)

prime_ages_df = pd.read_csv('csv_files/c/prime_ages_p.csv')
prime_ages_df = prime_ages_df.iloc[:-1]
prime_ages_df.to_csv('csv_files/c/prime_ages_p.csv', index=False)

average_prime_age = prime_ages_df['Prime Age'].mean()

In [9]:
average_prime_age

25.133333333333333