In [2]:
import pandas as pd

In [3]:
# Player stats: read the 2023 per-player stat rows
player_stat_rows = pd.read_csv('National Data/2023 All Player Stats.csv')

# Rows whose Player value is exactly ' Team' (leading space included) are excluded
is_real_player = player_stat_rows['Player'] != ' Team'

# Sum Stat for every (PlayerId, StatType) pair, then pivot so each PlayerId
# becomes one row with one column per StatType
p_stats_season = (
    player_stat_rows.loc[is_real_player, ['PlayerId', 'StatType', 'Stat']]
    .groupby(['PlayerId', 'StatType'])
    .sum()
    .unstack()
    .droplevel(0, axis=1)        # remove the outer 'Stat' column level left by unstack
    .reset_index()               # PlayerId back to an ordinary column
    .rename_axis(None, axis=1)   # clear the leftover 'StatType' columns-axis name
)

# NOTE(review): the index here is the fresh 0..n-1 index from reset_index, so
# mapping it through int does not change its values. Possibly the cast was
# meant for the PlayerId column instead - confirm.
p_stats_season.index = p_stats_season.index.map(int)

# Player usage data: rename Name/Id to Player/PlayerId so the columns line up
# with the other tables, then order rows by player name with a fresh index
usage_raw = pd.read_csv('National Data/2023 All Usage Stats.csv')
usage_renamed = usage_raw.rename(columns={'Name': 'Player', 'Id': 'PlayerId'})
p_usage = usage_renamed.sort_values(by='Player').reset_index(drop=True)

# Combine usage with season stats on PlayerId; the outer join keeps players
# that are present in only one of the two tables
merged = pd.merge(p_usage, p_stats_season, on='PlayerId', how='outer')

# 2023 basic info for all CFB players
p_info = pd.read_csv('Player Basic Info/2023 Players Basic Info.csv')
p_info = (
    # Full name is "<First Name> <Last Name>"
    p_info.assign(Player=p_info['First Name'] + ' ' + p_info['Last Name'])
    .rename(columns={'Id': 'PlayerId'})
    .drop(columns=['First Name', 'Last Name'])
    .sort_values(by='Player')
    # Removes every row that has a missing value in ANY remaining column
    .dropna()
)

# Team records: read the 2023 file and discard its Year and Conference columns
t_records = pd.read_csv('National Data/2023 All Team Records.csv').drop(
    columns=['Year', 'Conference']
)

# Team basic stats: sum StatValue per (Team, StatName) and pivot to one row per
# Team with one column per StatName
team_stat_rows = pd.read_csv('National Data/2023 All Team Stats.csv')
t_b_stats = (
    team_stat_rows[['Team', 'StatName', 'StatValue']]
    .groupby(['Team', 'StatName'])
    .sum()
    .unstack()
    .droplevel(0, axis=1)        # remove the outer 'StatValue' column level
    .reset_index()               # Team back to an ordinary column
    .rename_axis(None, axis=1)   # clear the leftover 'StatName' columns-axis name
    .add_prefix('Team ')         # every column, including the key, gets 'Team ' in front
    .rename(columns={'Team Team': 'Team'})  # restore the key column's plain name
)

# Merge to fill in missing player info.
# Left join keeps every row of `merged`. Columns present in both frames keep
# their plain name on the left side and get a '_y' suffix on the p_info side.
merged2 = merged.merge(p_info, on='PlayerId', how='left', suffixes=(None, '_y'))

# Where the left-side value is missing, fall back to the p_info value
for col in ('Position', 'Team', 'Player'):
    merged2[col] = merged2[col].combine_first(merged2[col + '_y'])
merged2 = merged2.drop(columns=['Team_y', 'Position_y', 'Player_y'])

# sort_values returns a new frame; the result must be assigned back, otherwise
# the sort has no effect on merged2.
merged2 = merged2.sort_values(by='PlayerId')

# Rows that still have no player name after the fill are dropped
merged2 = merged2.dropna(subset=['Player'])

# Attach team record columns. Inner join (the merge default): rows whose Team
# has no match in t_records are not carried forward.
merged3 = pd.merge(merged2, t_records, how='inner', on='Team')

# Attach the prefixed team basic stats, again as an inner join on Team
merged4 = pd.merge(merged3, t_b_stats, how='inner', on='Team')

# Load transfer data
transfer_raw = pd.read_csv('Transfer Data/2023 Transfer.csv')
transfer = (
    # Full name is "<FirstName> <LastName>"
    transfer_raw.assign(Player=transfer_raw['FirstName'] + ' ' + transfer_raw['LastName'])
    # The Origin column is renamed to Team so it can serve as a join key
    .rename(columns={'Origin': 'Team'})
    .loc[:, ['Player', 'Team', 'Position', 'Stars']]
    # Every row in this table is flagged 'Yes'
    .assign(Transfer_Portal='Yes')
)

# Merge data with transfer data. Left join keeps every row of merged4; rows
# with no match on (Player, Team, Position) have their missing Transfer_Portal
# value replaced by 'No'.
final_merged = pd.merge(
    merged4,
    transfer,
    how='left',
    on=['Player', 'Team', 'Position'],
).assign(Transfer_Portal=lambda frame: frame['Transfer_Portal'].fillna('No'))

In [4]:
# Columns, in output order, that make up the exported dataset
cols = ['Season', 'PlayerId', 'Player', 'Position', 'Team', 'Conference',
        'Usage Overall', 'Usage Pass', 'Usage Rush', 'Usage FirstDown',
        'Usage SecondDown', 'Usage ThirdDown', 'Usage StandardDowns',
        'Usage PassingDowns', 'ATT', 'AVG', 'CAR', 'COMPLETIONS', 'FGA', 'FGM',
        'FUM', 'INT', 'In 20', 'LONG', 'LOST', 'NO', 'PCT', 'PD', 'PTS',
        'QB HUR', 'REC', 'SACKS', 'SOLO', 'TB', 'TD', 'TFL', 'TOT', 'XPA',
        'XPM', 'YDS', 'YPA', 'YPC', 'YPP', 'YPR', 'Division', 'ExpectedWins',
        'Total Games', 'Total Wins', 'Total Losses', 'Total Ties',
        'ConferenceGames Games', 'ConferenceGames Wins',
        'ConferenceGames Losses', 'ConferenceGames Ties', 'HomeGames Games',
        'HomeGames Wins', 'HomeGames Losses', 'HomeGames Ties',
        'AwayGames Games', 'AwayGames Wins', 'AwayGames Losses',
        'AwayGames Ties', 'Team firstDowns', 'Team fourthDownConversions',
        'Team fourthDowns', 'Team fumblesLost', 'Team fumblesRecovered',
        'Team games', 'Team interceptionTDs', 'Team interceptionYards',
        'Team interceptions', 'Team kickReturnTDs', 'Team kickReturnYards',
        'Team kickReturns', 'Team netPassingYards', 'Team passAttempts',
        'Team passCompletions', 'Team passesIntercepted', 'Team passingTDs',
        'Team penalties', 'Team penaltyYards', 'Team possessionTime',
        'Team puntReturnTDs', 'Team puntReturnYards', 'Team puntReturns',
        'Team rushingAttempts', 'Team rushingTDs', 'Team rushingYards',
        'Team sacks', 'Team tacklesForLoss', 'Team thirdDownConversions',
        'Team thirdDowns', 'Team totalYards', 'Team turnovers', 'Stars',
        'Transfer_Portal']

# Take an explicit copy: new columns are assigned onto df afterwards, and
# assigning into a plain column slice of final_merged raises
# SettingWithCopyWarning.
df = final_merged[cols].copy()

# One integer code per distinct value, numbered in order of first appearance
position_dict = {label: code for code, label in enumerate(df['Position'].unique())}
team_dict = {label: code for code, label in enumerate(df['Team'].unique())}
con_dict = {label: code for code, label in enumerate(df['Conference'].unique())}

# Add the numeric code columns alongside the text columns they encode
for id_col, src_col, lookup in (
    ('PositionId', 'Position', position_dict),
    ('TeamId', 'Team', team_dict),
    ('ConferenceId', 'Conference', con_dict),
):
    df[id_col] = df[src_col].map(lookup)

# Write the finished dataset without the row index
df.to_csv('National Dataset.csv', index=False)