In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Read the 2023 season CSV and discard the 'Fmb' and 'GS' columns right away.
df = pd.read_csv('data/2023_season_data.csv').drop(columns=['Fmb', 'GS'])

In [None]:
# Keep only the part of each player name before the first '*', and then only
# the part before the first backslash.
# The vectorised .str accessor leaves missing names as NaN instead of raising
# the AttributeError a per-row lambda would hit. Both separators are a single
# character, which str.split treats as a literal string rather than a regex.
df['Player'] = (
    df['Player']
    .str.split('*').str[0]
    .str.split('\\').str[0]
)

In [None]:
# Display the frame after the column drop and the player-name cleanup.
df

In [None]:
# Give the ambiguous stat headers explicit passing / rushing / receiving /
# total names.
# NOTE(review): the '.1', '.2', '.3' suffixes look like the de-duplication
# pandas applies to repeated CSV header names — confirm against the raw file.
df = df.rename(columns={
    'Att': 'PassAtt',
    'Att.1': 'RushAtt',
    'Yds': 'PassYDs',
    'Yds.1': 'RushYDs',
    'Yds.2': 'RecYDs',
    'TD': 'PassTD',
    'TD.1': 'RushTD',
    'TD.2': 'RecTD',
    'TD.3': 'TotTD',
})

In [None]:
# Display the frame again to check the renamed stat columns.
df

In [None]:
# Split the full table into one frame per value of 'FantPos'
# (RB, QB, WR and TE, in that order).
rb_df, qb_df, wr_df, te_df = (
    df[df['FantPos'] == position]
    for position in ('RB', 'QB', 'WR', 'TE')
)

In [None]:
# Display the rows whose 'FantPos' is 'RB'.
rb_df

In [None]:
# Display the rows whose 'FantPos' is 'QB'.
qb_df

In [None]:
# Display the rows whose 'FantPos' is 'WR'.
wr_df

In [None]:
# Display the rows whose 'FantPos' is 'TE'.
te_df

In [None]:
# Column groups (names as they exist after the rename step), one list per
# stat family. Each list is built by splitting a space-separated string.
rushing_columns = 'RushAtt RushYDs Y/A RushTD'.split()
receiving_columns = 'Tgt Rec RecYDs Y/R RecTD'.split()
passing_columns = 'PassAtt PassYDs PassTD Int'.split()

def transform_columns(df, new_column_list):
    """Return an independent copy of ``df`` limited to the columns of interest.

    The result always starts with 'Player', 'Tm', 'Age' and 'G', is followed by
    the requested stat columns in the order given, and ends with 'FL'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Player', 'Tm', 'Age', 'G', 'FL' and every name in
        ``new_column_list``.
    new_column_list : sequence of str
        Stat columns to keep between the leading columns and 'FL'.

    Returns
    -------
    pandas.DataFrame
        A copy, so adding columns to the result never writes into (or warns
        about) a slice of the caller's frame.

    Raises
    ------
    KeyError
        If any requested column is missing from ``df``.
    """
    selected = ['Player', 'Tm', 'Age', 'G'] + list(new_column_list) + ['FL']
    # Explicit copy: the column subset would otherwise be flagged as a slice
    # and trigger SettingWithCopyWarning on later column assignment.
    return df[selected].copy()

In [None]:
# Trim each position frame to the columns relevant to it. RB and WR frames
# keep both rushing and receiving stats; TE keeps receiving; QB keeps passing.
rb_df = transform_columns(rb_df, rushing_columns + receiving_columns)
# Fixed: the WR frame must be derived from wr_df. It was previously built from
# rb_df, which silently replaced the receivers with running-back rows.
wr_df = transform_columns(wr_df, rushing_columns + receiving_columns)
te_df = transform_columns(te_df, receiving_columns)
qb_df = transform_columns(qb_df, passing_columns)

rb_df.head()

In [None]:
# usage (targets + carries) per game vs. fantasy points per game for RBs in 2023

# Work on an independent copy so the column assignments below never write into
# a slice of another frame (avoids SettingWithCopyWarning).
rb_df = rb_df.copy()

# Fantasy points scored: each rushing/receiving yard is weighted 0.01, each
# rushing/receiving TD 6, each reception 1, and each fumble lost -2.
# Fixed: receiving yards were previously added twice.
# NOTE(review): common scoring formats award 0.1 points per yard — confirm that
# the 0.01 weight is intentional.
rb_df['FanPoints'] = (
    rb_df['RushYDs'] * 0.01
    + rb_df['RushTD'] * 6
    + rb_df['Rec']
    + rb_df['RecYDs'] * 0.01
    + rb_df['RecTD'] * 6
    - rb_df['FL'] * 2
)

# Fantasy points per game, rounded to two decimal places.
rb_df['FPPG'] = (rb_df['FanPoints'] / rb_df['G']).round(2)

# Usage per game, defined as (carries + targets) / games, rounded to two
# decimal places.
rb_df['Usage/GM'] = ((rb_df['RushAtt'] + rb_df['Tgt']) / rb_df['G']).round(2)

# styling
sns.set_style('whitegrid')

# matplotlib canvas
fig, ax = plt.subplots()
fig.set_size_inches(15, 10)

# Regression scatterplot with trendline, drawn explicitly on the axes created
# above rather than on whichever axes happen to be current.
plot = sns.regplot(
    x=rb_df['Usage/GM'],
    y=rb_df['FPPG'],
    scatter=True,
    ax=ax)

In [None]:
# How does efficiency correlate to fantasy football performance?

# Make sure there is an adequate sample size: keep only rows with more than 20
# rushing attempts. Filtering before computing the ratio keeps zero-carry rows
# out of the division, and the copy keeps the assignment below off a slice.
rb_df = rb_df[rb_df['RushAtt'] > 20].copy()

# Touchdowns (rushing + receiving) per opportunity (carries + targets).
rb_df['TD/Usage'] = (
    (rb_df['RushTD'] + rb_df['RecTD'])
    / (rb_df['RushAtt'] + rb_df['Tgt'])
)

fig, ax = plt.subplots()
fig.set_size_inches(15, 10)

# Regression scatterplot drawn explicitly on the axes created above.
plot = sns.regplot(
    x=rb_df['TD/Usage'],
    y=rb_df['FPPG'],
    scatter=True,
    ax=ax)