### Import libraries, .csv file, and create DataFrame 

In [14]:
import pandas as pd
import numpy as np

# Read the FantasyPros statistics export (file name suggests running backs)
# into the working DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer="FantasyPros_Fantasy_Football_Statistics_RB.csv")

# Preview the first ten rows
df.iloc[:10]

Unnamed: 0,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS.1,Y/R,TD.1,FL,G,FPTS,FPTS/G,ROST
0,1.0,Austin Ekeler (LAC),204.0,915,4.5,72.0,10.0,13.0,107.0,127.0,722.0,6.7,5.0,3.0,17.0,372.7,21.9,100.0%
1,2.0,Christian McCaffrey (SF),244.0,1139,4.7,49.0,14.0,8.0,85.0,108.0,741.0,8.7,5.0,0.0,17.0,356.4,21.0,100.0%
2,3.0,Josh Jacobs (LV),340.0,1653,4.9,86.0,14.0,12.0,53.0,64.0,400.0,7.5,0.0,1.0,17.0,328.3,19.3,99.8%
3,4.0,Derrick Henry (TEN),349.0,1538,4.4,56.0,20.0,13.0,33.0,41.0,398.0,12.1,0.0,3.0,16.0,302.8,18.9,99.9%
4,5.0,Saquon Barkley (NYG),295.0,1312,4.4,68.0,16.0,10.0,57.0,76.0,338.0,5.9,0.0,0.0,16.0,284.0,17.8,100.0%
5,6.0,Nick Chubb (CLE),302.0,1525,5.0,41.0,18.0,12.0,27.0,37.0,239.0,8.9,1.0,1.0,17.0,281.4,16.6,25.6%
6,7.0,Rhamondre Stevenson (NE),210.0,1040,5.0,49.0,12.0,5.0,69.0,88.0,421.0,6.1,1.0,1.0,17.0,249.1,14.7,96.1%
7,8.0,Tony Pollard (DAL),193.0,1007,5.2,57.0,18.0,9.0,39.0,55.0,371.0,9.5,3.0,0.0,16.0,248.8,15.6,99.8%
8,9.0,Aaron Jones (GB),213.0,1121,5.3,36.0,10.0,2.0,59.0,72.0,395.0,6.7,5.0,3.0,17.0,248.6,14.6,97.5%
9,10.0,Joe Mixon (CIN),212.0,826,3.9,40.0,7.0,7.0,60.0,75.0,441.0,7.4,2.0,0.0,15.0,240.7,16.0,98.4%


### Find the column numbers (indices) in the df

In [15]:
# Pair every column label with its zero-based position so later cells can
# refer to columns by index.
column_numbers = [(position, label) for position, label in enumerate(df.columns)]

column_numbers

[(0, 'Rank'),
 (1, 'Player'),
 (2, 'ATT'),
 (3, 'YDS'),
 (4, 'Y/A'),
 (5, 'LG'),
 (6, '20+'),
 (7, 'TD'),
 (8, 'REC'),
 (9, 'TGT'),
 (10, 'YDS.1'),
 (11, 'Y/R'),
 (12, 'TD.1'),
 (13, 'FL'),
 (14, 'G'),
 (15, 'FPTS'),
 (16, 'FPTS/G'),
 (17, 'ROST')]

### Check the data types of the columns

In [16]:
# Show the dtype pandas inferred for each column; columns reported as
# `object` were not parsed as numbers and are handled in the next cell.
df.dtypes

Rank      float64
Player     object
ATT       float64
YDS        object
Y/A       float64
LG        float64
20+       float64
TD        float64
REC       float64
TGT       float64
YDS.1     float64
Y/R       float64
TD.1      float64
FL        float64
G         float64
FPTS      float64
FPTS/G    float64
ROST       object
dtype: object

### Convert numeric columns stored as 'object' to 'float64' (all object columns except 'Player' and 'ROST')

In [17]:
# Object-dtype columns that actually hold numbers stored as text.
# 'Player' (names) and 'ROST' (percentage strings such as "100.0%") are left
# untouched. Filtering instead of list.remove() avoids a ValueError when one of
# them is absent, e.g. if the cell is re-run after a later change.
convert = [
    col
    for col in df.select_dtypes(include=['object']).columns
    if col not in ('Player', 'ROST')
]

# Convert each column to float64
for col in convert:
    cleaned = (
        df[col]
        # Drop thousands separators ("1,139" -> "1139"); literal match.
        .str.replace(',', '', regex=False)
        # Only a cell that is *just* a dash is a "no value" placeholder.
        # Replacing every '-' would turn a negative number like "-3" into
        # "03" and silently flip its sign.
        .str.replace(r'^\s*-\s*$', '0', regex=True)
    )
    df[col] = cleaned.astype(float)

# Verify the data types
df.dtypes

Rank      float64
Player     object
ATT       float64
YDS       float64
Y/A       float64
LG        float64
20+       float64
TD        float64
REC       float64
TGT       float64
YDS.1     float64
Y/R       float64
TD.1      float64
FL        float64
G         float64
FPTS      float64
FPTS/G    float64
ROST       object
dtype: object

### Convert the relevant stats to a per game basis

In [18]:
# Candidate stat columns, picked by position (column indices 2 through 15)
relevant_columns = list(df.columns[2:16])

# Columns that are kept as-is rather than divided by games played
exclude_from_per_game = ['Y/A', 'LG', 'Y/R', 'FL', 'G', 'FPTS', 'FPTS/G']

# Every remaining candidate gets a "<name>_per_game" companion column:
# the season total divided by 'G', rounded to one decimal place.
per_game_sources = [name for name in relevant_columns if name not in exclude_from_per_game]
for stat in per_game_sources:
    df[stat + '_per_game'] = df[stat].div(df['G']).round(1)

# Columns carried forward to the correlation step: the untouched originals
# first, followed by the freshly created per-game columns.
relevant_columns_for_correlation = exclude_from_per_game + [
    stat + '_per_game' for stat in per_game_sources
]

# Preview the first ten rows of those columns next to 'Rank' and 'Player'
df_average = df[['Rank', 'Player'] + relevant_columns_for_correlation].head(10)
df_average

Unnamed: 0,Rank,Player,Y/A,LG,Y/R,FL,G,FPTS,FPTS/G,ATT_per_game,YDS_per_game,20+_per_game,TD_per_game,REC_per_game,TGT_per_game,YDS.1_per_game,TD.1_per_game
0,1.0,Austin Ekeler (LAC),4.5,72.0,6.7,3.0,17.0,372.7,21.9,12.0,53.8,0.6,0.8,6.3,7.5,42.5,0.3
1,2.0,Christian McCaffrey (SF),4.7,49.0,8.7,0.0,17.0,356.4,21.0,14.4,67.0,0.8,0.5,5.0,6.4,43.6,0.3
2,3.0,Josh Jacobs (LV),4.9,86.0,7.5,1.0,17.0,328.3,19.3,20.0,97.2,0.8,0.7,3.1,3.8,23.5,0.0
3,4.0,Derrick Henry (TEN),4.4,56.0,12.1,3.0,16.0,302.8,18.9,21.8,96.1,1.2,0.8,2.1,2.6,24.9,0.0
4,5.0,Saquon Barkley (NYG),4.4,68.0,5.9,0.0,16.0,284.0,17.8,18.4,82.0,1.0,0.6,3.6,4.8,21.1,0.0
5,6.0,Nick Chubb (CLE),5.0,41.0,8.9,1.0,17.0,281.4,16.6,17.8,89.7,1.1,0.7,1.6,2.2,14.1,0.1
6,7.0,Rhamondre Stevenson (NE),5.0,49.0,6.1,1.0,17.0,249.1,14.7,12.4,61.2,0.7,0.3,4.1,5.2,24.8,0.1
7,8.0,Tony Pollard (DAL),5.2,57.0,9.5,0.0,16.0,248.8,15.6,12.1,62.9,1.1,0.6,2.4,3.4,23.2,0.2
8,9.0,Aaron Jones (GB),5.3,36.0,6.7,3.0,17.0,248.6,14.6,12.5,65.9,0.6,0.1,3.5,4.2,23.2,0.3
9,10.0,Joe Mixon (CIN),3.9,40.0,7.4,0.0,15.0,240.7,16.0,14.1,55.1,0.5,0.5,4.0,5.0,29.4,0.1


### Calculate the correlations for relevant stats for different conditions

In [19]:
# Columns dropped from the correlation inputs ('FPTS/G' is the correlation
# target itself in the function below).
columns_to_exclude_from_correlation = ['FPTS/G', 'FPTS', 'FL', 'G']
relevant_columns_for_correlation = list(
    filter(
        lambda name: name not in columns_to_exclude_from_correlation,
        relevant_columns_for_correlation,
    )
)

# Define a function to calculate correlations for given conditions
def compute_correlations(dataframe, columns=None, target='FPTS/G'):
    """Correlate each selected column of ``dataframe`` with a target column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows to analyse (typically ``df`` or a filtered subset of it).
    columns : list of str, optional
        Columns to correlate with ``target``. When omitted, the notebook-level
        ``relevant_columns_for_correlation`` list is used, which matches the
        original behaviour of this function.
    target : str, default 'FPTS/G'
        Column every entry of ``columns`` is correlated against.

    Returns
    -------
    pandas.Series
        Result of ``DataFrame.corrwith``, indexed by column name.
    """
    if columns is None:
        # Fall back to the notebook global so existing one-argument calls
        # keep working unchanged.
        columns = relevant_columns_for_correlation
    return dataframe[list(columns)].corrwith(dataframe[target])

# Correlations for four player subsets: everyone, players whose 'FPTS/G' is
# positive, and the players whose 'Rank' is at most 50 / at most 25.
correlations_all = compute_correlations(df)
correlations_fpts_nonzero = compute_correlations(df.loc[df['FPTS/G'].gt(0)])
correlations_top50 = compute_correlations(df.loc[df['Rank'].le(50)])
correlations_top25 = compute_correlations(df.loc[df['Rank'].le(25)])

# One column per subset, plus the row-wise mean of the four as 'Average'
all_correlations = pd.DataFrame(
    {
        'All Players': correlations_all,
        'FPTS > 0': correlations_fpts_nonzero,
        'Top 50 Players': correlations_top50,
        'Top 25 Players': correlations_top25,
    }
).assign(Average=lambda table: table.mean(axis=1))

all_correlations

Unnamed: 0,All Players,FPTS > 0,Top 50 Players,Top 25 Players,Average
Y/A,0.484468,0.340921,0.14535,0.124178,0.273729
LG,0.794578,0.753523,0.43835,0.380354,0.591701
Y/R,0.312101,0.191187,0.207406,0.208589,0.229821
ATT_per_game,0.897355,0.87825,0.657552,0.465307,0.724616
YDS_per_game,0.900657,0.88517,0.685451,0.496821,0.742025
20+_per_game,0.745121,0.730448,0.469858,0.361797,0.576806
TD_per_game,0.827187,0.813381,0.648756,0.477188,0.691628
REC_per_game,0.822071,0.793451,0.669008,0.594714,0.719811
TGT_per_game,0.823385,0.797113,0.673987,0.578925,0.718352
YDS.1_per_game,0.814064,0.789105,0.689727,0.597689,0.722646


### Assign the weights for the relevant stats

In [20]:
# Square the averaged correlation of every stat
all_correlations['R^2'] = all_correlations['Average'].pow(2)

# A stat whose averaged correlation exceeds 0.65 is weighted 1 + R^2;
# every other stat keeps a neutral weight of 1.
all_correlations['Weight'] = [
    1 + r_squared if mean_corr > 0.65 else 1
    for mean_corr, r_squared in zip(all_correlations['Average'], all_correlations['R^2'])
]

# Keep just the three columns the later cells need, and show them
weights = all_correlations[['Average', 'R^2', 'Weight']]
weights

Unnamed: 0,Average,R^2,Weight
Y/A,0.273729,0.074928,1.0
LG,0.591701,0.35011,1.0
Y/R,0.229821,0.052818,1.0
ATT_per_game,0.724616,0.525068,1.525068
YDS_per_game,0.742025,0.550601,1.550601
20+_per_game,0.576806,0.332705,1.0
TD_per_game,0.691628,0.478349,1.478349
REC_per_game,0.719811,0.518128,1.518128
TGT_per_game,0.718352,0.51603,1.51603
YDS.1_per_game,0.722646,0.522218,1.522218


### Apply the assigned weights to the relevant stats (multiply each stat by its weight)

In [21]:
# Scale every correlation input by its weight, storing the result (rounded to
# one decimal place) in a new "<name>_weighted" column and remembering the
# new column names in order.
weighted_columns = []
for stat in relevant_columns_for_correlation:
    weighted_name = stat + '_weighted'
    df[weighted_name] = (df[stat] * weights.loc[stat, 'Weight']).round(1)
    weighted_columns.append(weighted_name)

# Side-by-side view of the weighted stats with the identifying columns
df_weighted = df[['Rank', 'Player', 'FPTS/G'] + weighted_columns]

df_weighted.head(10)

Unnamed: 0,Rank,Player,FPTS/G,Y/A_weighted,LG_weighted,Y/R_weighted,ATT_per_game_weighted,YDS_per_game_weighted,20+_per_game_weighted,TD_per_game_weighted,REC_per_game_weighted,TGT_per_game_weighted,YDS.1_per_game_weighted,TD.1_per_game_weighted
0,1.0,Austin Ekeler (LAC),21.9,4.5,72.0,6.7,18.3,83.4,0.6,1.2,9.6,11.4,64.7,0.3
1,2.0,Christian McCaffrey (SF),21.0,4.7,49.0,8.7,22.0,103.9,0.8,0.7,7.6,9.7,66.4,0.3
2,3.0,Josh Jacobs (LV),19.3,4.9,86.0,7.5,30.5,150.7,0.8,1.0,4.7,5.8,35.8,0.0
3,4.0,Derrick Henry (TEN),18.9,4.4,56.0,12.1,33.2,149.0,1.2,1.2,3.2,3.9,37.9,0.0
4,5.0,Saquon Barkley (NYG),17.8,4.4,68.0,5.9,28.1,127.1,1.0,0.9,5.5,7.3,32.1,0.0
5,6.0,Nick Chubb (CLE),16.6,5.0,41.0,8.9,27.1,139.1,1.1,1.0,2.4,3.3,21.5,0.1
6,7.0,Rhamondre Stevenson (NE),14.7,5.0,49.0,6.1,18.9,94.9,0.7,0.4,6.2,7.9,37.8,0.1
7,8.0,Tony Pollard (DAL),15.6,5.2,57.0,9.5,18.5,97.5,1.1,0.9,3.6,5.2,35.3,0.2
8,9.0,Aaron Jones (GB),14.6,5.3,36.0,6.7,19.1,102.2,0.6,0.1,5.3,6.4,35.3,0.3
9,10.0,Joe Mixon (CIN),16.0,3.9,40.0,7.4,21.5,85.4,0.5,0.7,6.1,7.6,44.8,0.1


### Define the columns to be used for the average weighted score

In [22]:
# Hand-picked inputs for the "average" score
average_columns = [
    'ATT_per_game_weighted',
    'YDS_per_game_weighted',
    'TD_per_game_weighted',
    'REC_per_game_weighted',
    'TGT_per_game_weighted',
    'YDS.1_per_game_weighted',
    'FPTS/G',
]

# Inputs for the "average2" score: every stat whose averaged correlation is
# above 0.6, provided its weighted column exists in df, followed by 'FPTS/G'.
passes_cutoff = weights['Average'].gt(0.6)
relevant_cols_gt_0_6 = weights.index[passes_cutoff].tolist()
average2_columns = []
for stat in relevant_cols_gt_0_6:
    weighted_name = stat + '_weighted'
    if weighted_name in df.columns:
        average2_columns.append(weighted_name)
average2_columns.append('FPTS/G')

# Show which columns feed the 'average2' calculation
average2_columns

['ATT_per_game_weighted',
 'YDS_per_game_weighted',
 'TD_per_game_weighted',
 'REC_per_game_weighted',
 'TGT_per_game_weighted',
 'YDS.1_per_game_weighted',
 'FPTS/G']

### Calculate each player's average weighted score

In [23]:
# Row-wise mean of the selected columns, rounded to one decimal place
df['average'] = df[average_columns].mean(axis=1).round(1)
df['average2'] = df[average2_columns].mean(axis=1).round(1)

# Rank each score from highest (rank 1) to lowest; NaN scores go to the bottom.
# The column is ranked directly rather than after a sort_values() call:
# rank() already returns values aligned to df's index, and pre-sorting with
# the default (non-stable) sort made the order of tied scores depend on sort
# internals. With method='first', ties are now broken by row order in df.
for score_col in ('average', 'average2'):
    df[score_col + '_rank'] = df[score_col].rank(
        method='first', ascending=False, na_option='bottom'
    )

# Difference between the source 'Rank' and each new rank. A positive value
# means the weighted score places the player higher than 'Rank' does.
df['variance'] = df['Rank'] - df['average_rank']
df['variance2'] = df['Rank'] - df['average2_rank']

### Display and sort the results of average weighted score in descending order

In [24]:
# Top 30 players by the 'average' score (best rank first), shown with the
# source 'Rank' and the rank difference.
(
    df.loc[:, ['Rank', 'Player', 'average', 'average_rank', 'variance']]
    .sort_values(by='average_rank', ascending=True)  # change the sort key here
    .iloc[:30]
)

Unnamed: 0,Rank,Player,average,average_rank,variance
2,3.0,Josh Jacobs (LV),35.4,1.0,2.0
3,4.0,Derrick Henry (TEN),35.3,2.0,2.0
1,2.0,Christian McCaffrey (SF),33.0,3.0,-1.0
4,5.0,Saquon Barkley (NYG),31.3,4.0,1.0
0,1.0,Austin Ekeler (LAC),30.1,5.0,-4.0
5,6.0,Nick Chubb (CLE),30.1,6.0,0.0
41,42.0,Breece Hall (NYJ),27.9,7.0,35.0
15,16.0,Alvin Kamara (NO),27.6,8.0,8.0
8,9.0,Aaron Jones (GB),26.1,9.0,0.0
9,10.0,Joe Mixon (CIN),26.0,10.0,0.0


In [25]:
# Top 30 players by the 'average2' score (best rank first), shown with the
# source 'Rank' and the rank difference.
(
    df.loc[:, ['Rank', 'Player', 'average2', 'average2_rank', 'variance2']]
    .sort_values(by='average2_rank', ascending=True)
    .iloc[:30]
)

Unnamed: 0,Rank,Player,average2,average2_rank,variance2
2,3.0,Josh Jacobs (LV),35.4,1.0,2.0
3,4.0,Derrick Henry (TEN),35.3,2.0,2.0
1,2.0,Christian McCaffrey (SF),33.0,3.0,-1.0
4,5.0,Saquon Barkley (NYG),31.3,4.0,1.0
0,1.0,Austin Ekeler (LAC),30.1,5.0,-4.0
5,6.0,Nick Chubb (CLE),30.1,6.0,0.0
41,42.0,Breece Hall (NYJ),27.9,7.0,35.0
15,16.0,Alvin Kamara (NO),27.6,8.0,8.0
8,9.0,Aaron Jones (GB),26.1,9.0,0.0
9,10.0,Joe Mixon (CIN),26.0,10.0,0.0
