### Import libraries, .csv file, and create DataFrame 

In [1]:
import pandas as pd
import numpy as np

# Path of the FantasyPros statistics export, relative to the notebook
STATS_CSV_PATH = "FantasyPros_Fantasy_Football_Statistics_RB.csv"

# Load the export into the working DataFrame that the later cells operate on
df = pd.read_csv(STATS_CSV_PATH)

# Preview the first ten rows
df.head(n=10)

Unnamed: 0,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS.1,Y/R,TD.1,FL,G,FPTS,FPTS/G,ROST
0,1.0,Christian McCaffrey (SF),110.0,553.0,5.0,65.0,9.0,7.0,23.0,27.0,177.0,7.7,2.0,1.0,6.0,148.0,24.7,100.0%
1,2.0,Raheem Mostert (MIA),75.0,429.0,5.7,49.0,8.0,9.0,18.0,20.0,145.0,8.1,2.0,1.0,6.0,139.4,23.2,97.6%
2,3.0,Travis Etienne Jr. (JAC),113.0,451.0,4.0,35.0,6.0,5.0,21.0,24.0,172.0,8.2,0.0,0.0,6.0,115.3,19.2,99.8%
3,4.0,Kyren Williams (LAR),97.0,456.0,4.7,31.0,4.0,6.0,13.0,24.0,105.0,8.1,1.0,0.0,6.0,111.1,18.5,91.0%
4,5.0,Zack Moss (IND),96.0,466.0,4.9,56.0,7.0,4.0,14.0,17.0,110.0,7.9,1.0,0.0,5.0,103.6,20.7,84.3%
5,6.0,De'Von Achane (MIA),38.0,460.0,12.1,76.0,17.0,5.0,9.0,11.0,67.0,7.4,2.0,1.0,4.0,101.7,25.4,92.0%
6,7.0,D'Andre Swift (PHI),86.0,452.0,5.3,43.0,5.0,2.0,23.0,27.0,115.0,5.0,1.0,1.0,6.0,95.7,16.0,97.4%
7,8.0,Bijan Robinson (ATL),80.0,401.0,5.0,38.0,4.0,0.0,26.0,32.0,189.0,7.3,2.0,1.0,6.0,95.0,15.8,100.0%
8,9.0,Tony Pollard (DAL),96.0,370.0,3.9,31.0,4.0,2.0,25.0,29.0,176.0,7.0,0.0,1.0,6.0,91.6,15.3,99.8%
9,10.0,Kenneth Walker III (SEA),83.0,345.0,4.2,36.0,5.0,6.0,11.0,13.0,100.0,9.1,0.0,0.0,5.0,91.5,18.3,99.0%


### Find the column numbers (indices) in the df

In [2]:
# Pair every column name with its positional index; a later cell slices
# df.columns by position, so this listing shows which numbers to use
column_numbers = [(position, name) for position, name in enumerate(df.columns)]

column_numbers

[(0, 'Rank'),
 (1, 'Player'),
 (2, 'ATT'),
 (3, 'YDS'),
 (4, 'Y/A'),
 (5, 'LG'),
 (6, '20+'),
 (7, 'TD'),
 (8, 'REC'),
 (9, 'TGT'),
 (10, 'YDS.1'),
 (11, 'Y/R'),
 (12, 'TD.1'),
 (13, 'FL'),
 (14, 'G'),
 (15, 'FPTS'),
 (16, 'FPTS/G'),
 (17, 'ROST')]

### Check the data types of the columns

In [3]:
# Show each column's dtype to spot columns that were parsed as text ('object')
# rather than as numbers
df.dtypes

Rank      float64
Player     object
ATT       float64
YDS       float64
Y/A       float64
LG        float64
20+       float64
TD        float64
REC       float64
TGT       float64
YDS.1     float64
Y/R       float64
TD.1      float64
FL        float64
G         float64
FPTS      float64
FPTS/G    float64
ROST       object
dtype: object

### Convert columns with an 'object' data type to a 'float64' data type

In [4]:
# Text ('object') columns that are intentionally left unconverted:
# 'Player' and 'ROST' are treated as non-numeric categorical columns
non_numeric_columns = ['Player', 'ROST']

# Columns with object datatype that need conversion.
# Filtering (rather than list.remove) avoids a ValueError when an excluded
# column is missing or has already been parsed as a numeric dtype.
convert = [col for col in df.select_dtypes(include=['object']).columns
           if col not in non_numeric_columns]

# Convert each column to float64
for col in convert:
    df[col] = (
        df[col]
        # Drop thousands separators, e.g. '1,234' -> '1234'
        .str.replace(',', '', regex=False)
        # Only a value that is nothing but '-' is a placeholder for zero;
        # the anchored pattern leaves negative numbers such as '-12' intact
        # (replacing every '-' would turn '-12' into '012' and lose the sign)
        .str.replace(r'^\s*-\s*$', '0', regex=True)
        .astype(float)
    )

# Verify the data types
df.dtypes

Rank      float64
Player     object
ATT       float64
YDS       float64
Y/A       float64
LG        float64
20+       float64
TD        float64
REC       float64
TGT       float64
YDS.1     float64
Y/R       float64
TD.1      float64
FL        float64
G         float64
FPTS      float64
FPTS/G    float64
ROST       object
dtype: object

### Convert the relevant stats to a per game basis

In [5]:
# Stat columns are selected by position: columns 2 through 15 of df
relevant_columns = list(df.columns[2:16])

# Columns that are NOT divided by games played
exclude_from_per_game = ['Y/A', 'LG', 'Y/R', 'FL', 'G', 'FPTS', 'FPTS/G']

# Every remaining stat column gets a "<name>_per_game" companion column
per_game_sources = [stat for stat in relevant_columns
                    if stat not in exclude_from_per_game]
for stat in per_game_sources:
    # Divide the stat by games played ('G') and keep one decimal place
    df[f'{stat}_per_game'] = df[stat].div(df['G']).round(1)

# Columns carried forward to the correlation step: the excluded originals
# first, followed by the newly created per-game columns
per_game_columns = [f'{stat}_per_game' for stat in per_game_sources]
relevant_columns_for_correlation = exclude_from_per_game + per_game_columns

# Preview those columns next to 'Rank' and 'Player' for the first ten rows
preview_columns = ['Rank', 'Player'] + relevant_columns_for_correlation
df_average = df[preview_columns].head(10)
df_average

Unnamed: 0,Rank,Player,Y/A,LG,Y/R,FL,G,FPTS,FPTS/G,ATT_per_game,YDS_per_game,20+_per_game,TD_per_game,REC_per_game,TGT_per_game,YDS.1_per_game,TD.1_per_game
0,1.0,Christian McCaffrey (SF),5.0,65.0,7.7,1.0,6.0,148.0,24.7,18.3,92.2,1.5,1.2,3.8,4.5,29.5,0.3
1,2.0,Raheem Mostert (MIA),5.7,49.0,8.1,1.0,6.0,139.4,23.2,12.5,71.5,1.3,1.5,3.0,3.3,24.2,0.3
2,3.0,Travis Etienne Jr. (JAC),4.0,35.0,8.2,0.0,6.0,115.3,19.2,18.8,75.2,1.0,0.8,3.5,4.0,28.7,0.0
3,4.0,Kyren Williams (LAR),4.7,31.0,8.1,0.0,6.0,111.1,18.5,16.2,76.0,0.7,1.0,2.2,4.0,17.5,0.2
4,5.0,Zack Moss (IND),4.9,56.0,7.9,0.0,5.0,103.6,20.7,19.2,93.2,1.4,0.8,2.8,3.4,22.0,0.2
5,6.0,De'Von Achane (MIA),12.1,76.0,7.4,1.0,4.0,101.7,25.4,9.5,115.0,4.2,1.2,2.2,2.8,16.8,0.5
6,7.0,D'Andre Swift (PHI),5.3,43.0,5.0,1.0,6.0,95.7,16.0,14.3,75.3,0.8,0.3,3.8,4.5,19.2,0.2
7,8.0,Bijan Robinson (ATL),5.0,38.0,7.3,1.0,6.0,95.0,15.8,13.3,66.8,0.7,0.0,4.3,5.3,31.5,0.3
8,9.0,Tony Pollard (DAL),3.9,31.0,7.0,1.0,6.0,91.6,15.3,16.0,61.7,0.7,0.3,4.2,4.8,29.3,0.0
9,10.0,Kenneth Walker III (SEA),4.2,36.0,9.1,0.0,5.0,91.5,18.3,16.6,69.0,1.0,1.2,2.2,2.6,20.0,0.0


### Calculate the correlations for relevant stats for different conditions

In [6]:
# Columns left out of the correlation inputs ('FPTS/G' is the column the
# other stats are correlated against)
columns_to_exclude_from_correlation = ['FPTS/G', 'FPTS', 'FL', 'G']
relevant_columns_for_correlation = list(
    filter(lambda name: name not in columns_to_exclude_from_correlation,
           relevant_columns_for_correlation)
)

# Define a function to calculate correlations for given conditions
def compute_correlations(dataframe, columns=None, target='FPTS/G'):
    """Correlate a set of stat columns with a target column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows to include in the calculation (e.g. the full frame or a
        filtered subset of it).
    columns : list of str, optional
        Columns to correlate against ``target``. When omitted, the
        notebook-level ``relevant_columns_for_correlation`` list is used,
        which matches the original single-argument behaviour.
    target : str, default 'FPTS/G'
        Column every entry of ``columns`` is correlated with.

    Returns
    -------
    pandas.Series
        One correlation per column, indexed by column name (the result of
        ``DataFrame.corrwith``).
    """
    if columns is None:
        # Fall back to the list prepared by the earlier notebook cells
        columns = relevant_columns_for_correlation
    return dataframe[list(columns)].corrwith(dataframe[target])

# Correlations on four player subsets: everyone, players with a positive
# FPTS/G, and the players ranked in the top 50 and top 25
correlations_all = compute_correlations(df)
correlations_fpts_nonzero = compute_correlations(df.loc[df['FPTS/G'].gt(0)])
correlations_top50 = compute_correlations(df.loc[df['Rank'].le(50)])
correlations_top25 = compute_correlations(df.loc[df['Rank'].le(25)])

# One column per subset so the four results can be compared side by side
correlations_by_subset = {
    'All Players': correlations_all,
    'FPTS > 0': correlations_fpts_nonzero,
    'Top 50 Players': correlations_top50,
    'Top 25 Players': correlations_top25,
}

# Append the row-wise mean of the four subset correlations as 'Average'
all_correlations = pd.DataFrame(correlations_by_subset).assign(
    Average=lambda table: table.mean(axis=1)
)

all_correlations

Unnamed: 0,All Players,FPTS > 0,Top 50 Players,Top 25 Players,Average
Y/A,0.691919,0.487921,0.482236,0.429338,0.522854
LG,0.838683,0.760884,0.656063,0.417652,0.668321
Y/R,0.528366,0.219815,0.216804,0.150575,0.27889
ATT_per_game,0.872521,0.850889,0.649145,0.372826,0.686345
YDS_per_game,0.939076,0.93002,0.869514,0.830678,0.892322
20+_per_game,0.69158,0.689915,0.694768,0.582689,0.664738
TD_per_game,0.791886,0.786619,0.782411,0.799284,0.79005
REC_per_game,0.783501,0.747472,0.493773,0.253472,0.569555
TGT_per_game,0.792611,0.759624,0.489912,0.216995,0.564785
YDS.1_per_game,0.803817,0.773345,0.555487,0.386295,0.629736


### Assign the weights for the relevant stats

In [7]:
# Square the averaged correlation to obtain each stat's R^2
all_correlations['R^2'] = all_correlations['Average'].pow(2)


def _weight_for(row):
    """Return 1 + R^2 when the row's average correlation exceeds 0.65, else 1."""
    if row['Average'] > 0.65:
        return 1 + row['R^2']
    return 1


# Assign one weight per stat, evaluated row by row
all_correlations['Weight'] = all_correlations.apply(_weight_for, axis=1)

# Keep only the columns needed downstream and display them
weights = all_correlations.loc[:, ['Average', 'R^2', 'Weight']]
weights

Unnamed: 0,Average,R^2,Weight
Y/A,0.522854,0.273376,1.0
LG,0.668321,0.446653,1.446653
Y/R,0.27889,0.07778,1.0
ATT_per_game,0.686345,0.47107,1.47107
YDS_per_game,0.892322,0.796238,1.796238
20+_per_game,0.664738,0.441877,1.441877
TD_per_game,0.79005,0.624179,1.624179
REC_per_game,0.569555,0.324392,1.0
TGT_per_game,0.564785,0.318982,1.0
YDS.1_per_game,0.629736,0.396568,1.0


### Apply the assigned weights to the relevant stats

In [8]:
# Scale every correlation input by its weight, storing the result (rounded to
# one decimal) in a new "<name>_weighted" column
weighted_columns = []
for stat in relevant_columns_for_correlation:
    weighted_name = f'{stat}_weighted'
    df[weighted_name] = df[stat].mul(weights.loc[stat, 'Weight']).round(1)
    weighted_columns.append(weighted_name)

# Collect the identifying columns plus the weighted stats for inspection
df_weighted = df.loc[:, ['Rank', 'Player', 'FPTS/G', *weighted_columns]]

df_weighted.head(10)

Unnamed: 0,Rank,Player,FPTS/G,Y/A_weighted,LG_weighted,Y/R_weighted,ATT_per_game_weighted,YDS_per_game_weighted,20+_per_game_weighted,TD_per_game_weighted,REC_per_game_weighted,TGT_per_game_weighted,YDS.1_per_game_weighted,TD.1_per_game_weighted
0,1.0,Christian McCaffrey (SF),24.7,5.0,94.0,7.7,26.9,165.6,2.2,1.9,3.8,4.5,29.5,0.3
1,2.0,Raheem Mostert (MIA),23.2,5.7,70.9,8.1,18.4,128.4,1.9,2.4,3.0,3.3,24.2,0.3
2,3.0,Travis Etienne Jr. (JAC),19.2,4.0,50.6,8.2,27.7,135.1,1.4,1.3,3.5,4.0,28.7,0.0
3,4.0,Kyren Williams (LAR),18.5,4.7,44.8,8.1,23.8,136.5,1.0,1.6,2.2,4.0,17.5,0.2
4,5.0,Zack Moss (IND),20.7,4.9,81.0,7.9,28.2,167.4,2.0,1.3,2.8,3.4,22.0,0.2
5,6.0,De'Von Achane (MIA),25.4,12.1,109.9,7.4,14.0,206.6,6.1,1.9,2.2,2.8,16.8,0.5
6,7.0,D'Andre Swift (PHI),16.0,5.3,62.2,5.0,21.0,135.3,1.2,0.5,3.8,4.5,19.2,0.2
7,8.0,Bijan Robinson (ATL),15.8,5.0,55.0,7.3,19.6,120.0,1.0,0.0,4.3,5.3,31.5,0.3
8,9.0,Tony Pollard (DAL),15.3,3.9,44.8,7.0,23.5,110.8,1.0,0.5,4.2,4.8,29.3,0.0
9,10.0,Kenneth Walker III (SEA),18.3,4.2,52.1,9.1,24.4,123.9,1.4,1.9,2.2,2.6,20.0,0.0


### Define the columns to be used for the average weighted score

In [9]:
# Hand-picked inputs for the "average" score
average_columns = [
    'ATT_per_game_weighted',
    'YDS_per_game_weighted',
    'TD_per_game_weighted',
    'REC_per_game_weighted',
    'TGT_per_game_weighted',
    'YDS.1_per_game_weighted',
    'FPTS/G',
]

# Inputs for the "average2" score: every stat whose average correlation is
# above 0.6, provided its weighted column exists in df, plus 'FPTS/G'
relevant_cols_gt_0_6 = weights.loc[weights['Average'].gt(0.6)].index.tolist()
candidate_columns = [f'{stat}_weighted' for stat in relevant_cols_gt_0_6]
average2_columns = [name for name in candidate_columns if name in df.columns]
average2_columns += ['FPTS/G']

# Show which columns feed the 'average2' calculation
average2_columns

['LG_weighted',
 'ATT_per_game_weighted',
 'YDS_per_game_weighted',
 '20+_per_game_weighted',
 'TD_per_game_weighted',
 'YDS.1_per_game_weighted',
 'FPTS/G']

### Calculate each player's average weighted score

In [10]:
# Calculate "average" and "average2": the row-wise mean of the selected
# columns, rounded to one decimal place
df['average'] = df[average_columns].mean(axis=1).round(1)
df['average2'] = df[average2_columns].mean(axis=1).round(1)

# Rank both scores (1 = highest score), with NaN scores placed at the bottom.
# rank(method='first') breaks ties in the order the rows appear, so the frame
# is ranked as-is: tied players keep their original row order. Pre-sorting
# with sort_values (whose default quicksort is not stable) would shuffle tied
# rows arbitrarily before ranking. rank() already returns floats, so no
# astype(float) is needed.
for score_column in ('average', 'average2'):
    df[score_column + '_rank'] = df[score_column].rank(
        method='first', ascending=False, na_option='bottom'
    )

# 'variance' / 'variance2' are rank differences (not statistical variance):
# original Rank minus the rank implied by the weighted score, so a positive
# value means the player ranks higher by the weighted score than by 'Rank'
df['variance'] = df['Rank'] - df['average_rank']
df['variance2'] = df['Rank'] - df['average2_rank']

### Display and sort the results of average weighted score in descending order

In [11]:
# First 30 players ordered by their 'average' rank, next to the original Rank
average_view = df.loc[:, ['Rank', 'Player', 'average', 'average_rank', 'variance']]
average_view.sort_values('average_rank').head(30)    # the sort key is chosen on this line

Unnamed: 0,Rank,Player,average,average_rank,variance
5,6.0,De'Von Achane (MIA),38.5,1.0,5.0
0,1.0,Christian McCaffrey (SF),36.7,2.0,-1.0
4,5.0,Zack Moss (IND),35.1,3.0,2.0
40,41.0,Austin Ekeler (LAC),31.6,4.0,37.0
2,3.0,Travis Etienne Jr. (JAC),31.4,5.0,-2.0
22,23.0,Alvin Kamara (NO),29.8,6.0,17.0
3,4.0,Kyren Williams (LAR),29.2,7.0,-3.0
1,2.0,Raheem Mostert (MIA),29.0,8.0,-6.0
13,14.0,David Montgomery (DET),28.8,9.0,5.0
6,7.0,D'Andre Swift (PHI),28.6,10.0,-3.0


In [12]:
# First 30 players ordered by their 'average2' rank, next to the original Rank
average2_view = df.loc[:, ['Rank', 'Player', 'average2', 'average2_rank', 'variance2']]
average2_view.sort_values('average2_rank').head(30)

Unnamed: 0,Rank,Player,average2,average2_rank,variance2
5,6.0,De'Von Achane (MIA),54.4,1.0,5.0
0,1.0,Christian McCaffrey (SF),49.3,2.0,-1.0
4,5.0,Zack Moss (IND),46.1,3.0,2.0
15,16.0,Breece Hall (NYJ),42.7,4.0,12.0
40,41.0,Austin Ekeler (LAC),42.0,5.0,36.0
11,12.0,Derrick Henry (TEN),39.8,6.0,6.0
1,2.0,Raheem Mostert (MIA),38.5,7.0,-5.0
2,3.0,Travis Etienne Jr. (JAC),37.7,8.0,-5.0
13,14.0,David Montgomery (DET),37.1,9.0,5.0
6,7.0,D'Andre Swift (PHI),36.5,10.0,-3.0
