In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import sqlite3
import statistics as st

In [2]:
##Read in data from sqlite
conn = sqlite3.connect("sqlite_db.sqlite")
c = conn.cursor()



def data_set(predict_year, qb_td, ppr):
    
    predict_year = predict_year - 1
    
    years = [2018, 2019, 2020, 2021]
    years_list = []
    for year in years:
        if year < predict_year:
            years_list.append(year)
    
    position_list = ['QB', 'RB', 'TE', 'WR']
    target_merged_df_list = []
    
    #iterate through each year before the predict year for eac position
    for position in position_list:
        for year in years_list:
            next_year = year + 1
            #get train year data 
            base_df = pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Stats' WHERE Year = '{year}'"))
            base_df.columns = list(map(lambda x: x[0], c.description))
            extra_df = pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Extra_Stats' WHERE Year = '{year}'"))
            extra_df.columns = list(map(lambda x: x[0], c.description))
            extra_df = extra_df.rename(columns={'PlayerPlayerId': 'PlayerID'})
            extra_df = extra_df.drop(columns = ["PlayerShortName"], axis = 1)

            #Remove Duplicate Columns between the extra stats df and base df
            diff_cols = extra_df.columns.difference(base_df.columns)
            diff_cols = diff_cols.union(['PlayerID'])
            extra_df = extra_df[diff_cols]

            #create model data
            merged_df = pd.merge(base_df, extra_df, on="PlayerID")
            merged_df = merged_df.drop(columns = ["FantasyPoints", "index"], axis = 1)
            merged_df["Trending"],merged_df["Consistency"] =  last_year_trend(merged_df)
            merged_df["Depth"] =  depth_position(merged_df)
            

            #get next year data to create target
            next_df = pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Stats' WHERE Year = '{next_year}'"))
            next_df.columns = list(map(lambda x: x[0], c.description))

            target_df = pd.DataFrame(next_df["PlayerID"])

            #use fantasy point calculation funtion in order to calculate fantasy points based on inputs
            if position == 'QB':
                target_df["FantasyPoints"] = qb_calc(next_df,qb_td)
                targetmerged_df = pd.merge(merged_df, target_df, on="PlayerID").set_index("PlayerID")
            elif position == "RB":
                target_df["FantasyPoints"] = wr_rb_te_calc(next_df,ppr)
                targetmerged_df = pd.merge(merged_df, target_df, on="PlayerID").set_index("PlayerID").fillna(0)
                targetmerged_df = targetmerged_df.loc[targetmerged_df["RushingAttempts"]>20]
            else:
                target_df["FantasyPoints"] = wr_rb_te_calc(next_df,ppr)
                targetmerged_df = pd.merge(merged_df, target_df, on="PlayerID").set_index("PlayerID").fillna(0)
                targetmerged_df = targetmerged_df.loc[targetmerged_df["ReceivingTargets"]>10]

            target_merged_df_list.append(targetmerged_df)

    return target_merged_df_list
    

def predict_year_data(predict_year, qb_td, ppr):
    predict_year = predict_year - 1
    
    position_list = ['QB', 'RB', 'TE', 'WR']
    merged_df_list = []
    results_df_list = []
    last_year_end_df_list = [] 
        
    for position in position_list:

        #query data by position
        base_df = pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Stats' WHERE Year = '{predict_year}'"))
        base_df.columns = list(map(lambda x: x[0], c.description))
        extra_df = pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Extra_Stats' WHERE Year = '{predict_year}'"))
        extra_df.columns = list(map(lambda x: x[0], c.description))
        extra_df = extra_df.rename(columns={'PlayerPlayerId': 'PlayerID'})
        extra_df = extra_df.drop(columns = ["PlayerShortName"], axis = 1)

        #Remove Duplicate Columns between the extra stats df and base df
        diff_cols = extra_df.columns.difference(base_df.columns)
        diff_cols = diff_cols.union(['PlayerID'])
        extra_df = extra_df[diff_cols]

        merged_df = pd.merge(base_df, extra_df, on="PlayerID")
        if position == "RB":
            merged_df = merged_df.drop(columns = ["FantasyPoints", "index"], axis = 1).fillna(0)
            merged_df["Depth"] =  depth_position(merged_df)
            merged_df["Trending"],merged_df["Consistency"] =  last_year_trend(merged_df)
            merged_df = merged_df.loc[merged_df["RushingAttempts"]>20]

        elif position == "WR" or position == "TE":
            merged_df = merged_df.drop(columns = ["FantasyPoints", "index"], axis = 1).fillna(0)
            merged_df["Depth"] =  depth_position(merged_df)
            merged_df["Trending"],merged_df["Consistency"] =  last_year_trend(merged_df)
            merged_df = merged_df.loc[merged_df["ReceivingTargets"]>10]


        else:
            merged_df["Trending"],merged_df["Consistency"] =  last_year_trend(merged_df)
            merged_df["Depth"] =  depth_position(merged_df)  
        
 

        merged_df_list.append(merged_df)
        
        #create dataframe for final display dataframe with last years Name, Rank, and team
        last_year_end_df = merged_df.loc[:,["Rank","PlayerID","Name","Team","Position"]]
        last_year_end_df_list.append(last_year_end_df)
        
        
        #create dataframe for final display dataframe to show how the players finished the season
        if (predict_year+1) < 2022:
            prediction_year = predict_year + 1
            results_df =   pd.DataFrame(c.execute(f"SELECT * FROM '{position}_Stats' WHERE Year = '{prediction_year}'"))
            results_df.columns = list(map(lambda x: x[0], c.description))
            if position == "QB":
                results_df["FantasyPoints"] = qb_calc(results_df,qb_td)
                results_df = results_df.loc[:,["Rank","PlayerID","Team","FantasyPoints"]].rename(columns={'Rank': f'{prediction_year} Actual Rank',f'Team':f'{prediction_year} Team'})        
                results_df_list.append(results_df)
            else:
                results_df["FantasyPoints"] = wr_rb_te_calc(results_df,ppr)
                results_df = results_df.loc[:,["Rank","PlayerID","Team","FantasyPoints"]].rename(columns={'Rank': f'{prediction_year} Actual Rank',f'Team':f'{prediction_year} Team'})        
                results_df_list.append(results_df)
            
    return last_year_end_df_list,results_df_list, merged_df_list

def qb_calc(df, qb_td):
    fantasy_points = (df["PassingYards"])/25 + (df["PassingTouchdowns"])*qb_td + (df["RushingYards"])/10 + (df["RushingTouchdowns"])*6 -(df["PassingInterceptions"])*2
    return fantasy_points

def wr_rb_te_calc(df, ppr):
    if ppr == True:
        ppr = 1
    else:
        ppr = 0
    fantasy_points = (df["RushingYards"])/10 + (df["RushingTouchdowns"])*6 + (df["Receptions"])*ppr + (df["ReceivingYards"])/10 + (df["ReceivingTouchdowns"])*6 - (df["FumblesLost"])*2
    return fantasy_points

def last_year_trend(df):
    
    names = list(df["Name"])
    year = df["Year"][0]
    trending = []
    consistency = []
    
    for name in names:
        try:
            weekly_stats_df = pd.DataFrame(c.execute(f"SELECT * FROM 'Weekly_Stats' WHERE Year = '{year}' AND Name = '{name}'"))
            weekly_stats_df.columns = list(map(lambda x: x[0], c.description))
            weekly_stats_df = weekly_stats_df.loc[:,["Name","Week","FantasyPoints"]]
            X = pd.DataFrame(weekly_stats_df["Week"])
            y = weekly_stats_df["FantasyPoints"]
            trend  = LinearRegression(fit_intercept=True).fit(X,y)
            trend_val = float(trend.coef_)
            consistency_val = st.stdev(y)/(st.mean(y))
            trending.append(trend_val)
            consistency.append(consistency_val)
        except:
            trending.append(0)
            consistency.append(0)
    
    return trending, consistency 

def depth_position(df):
    year = df["Year"][0]
    position = df["Position"][0]
    depth = []
    lower_name_df = []
    lower_name_depth = []
    if position == "WR":
        depth_df = pd.DataFrame(c.execute(f"SELECT * FROM 'DepthCharts' WHERE (year = '{year}' AND position = 'LWR') OR (Year = '{year}' AND position = 'RWR')"))
    else:
        depth_df = pd.DataFrame(c.execute(f"SELECT * FROM 'DepthCharts' WHERE (year = '{year}' AND position = '{position}')"))                        
    depth_df.columns = list(map(lambda x: x[0], c.description))
        
    for i in range(len(depth_df)):
        lower_name_depth.append(depth_df["name"][i].lower())

    for i in range(len(df)):
        lower_name_df.append(df["Name"][i].lower())
    
    depth_df["Name1"] = lower_name_depth
    df["Name1"] = lower_name_df
    
    if position == "QB":
        depth_df = pd.merge(df, depth_df, on="Name1", how = "left").fillna(1)
    else:
        depth_df = pd.merge(df, depth_df, on="Name1", how = "left").fillna(2)
        
    depth_list = list(depth_df["depth"])
    depth_list = [i - 1 for i in depth_list]
    return depth_list
        
    


predict_year = int(input("which year would you like to predict? "))
qb_td = int(input("how many points per passing td? "))
ppr = input("(True or False) is your league PPR ")
qb_no = int(input("how many qbs start? "))
rb_no = int(input("how many rbs start? "))
wr_no = int(input("how many wrs start? "))
te_no = int(input("how many tes start? "))
flex_no = int(input("how many flex start? "))
team_no = int(input("how many teams in your league? "))

#take user inputs and generate dataframes for model and display
model_data = data_set(predict_year, qb_td,ppr)
last_year_end_df, results_df, predict_data = predict_year_data(predict_year, qb_td,ppr)

which year would you like to predict? 2022
how many points per passing td? 4
(True or False) is your league PPR True
how many qbs start? 1
how many rbs start? 2
how many wrs start? 2
how many tes start? 1
how many flex start? 1
how many teams in your league? 12


In [3]:
## split dataframes into dfs per position

qb_model =[]
rb_model =[]
te_model =[]
wr_model =[]

dfs_per_position = predict_year - 2019

for i, df in enumerate(model_data):
    if i < (dfs_per_position):
        qb_model.append(df)
    if i < (dfs_per_position)*2 and i >= (dfs_per_position):
        rb_model.append(df)
    if i < (dfs_per_position)*3 and i >= (dfs_per_position)*2:
        te_model.append(df)
    if i >= (dfs_per_position)*3:
        wr_model.append(df)

qb_model = pd.concat(qb_model)
rb_model = pd.concat(rb_model)
te_model = pd.concat(te_model)
wr_model = pd.concat(wr_model)


#Create target per position (next years Fantasy Point total)
y_qb = qb_model["FantasyPoints"]
y_rb = rb_model["FantasyPoints"]
y_te = te_model["FantasyPoints"]
y_wr = wr_model["FantasyPoints"]

In [4]:
qb_stat = qb_model.loc[:,["PassingYards","PassingTouchdowns","RushingYards","RushingTouchdowns","PassingInterceptions"]]
rb_stat = rb_model.loc[:,["RushingYards","RushingTouchdowns","Receptions","ReceivingYards","ReceivingTouchdowns","FumblesLost"]]
te_stat = te_model.loc[:,["RushingYards","RushingTouchdowns","Receptions","ReceivingYards","ReceivingTouchdowns","FumblesLost"]]
wr_stat = wr_model.loc[:,["RushingYards","RushingTouchdowns","Receptions","ReceivingYards","ReceivingTouchdowns","FumblesLost"]]


#Remove non-numerical columns and unwanted features.
X_qb = qb_model.drop(columns = ["Rank","FantasyPoints","Name","Team","Position","PassingCompletions","PassingYardsPerAttempt","PassingRating","AirYardsPerAttempt","AirYardsPerGame","MoneyThrows","PassingCompletionPercentage","CompletionPercentage","ProtectionRate","TouchdownRate","PassAttemptsPerGame","AirYards","PassingAttempts","PassingInterceptions","RushingYardsPerAttempt","Played","Year","Name1"], axis = 1)
X_rb = rb_model.drop(columns = ["Rank","FantasyPoints","Name","Team","Position","BaseFrontCarryRate","LightFrontCarryRate","ShotgunCarryRate","StackedFrontCarryRate","Targets","ReceivingTargets","Carries","ReceivingTDs","Fumbles","UnderCenterCarryRate","UnderCenterYardsPerCarry","YardsPerCarry","AverageDefendersInTheBox","UnderCenterYardsPerCarry","ShotgunYardsPerCarry","Year","Name1"], axis = 1)
X_te = te_model.drop(columns = ["Rank","FantasyPoints","Name","Team","Position","Targets","Targets.1","ReceivingTDs","RushingAttempts","AirYards","ReceivingLong","ReceivingYardsPerTarget","RushingYards","RushingYardsPerAttempt","RushingTouchdowns","AirYardsPerReception","HogRate","TargetAccuracy","RedZoneTargets","ReceivingYardsPerReception","Fumbles","FumblesLost","ReceivingTargets","AirYardsPerGame","Played","Year","Name1"], axis = 1)
X_wr = wr_model.drop(columns = ["Rank","FantasyPoints","Name","Team","Position","Targets","Targets.1","ReceivingTDs","AirYards","AirYardsPerGame","AverageTargetDistance","ReceivingLong","ReceivingYardsPerTarget","HogRate","Fumbles","FumblesLost","FantasyPointsPerTarget","AirYardsPerReception","RedZoneTargets","TargetAccuracy","RushingTouchdowns","RushingAttempts","Played","Year","ReceptionPercentage","Name1"], axis = 1)




#Scale data
scaler = StandardScaler().fit(X_qb)
X_scaled_qb = scaler.transform(X_qb)

scaler = StandardScaler().fit(X_rb)
X_scaled_rb = scaler.transform(X_rb)

scaler = StandardScaler().fit(X_te)
X_scaled_te = scaler.transform(X_te)

scaler = StandardScaler().fit(X_wr)
X_scaled_wr = scaler.transform(X_wr)

In [5]:
#Perform multiple linear regression on qb data
LinReg_model_qb  = LinearRegression(fit_intercept=True).fit(X_scaled_qb,y_qb)

#Print coefficients and associated features

for i, col in enumerate(X_qb.columns):
    print('{} is associated with {}'.format(col,LinReg_model_qb.coef_[i]))
    
#print intercept
LinReg_model_qb.intercept_

PassingYards is associated with 1.702754938080469
PassingTouchdowns is associated with 23.992289239282798
RushingAttempts is associated with -30.09917408306037
RushingYards is associated with 7.184977064873793
RushingTouchdowns is associated with 26.864673520503498
FantasyPointsPerGame is associated with 39.620650702286554
DeepBallAttempts is associated with 27.95133999740009
DeepBallCompletionPercentage is associated with 4.351870625946241
Interceptions is associated with -33.22656948349484
PlayerAgeExact is associated with -15.001897048493085
PressuredCompletionPercentage is associated with -26.52255401964076
Trending is associated with 8.16811773276029
Consistency is associated with 16.143448772470776
Depth is associated with -7.798978834265411


205.95333333333326

In [6]:
#Perform multiple linear regression on rb data
LinReg_model_rb  = LinearRegression(fit_intercept=True).fit(X_scaled_rb,y_rb)

#Print coefficients and associated features

for i, col in enumerate(X_rb.columns):
    print('{} is associated with {}'.format(col,LinReg_model_rb.coef_[i]))
    
#print intercept
LinReg_model_rb.intercept_

Played is associated with -6.623928948164223
RushingAttempts is associated with -23.562542415904762
RushingYards is associated with 53.34007419247464
RushingYardsPerAttempt is associated with -1.9373005460020378
RushingTouchdowns is associated with 6.73316640165051
Receptions is associated with 23.17546870620004
ReceivingYards is associated with -14.48730910876641
ReceivingTouchdowns is associated with -3.4987054433990057
FumblesLost is associated with 6.45782872519133
FantasyPointsPerGame is associated with 19.056664335088545
OpportunityShare is associated with -30.318712392092017
PlayerAgeExact is associated with -8.846322479791084
SnapShare is associated with 12.638915039598471
Trending is associated with 6.6530304945733025
Consistency is associated with 5.940088287297477
Depth is associated with -4.775101184315642


86.71462450592885

In [7]:
#Perform multiple linear regression on te data
LinReg_model_te  = LinearRegression(fit_intercept=True).fit(X_scaled_te,y_te)

#Print coefficients and associated features

for i, col in enumerate(X_te.columns):
    print('{} is associated with {}'.format(col,LinReg_model_te.coef_[i]))
    
#print intercept
LinReg_model_te.intercept_

Receptions is associated with 4.5976751652365015
ReceptionPercentage is associated with -13.159339707364673
ReceivingYards is associated with 23.564297471626542
ReceivingTouchdowns is associated with -27.51807535780937
FantasyPointsPerGame is associated with 3.0119585558905824
AverageTargetDistance is associated with -2.146104495553887
EndzoneTargets is associated with 12.241636874175406
FantasyPointsPerTarget is associated with 21.488941848948752
SnapShare is associated with -4.508337260482955
TargetShare is associated with 11.325465967778072
Trending is associated with -6.512559509340042
Consistency is associated with -3.000906986778805
Depth is associated with 3.681019785580602


67.98941176470592

In [8]:
#Perform multiple linear regression on wr data
LinReg_model_wr  = LinearRegression(fit_intercept=True).fit(X_scaled_wr,y_wr)

#Print coefficients and associawrd features

for i, col in enumerate(X_wr.columns):
    print('{} is associated with {}'.format(col,LinReg_model_wr.coef_[i]))
    
#print intercept
LinReg_model_wr.intercept_

ReceivingTargets is associated with -19.72594579573818
Receptions is associated with 9.959394795263362
ReceivingYards is associated with 13.050857633908711
ReceivingTouchdowns is associated with 10.960107887697328
ReceivingYardsPerReception is associated with 2.375416939112573
RushingYards is associated with 0.3064094608453897
RushingYardsPerAttempt is associated with 9.487651247290643
FantasyPointsPerGame is associated with -1.9908576893218273
EndzoneTargets is associated with 1.9229859664767133
SnapShare is associated with -0.0983755918548255
TargetShare is associated with 16.5898840615763
Trending is associated with -2.2802699096756864
Consistency is associated with 7.33876302434974
Depth is associated with -3.8856990982888386


92.67721518987344

In [9]:
#predict qb stats
X_predict_qb = predict_data[0].drop(columns = ["PlayerID","Rank","Name","Team","Position","PassingCompletions","PassingYardsPerAttempt","PassingRating","AirYardsPerAttempt","AirYardsPerGame","MoneyThrows","PassingCompletionPercentage","CompletionPercentage","ProtectionRate","TouchdownRate","PassAttemptsPerGame","AirYards","PassingAttempts","PassingInterceptions","RushingYardsPerAttempt","Played","Year","index","FantasyPoints","Name1"], axis = 1)
scaler = StandardScaler().fit(X_predict_qb)
X_predict_scaled_qb = scaler.transform(X_predict_qb)

QB_proj = LinReg_model_qb.predict(X_predict_scaled_qb)

In [10]:
#predict rb stats
X_predict_rb = predict_data[1].drop(columns = ["Rank","PlayerID","Name","Team","Position","BaseFrontCarryRate","LightFrontCarryRate","ShotgunCarryRate","StackedFrontCarryRate","Targets","ReceivingTargets","Carries","ReceivingTDs","Fumbles","UnderCenterCarryRate","UnderCenterYardsPerCarry","YardsPerCarry","AverageDefendersInTheBox","UnderCenterYardsPerCarry","ShotgunYardsPerCarry","Year","Name1"], axis = 1)
scaler = StandardScaler().fit(X_predict_rb)
X_predict_scaled_rb = scaler.transform(X_predict_rb)

rb_proj = LinReg_model_rb.predict(X_predict_scaled_rb)

In [11]:
#predict te stats
X_predict_te = predict_data[2].drop(columns = ["PlayerID","Rank","Name","Team","Position","Targets.1","Targets","ReceivingTDs","RushingAttempts","AirYards","ReceivingLong","ReceivingYardsPerTarget","RushingYards","RushingYardsPerAttempt","RushingTouchdowns","AirYardsPerReception","HogRate","TargetAccuracy","RedZoneTargets","ReceivingYardsPerReception","Fumbles","FumblesLost","ReceivingTargets","AirYardsPerGame","Year","Played","Name1"], axis = 1)
scaler = StandardScaler().fit(X_predict_te)
X_predict_scaled_te = scaler.transform(X_predict_te)

te_proj = LinReg_model_te.predict(X_predict_scaled_te)

In [12]:
#predict wr stats
X_predict_wr = predict_data[3].drop(columns = ["PlayerID","Rank","Name","Team","Position","Targets","Targets.1","ReceivingTDs","AirYards","AirYardsPerGame","AverageTargetDistance","ReceivingLong","ReceivingYardsPerTarget","HogRate","Fumbles","FumblesLost","FantasyPointsPerTarget","AirYardsPerReception","RedZoneTargets","TargetAccuracy","RushingTouchdowns","RushingAttempts","Year","Played","ReceptionPercentage","Name1"], axis = 1)
scaler = StandardScaler().fit(X_predict_wr)
X_predict_scaled_wr = scaler.transform(X_predict_wr)

wr_proj = LinReg_model_wr.predict(X_predict_scaled_wr)

In [13]:
#create df for a visual representation of effectiveness for qbs
last_year_end_df[0][f"{predict_year}_proj"] = QB_proj

In [14]:
if predict_year != 2022:
    visual_df = pd.merge(last_year_end_df[0], results_df[0],on = "PlayerID")
    display(visual_df.sort_values([f"{predict_year}_proj"], ascending=False))

In [15]:
#create df for a visual representation of effectiveness for qbs
last_year_end_df[1][f"{predict_year}_proj"] = rb_proj

In [16]:
if predict_year != 2022:
    visual_df = pd.merge(last_year_end_df[1], results_df[1],on = "PlayerID")
    display(visual_df.sort_values([f"{predict_year}_proj"], ascending=False).head(50))

In [17]:
#create df for a visual representation of effectiveness for qbs
last_year_end_df[2][f"{predict_year}_proj"] = te_proj

In [18]:
if predict_year != 2022:
    visual_df = pd.merge(last_year_end_df[2], results_df[2],on = "PlayerID")
    display(visual_df.sort_values([f"{predict_year}_proj"], ascending=False))

In [19]:
#create df for a visual representation of effectiveness for qbs
last_year_end_df[3][f"{predict_year}_proj"] = wr_proj

In [20]:
if predict_year != 2022:
    visual_df = pd.merge(last_year_end_df[3], results_df[3],on = "PlayerID")
    display(visual_df.sort_values([f"{predict_year}_proj"], ascending=False).head(50))

In [21]:
standard_qb = st.stdev(qb_calc(qb_stat, qb_td))*qb_no*team_no
standard_rb = st.stdev(wr_rb_te_calc(rb_stat, ppr)) * (rb_no*team_no + 0.5 * flex_no*team_no)
standard_te = st.stdev(wr_rb_te_calc(te_stat, ppr)) * (te_no*team_no + 0.5 * flex_no*team_no)
standard_wr = st.stdev(wr_rb_te_calc(wr_stat, ppr)) * (wr_no*team_no + 0.5 * flex_no*team_no)
sumweight = standard_qb + standard_rb + standard_te + standard_wr
print(standard_qb/sumweight,standard_rb/sumweight,standard_te/sumweight,standard_wr/sumweight)

0.184885009710508 0.41498210518958306 0.1322071926769521 0.26792569242295694


In [22]:
last_year_end_df[0][f"{predict_year}_proj"] = last_year_end_df[0][f"{predict_year}_proj"] * standard_qb*0.9/sumweight
last_year_end_df[1][f"{predict_year}_proj"] = last_year_end_df[1][f"{predict_year}_proj"] * standard_rb/sumweight
last_year_end_df[2][f"{predict_year}_proj"] = last_year_end_df[2][f"{predict_year}_proj"] * standard_rb*0.8/sumweight
last_year_end_df[3][f"{predict_year}_proj"] = last_year_end_df[3][f"{predict_year}_proj"] * standard_rb/sumweight

In [23]:
last_year_end_df = pd.concat(last_year_end_df).sort_values([f"{predict_year}_proj"], ascending=False).reset_index()

In [24]:
last_year_end_df[f"{predict_year}_Overall_Rank"] = 0
for i, proj_value in enumerate(last_year_end_df[f"{predict_year}_proj"]):
    last_year_end_df[f"{predict_year}_Overall_Rank"][i] = i + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  last_year_end_df[f"{predict_year}_Overall_Rank"][i] = i + 1


In [25]:
last_year_end_df = last_year_end_df.loc[:,["PlayerID","Name","Position",f"{predict_year}_Overall_Rank"]]
last_year_end_df.head(50)

Unnamed: 0,PlayerID,Name,Position,2022_Overall_Rank
0,21682,Jonathan Taylor,RB,1
1,18882,Cooper Kupp,WR,2
2,19798,Nick Chubb,RB,3
3,20790,Damien Harris,RB,4
4,22564,Ja'Marr Chase,WR,5
5,21861,Antonio Gibson,RB,6
6,19562,Austin Ekeler,RB,7
7,18858,Joe Mixon,RB,8
8,21768,Najee Harris,RB,9
9,21685,Justin Jefferson,WR,10


In [26]:
last_year_end_df.loc[last_year_end_df["Position"]== "QB"].head(10)

Unnamed: 0,PlayerID,Name,Position,2022_Overall_Rank
17,2593,Aaron Rodgers,QB,18
23,19801,Josh Allen,QB,24
28,21681,Justin Herbert,QB,29
32,18890,Patrick Mahomes,QB,33
34,4314,Tom Brady,QB,35
39,14252,Kirk Cousins,QB,40
40,21831,Jalen Hurts,QB,41
47,14536,Russell Wilson,QB,48
51,18055,Dak Prescott,QB,52
54,21693,Joe Burrow,QB,55


In [27]:
last_year_end_df.loc[last_year_end_df["Position"]== "RB"].head(10)

Unnamed: 0,PlayerID,Name,Position,2022_Overall_Rank
0,21682,Jonathan Taylor,RB,1
2,19798,Nick Chubb,RB,3
3,20790,Damien Harris,RB,4
5,21861,Antonio Gibson,RB,6
6,19562,Austin Ekeler,RB,7
7,18858,Joe Mixon,RB,8
8,21768,Najee Harris,RB,9
11,18872,Dalvin Cook,RB,12
12,17923,Ezekiel Elliott,RB,13
13,20824,Josh Jacobs,RB,14


In [28]:
last_year_end_df.loc[last_year_end_df["Position"]== "WR"].head(10)

Unnamed: 0,PlayerID,Name,Position,2022_Overall_Rank
1,18882,Cooper Kupp,WR,2
4,22564,Ja'Marr Chase,WR,5
9,21685,Justin Jefferson,WR,10
10,16470,Davante Adams,WR,11
14,20932,Deebo Samuel,WR,15
21,20875,DK Metcalf,WR,22
22,16597,Mike Evans,WR,23
25,16830,Tyler Lockett,WR,26
26,16568,Brandin Cooks,WR,27
29,18082,Tyreek Hill,WR,30


In [29]:
last_year_end_df.loc[last_year_end_df["Position"]== "TE"].head(10)

Unnamed: 0,PlayerID,Name,Position,2022_Overall_Rank
16,19803,Mark Andrews,TE,17
60,22508,Kyle Pitts,TE,61
100,19063,George Kittle,TE,101
101,16964,Darren Waller,TE,102
106,10974,Rob Gronkowski,TE,107
108,15048,Travis Kelce,TE,109
121,19853,Mike Gesicki,TE,122
122,14856,Zach Ertz,TE,123
133,19863,Dallas Goedert,TE,134
175,20850,Dawson Knox,TE,176


In [30]:
last_year_end_df.head(200).to_csv("top200.csv")
last_year_end_df.loc[last_year_end_df["Position"]== "QB"].head(10).to_csv("top10qbs.csv")
last_year_end_df.loc[last_year_end_df["Position"]== "RB"].head(10).to_csv("top10rbs.csv")
last_year_end_df.loc[last_year_end_df["Position"]== "WR"].head(10).to_csv("top10wrs.csv")
last_year_end_df.loc[last_year_end_df["Position"]== "TE"].head(10).to_csv("top10tes.csv")