In [2]:
def load_and_prepare_data(year, file_prefix, columns, offense=True):
    """Load one season's team-stat CSV and return its team rows.

    The file is looked up as ``f'{file_prefix}{year}OFF.csv'`` when
    ``offense`` is true and ``f'{file_prefix}{year}DEF.csv'`` otherwise.

    Parameters
    ----------
    year : int
        Season; used in the file name and stored in the ``Year`` column.
    file_prefix : str
        Leading part of the file name (may include a directory).
    columns : list of str
        Names assigned positionally to the CSV's columns followed by the
        appended ``Year`` column, so its length must equal the CSV's column
        count plus one.
    offense : bool, default True
        Selects the ``OFF`` (True) or ``DEF`` (False) file.

    Returns
    -------
    pandas.DataFrame
        Rows 1..32 of the CSV (row 0 is skipped) with ``columns`` as the
        header and an integer ``Year``. If the file does not exist, a message
        is printed and an empty frame carrying ``columns`` is returned, so
        callers can still merge or select on the expected column names.
    """
    file_suffix = 'OFF' if offense else 'DEF'
    file_path = f'{file_prefix}{year}{file_suffix}.csv'
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        empty = pd.DataFrame(columns=columns)
        if 'Year' in empty.columns:
            # Keep the key dtype consistent with frames loaded from real files.
            empty = empty.astype({'Year': 'int'})
        return empty

    raw = pd.read_csv(file_path)
    raw['Year'] = year
    # Copy the slice so the assignments below act on an independent frame
    # instead of a view of `raw` (avoids SettingWithCopyWarning).
    data = raw.iloc[1:33].copy()
    data.columns = columns
    data['Year'] = data['Year'].astype('int')
    return data



In [3]:
# Column names for the offensive table. load_and_prepare_data assigns them
# positionally, and 'Year' comes last because the loader appends that column
# before renaming.
offensive_columns = [
    # Identification and overall totals
    'O_Rank', 'Team', 'O_Games_Played', 'O_Points_For',
    'O_Total_Yards', 'O_Plays', 'O_Y/Play', 'O_TO', 'O_Fumbles_Lost', 'O_1st_D',
    # O_P_* group (presumably passing)
    'O_P_Completions', 'O_P_Attempts', 'O_P_Yards', 'O_P_TD', 'O_P_Int',
    'O_P_Y/PA', 'O_P_Passing_1st_D',
    # O_R_* group (presumably rushing)
    'O_R_Att', 'O_R_Yards', 'O_R_TD', 'O_R_Y/A', 'O_R_1st_D',
    # O_Pe_* group (presumably penalties)
    'O_Pe', 'O_Pe_Yards', 'O_Pe_1st_D',
    # Drive-level summaries
    'O_Scoring_Drives', 'O_TO_%', 'O_Expected_Points',
    # Added by the loader
    'Year',
]

In [4]:
# Column names for the defensive table, assigned positionally by
# load_and_prepare_data; 'Year' is last because the loader appends it.
# NOTE(review): 'D_TD' and 'D_TD_%' sit where the offensive list has 'O_TO'
# and 'O_TO_%' - possibly a typo for turnovers. Renaming them would also
# require updating every later cell that refers to these names.
defensive_columns = [
    # Identification and overall totals
    'D_Rank', 'Team', 'D_Games_Played', 'D_Points_Allowed',
    'D_Total_Yards', 'D_Plays', 'D_Y/Play', 'D_TD', 'D_Fumbles_Lost', 'D_1st_D',
    # D_P_* group (presumably passing allowed)
    'D_P_Completions', 'D_P_Attempts', 'D_P_Yards', 'D_P_TD', 'D_P_Int',
    'D_P_Y/PA', 'D_P_Passing_1st_D',
    # D_R_* group (presumably rushing allowed)
    'D_R_Att', 'D_R_Yards', 'D_R_TD', 'D_R_Y/A', 'D_R_1st_D',
    # D_Pe_* group (presumably penalties)
    'D_Pe', 'D_Pe_Yards', 'D_Pe_1st_D',
    # Drive-level summaries
    'D_Scoring_Drives', 'D_TD_%', 'D_Expected_Points',
    # Added by the loader
    'Year',
]

# Historical seasons to load, newest first (2023 down to 2014).
years = list(range(2023, 2013, -1))

In [5]:
# Read one frame per season in `years`: all offensive files first, then all
# defensive files.
offensive_data = [
    load_and_prepare_data(year=season, file_prefix='NFL-', columns=offensive_columns, offense=True)
    for season in years
]
defensive_data = [
    load_and_prepare_data(year=season, file_prefix='NFL-', columns=defensive_columns, offense=False)
    for season in years
]

# Stack the per-season frames into a single table per side of the ball.
OFF_total = pd.concat(objs=offensive_data)
DEF_total = pd.concat(objs=defensive_data)

In [6]:
# Pair each team-season's offensive row with its defensive row. This is an
# inner join, so a team-season missing from either table is dropped.
combined_data = pd.merge(OFF_total, DEF_total, how='inner', on=['Team', 'Year'])

In [7]:
# Load the 2024 season (current offensive and defensive ranks); these rows are
# the ones the models later produce predictions for.
OFF_2024 = load_and_prepare_data(2024, 'NFL-', offensive_columns, offense=True)
DEF_2024 = load_and_prepare_data(2024, 'NFL-', defensive_columns, offense=False)

testing_set = OFF_2024.merge(DEF_2024, on=['Team', 'Year'])

# Write to the working directory, like every other file this notebook reads
# ('NFL-*.csv', 'superbowl.csv') or writes ('updated_ranks.csv'). The previous
# user-specific absolute path only worked on one machine.
testing_set.to_csv('testingset.csv', index=False)

In [None]:
# Combine the testing set with the combined data. The 2024 rows go first, so
# they occupy the leading positions of the resulting table.
# ignore_index=True gives the result a fresh 0..n-1 index; without it both
# inputs contribute labels starting at 0 and the index contains duplicates.
mydata = pd.concat([testing_set, combined_data], ignore_index=True)
mydata.head()

In [9]:
# Every statistic column (everything except the rank, 'Team' and 'Year'
# columns) is cast to float: offensive block first, then defensive block.
total_columns = [
    'O_Games_Played', 'O_Points_For',
    'O_Total_Yards', 'O_Plays', 'O_Y/Play', 'O_TO', 'O_Fumbles_Lost', 'O_1st_D',
    'O_P_Completions', 'O_P_Attempts', 'O_P_Yards', 'O_P_TD', 'O_P_Int',
    'O_P_Y/PA', 'O_P_Passing_1st_D',
    'O_R_Att', 'O_R_Yards', 'O_R_TD', 'O_R_Y/A', 'O_R_1st_D',
    'O_Pe', 'O_Pe_Yards', 'O_Pe_1st_D',
    'O_Scoring_Drives', 'O_TO_%', 'O_Expected_Points',
] + [
    'D_Games_Played', 'D_Points_Allowed',
    'D_Total_Yards', 'D_Plays', 'D_Y/Play', 'D_TD', 'D_Fumbles_Lost', 'D_1st_D',
    'D_P_Completions', 'D_P_Attempts', 'D_P_Yards', 'D_P_TD', 'D_P_Int',
    'D_P_Y/PA', 'D_P_Passing_1st_D',
    'D_R_Att', 'D_R_Yards', 'D_R_TD', 'D_R_Y/A', 'D_R_1st_D',
    'D_Pe', 'D_Pe_Yards', 'D_Pe_1st_D',
    'D_Scoring_Drives', 'D_TD_%', 'D_Expected_Points',
]

mydata[total_columns] = mydata[total_columns].astype(float)

In [None]:
# Read the Super Bowl results table from the working directory.
superbowl = pd.read_csv(filepath_or_buffer='superbowl.csv')

# Start every team-season with a 0 label.
# NOTE(review): a later cell drops this column (as 'Superbowl_x') after the
# merge, so the final label comes from `superbowl` instead.
mydata['Superbowl'] = 0

superbowl.head()

In [11]:
# Parse the 'Date' column into datetimes so the year can be extracted later.
superbowl['Date'] = superbowl['Date'].pipe(pd.to_datetime)

In [None]:
# Derive the calendar year in which each game was played.
superbowl['Year'] = superbowl['Date'].dt.year

# Keep only the join keys. The explicit copy makes `superbowl` an independent
# frame, so the column assignments in later cells do not raise
# SettingWithCopyWarning.
superbowl = superbowl[['Year', 'Winner']].copy()
superbowl.head()

In [13]:
# Shift the game's calendar year back by one so it lines up with the 'Year'
# key used in the team-stat tables (presumably because the game is played in
# the calendar year after the season - TODO confirm).
# NOTE: not idempotent - re-running this cell shifts the year again.
superbowl['Year'] = superbowl['Year'].sub(1)

In [14]:
# Cast 'Year' to object dtype.
# NOTE(review): a later cell casts 'Year' back to int before merging, so this
# conversion appears to have no lasting effect.
superbowl['Year'] = superbowl['Year'].astype(object)

In [15]:
# Every row of `superbowl` is a winner, so label them all 1; after the left
# merge onto the team table, non-matching team-seasons get NaN instead.
superbowl['Superbowl'] = 1

In [None]:
# Give both 'Year' key columns the same integer dtype before joining.
superbowl['Year'] = superbowl['Year'].astype(int)
mydata['Year'] = mydata['Year'].astype(int)

# Left join keeps every team-season. Both frames have a 'Superbowl' column, so
# the result carries 'Superbowl_x' (from mydata) and 'Superbowl_y' (from
# superbowl; NaN where the team did not match a winner for that year).
test = mydata.merge(
    superbowl,
    how='left',
    left_on=['Year', 'Team'],
    right_on=['Year', 'Winner'],
)
test['Superbowl_y'].value_counts()

In [17]:
# Discard the placeholder label from mydata and the redundant 'Winner' key.
# NOTE: re-running this cell raises KeyError because the columns are gone.
test = test.drop(columns=['Superbowl_x', 'Winner'])

In [18]:
# Allow up to 500 rows when a frame is displayed.
pd.set_option('display.max_rows', 500)

# Unmatched team-seasons came out of the left merge as NaN: turn them into 0,
# make the label an integer, and give the column its final name.
test['Superbowl_y'] = test['Superbowl_y'].fillna(0).astype('int')
test = test.rename(columns={'Superbowl_y': 'Superbowl'})

In [None]:
# Only a cell's last expression is displayed, so a bare `test.dtypes` on the
# first line was silently discarded; print it explicitly.
print(test.dtypes)

# Number of team-seasons labelled as Super Bowl winners.
test['Superbowl'].sum()

In [20]:
# Import necessary libraries
import sys
import pylab as pl
import scipy.optimize as opt
from sklearn import preprocessing
%matplotlib inline 
import matplotlib.pyplot as plt


In [None]:
# Display the merged, labelled table (an earlier cell raised
# display.max_rows to 500, so up to 500 rows are rendered).
test

In [22]:
# Set print options for numpy: raise the summarization threshold to
# sys.maxsize so arrays are printed in full instead of being abbreviated.
np.set_printoptions(threshold=sys.maxsize)


In [23]:
# Rank columns are cast to integers so they can be used as numeric features.
test['O_Rank'] = test['O_Rank'].astype(int)
test['D_Rank'] = test['D_Rank'].astype(int)

# Feature columns fed to the models: offensive block, then defensive block.
# NOTE(review): 'D_Fumbles_Lost' is included but 'O_Fumbles_Lost' is not -
# confirm the asymmetry is intentional.
training_columns = [
    # Offense
    'O_Rank', 'O_Points_For', 'O_Total_Yards', 'O_Y/Play', 'O_TO', 'O_1st_D',
    'O_P_Completions', 'O_P_Yards', 'O_P_TD', 'O_P_Int', 'O_P_Y/PA',
    'O_P_Passing_1st_D',
    'O_R_Yards', 'O_R_TD', 'O_R_Y/A', 'O_R_1st_D',
    'O_Pe', 'O_Pe_Yards', 'O_Scoring_Drives',
    # Defense
    'D_Rank', 'D_Points_Allowed', 'D_Total_Yards', 'D_Y/Play', 'D_TD',
    'D_Fumbles_Lost', 'D_1st_D',
    'D_P_Completions', 'D_P_Yards', 'D_P_TD', 'D_P_Int', 'D_P_Y/PA',
    'D_P_Passing_1st_D',
    'D_R_Yards', 'D_R_TD', 'D_R_Y/A', 'D_R_1st_D',
    'D_Pe', 'D_Pe_Yards', 'D_Scoring_Drives',
]

# Feature matrix and label vector as plain numpy arrays.
X = test[training_columns].to_numpy()
y = test['Superbowl'].to_numpy()

In [24]:
from sklearn import preprocessing

# Standardize the training columns.
# The first 32 rows are the 2024 teams to be predicted (the same 32/rest split
# the notebook uses for X_test / X_train). The scaler is fitted on the
# remaining historical rows only, so the prediction rows do not leak into the
# mean/variance estimates, and is then applied to every row.
# Scaling starts from the unscaled feature table rather than from `X`, so
# re-running this cell does not standardize already-standardized values.
X_raw = test[training_columns].values
scaler = preprocessing.StandardScaler().fit(X_raw[32:])
X = scaler.transform(X_raw)

In [None]:
# Select training and testing data.
# Index labels 0-31 / the first 32 rows are used as the prediction set (the
# 2024 teams were concatenated first); everything after is training data.
teams, O_rank, D_rank = (
    test.loc[0:31, column] for column in ('Team', 'O_Rank', 'D_Rank')
)

X_test, X_train = X[:32], X[32:]
y_test, y_train = y[:32], y[32:]

# Shapes of the four arrays, one per line.
print(X_test.shape, y_test.shape, X_train.shape, y_train.shape, sep='\n')



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# Logistic regression (C=0.01, liblinear solver) fitted on the historical
# seasons.
LR = LogisticRegression(C=0.01, solver='liblinear')
LR.fit(X_train, y_train)

# Hard 0/1 predictions for the prediction-set rows.
predicts = LR.predict(X_test)
predicts

In [None]:
from sklearn import metrics

# Evaluate Logistic Regression Model.
# NOTE: the score is computed on the same rows the model was fitted on, so it
# is an in-sample accuracy rather than an estimate of predictive performance.
predicts_binary = LR.predict(X_train)
print(
    "Logistic Regression Accuracy:",
    metrics.accuracy_score(y_true=y_train, y_pred=predicts_binary),
)

In [None]:
# Predictions DataFrame for Logistic Regression: team name alongside the
# model's hard 0/1 prediction.
predictions = pd.DataFrame({'Team': teams}).assign(Predictions=predicts)
predictions

In [None]:
# Per-class probabilities from the logistic regression for the prediction-set
# rows; one row per team, one column per class.
predicts_prob = LR.predict_proba(X_test)
predicts_prob

In [30]:
# Prepare predictions DataFrame with probabilities: teams ordered from highest
# to lowest logistic-regression probability, plus a rank of that probability
# (1 = highest). Column 1 of predict_proba is taken as the positive class
# (assumes both labels 0 and 1 occur in y_train).
predictions2 = (
    pd.DataFrame({
        'Team': teams,
        'Prediction': predicts_prob[:, 1],
        'O_Rank': O_rank,
        'D_Rank': D_rank,
    })
    .sort_values(by='Prediction', ascending=False)
    .assign(Prediction_Rank=lambda frame: frame['Prediction'].rank(ascending=False))
)

In [31]:
# AdaBoost with SVC
from sklearn.ensemble import AdaBoostClassifier

from sklearn.svm import SVC
from sklearn import metrics

# Both estimators use randomness (SVC's probability calibration and AdaBoost's
# own random state), so they are seeded to make the fitted model and its
# predicted probabilities reproducible across kernel restarts.
svc = SVC(probability=True, kernel='linear', random_state=42)
abc = AdaBoostClassifier(n_estimators=20, estimator=svc, learning_rate=1, random_state=42)
model = abc.fit(X_train, y_train)

In [32]:
# AdaBoost Predictions
# Class probabilities for the prediction-set rows ...
y_pred = model.predict_proba(X_test)
# ... and hard labels for the training rows (used for the accuracy check).
y_pred_binary = model.predict(X_train)

In [33]:
# Prepare predictions DataFrame for AdaBoost: teams ordered from highest to
# lowest predicted probability, plus a rank of that probability (1 = highest).
# Column 1 of predict_proba is taken as the positive class (assumes both
# labels 0 and 1 occur in y_train).
predictions3 = (
    pd.DataFrame({
        'Team': teams,
        'Prediction': y_pred[:, 1],
        'O_Rank': O_rank,
        'D_Rank': D_rank,
    })
    .sort_values(by='Prediction', ascending=False)
    .assign(Prediction_Rank=lambda frame: frame['Prediction'].rank(ascending=False))
)

In [None]:
# Display the AdaBoost prediction table (sorted by predicted probability).
predictions3

In [None]:
# Calculate Average and New Rank: average the offensive and defensive ranks,
# dense-rank that average (ties share a rank, no gaps), and order the teams
# by the resulting rank.
df = (
    predictions3
    .assign(Average_Rank=lambda frame: (frame['O_Rank'] + frame['D_Rank']) / 2)
    .assign(New_Rank=lambda frame: frame['Average_Rank'].rank(method='dense'))
    .sort_values(by='New_Rank')
    .reset_index(drop=True)
)
df

In [None]:
# Print final DataFrame, restricted to the ranking columns.
print(df.loc[:, ['Team', 'O_Rank', 'D_Rank', 'Average_Rank', 'New_Rank']])

In [37]:
import pandas as pd

# Sample DataFrame
# Written as one (team, offensive rank, defensive rank, losses) record per
# line, then transposed into the column-oriented dict of lists consumed by
# pd.DataFrame in the next cell.
data = dict(zip(
    ('Team', 'O_Rank', 'D_Rank', 'Losses'),
    map(list, zip(*[
        ('Detroit Lions', 1, 8, 2),
        ('Buffalo Bills', 2, 11, 3),
        ('Minnesota Vikings', 9, 2, 2),
        ('Green Bay Packers', 7, 6, 4),
        ('Denver Broncos', 10, 4, 6),
        ('Kansas City Chiefs', 11, 3, 1),
        ('Philadelphia Eagles', 8, 5, 3),
        ('Baltimore Ravens', 3, 19, 5),
        ('Pittsburgh Steelers', 12, 7, 5),
        ('Los Angeles Chargers', 18, 1, 6),
        ('Washington Commanders', 4, 18, 5),
        ('Houston Texans', 13, 9, 6),
        ('Tampa Bay Buccaneers', 5, 22, 7),
        ('Seattle Seahawks', 15, 12, 7),
        ('Arizona Cardinals', 14, 13, 8),
        ('Cincinnati Bengals', 6, 28, 8),
        ('Los Angeles Rams', 17, 17, 6),
        ('Miami Dolphins', 23, 10, 8),
        ('San Francisco 49ers', 16, 21, 9),
        ('New Orleans Saints', 22, 16, 10),
        ('New York Jets', 24, 15, 11),
        ('Atlanta Falcons', 19, 20, 7),
        ('Chicago Bears', 26, 14, 11),
        ('Indianapolis Colts', 21, 23, 8),
        ('Dallas Cowboys', 20, 30, 8),
        ('Tennessee Titans', 25, 31, 12),
        ('New England Patriots', 30, 24, 12),
        ('Las Vegas Raiders', 29, 27, 12),
        ('New York Giants', 32, 25, 13),
        ('Jacksonville Jaguars', 28, 29, 12),
        ('Cleveland Browns', 31, 26, 12),
        ('Carolina Panthers', 27, 32, 12),
    ])),
))

In [38]:
# Calculate Average Rank including Losses: the mean of offensive rank,
# defensive rank and loss count, dense-ranked (ties share a rank, no gaps),
# with teams ordered by the resulting rank.
df = (
    pd.DataFrame(data)
    .assign(Average_Rank=lambda frame: (frame['O_Rank'] + frame['D_Rank'] + frame['Losses']) / 3)
    .assign(New_Rank=lambda frame: frame['Average_Rank'].rank(method='dense'))
    .sort_values(by='New_Rank')
    .reset_index(drop=True)
)

In [None]:
# Print final DataFrame, restricted to the ranking inputs and outputs.
print(df.loc[:, ['Team', 'O_Rank', 'D_Rank', 'Losses', 'Average_Rank', 'New_Rank']])

In [None]:
# Order by the new rank, breaking ties with the offensive rank (both
# ascending), then print the whole table.
df_sorted = df.sort_values(by=['New_Rank', 'O_Rank'], ascending=True)
print(df_sorted)

In [41]:
# Save the sorted ranking to 'updated_ranks.csv' in the working directory,
# without writing the index as a column.
df_sorted.to_csv('updated_ranks.csv', index = False)

In [None]:
# Evaluate AdaBoost Model.
# NOTE: y_pred_binary holds predictions for the training rows, so this is an
# in-sample accuracy rather than an estimate of predictive performance.
print(
    "AdaBoost Accuracy:",
    metrics.accuracy_score(y_true=y_train, y_pred=y_pred_binary),
)