## Under or Over Ranked Teams

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.graph_objects as go

%matplotlib inline

In [21]:
# Load the scraped game results; each row is one game with the winner listed first.
records = pd.read_csv('scraped_results_df.csv')

# 'game_loc' indicates where the game was played. Since the winner is listed first in
# the original data, the '@' indicates the game was played at the loser's home. Therefore,
# we can create a new column called 'winner_home' if the '@' sign is not present.
# (Currently disabled, so no 'winner_home' column is produced.)
#records['winner_home'] = records['game_loc']!='@'

# The rank is included in the winner's and loser's names within parentheses, e.g. "(12)".
# The pattern captures the digits inside the parentheses. It is a raw string so the
# backslashes reach the regex engine untouched instead of being read as (invalid)
# Python string escapes.
rank_pattern = r'\(([0-9]+)\)'

for side in ('winner', 'loser'):
    # Extract the rank as a float; names without a "(N)" part yield NaN (unranked).
    # expand=False returns a Series rather than a one-column DataFrame.
    records[f'{side}_rank'] = (
        records[side].str.extract(rank_pattern, expand=False).astype('float')
    )
    # Strip the "(N)" rank and any non-breaking spaces from the team name.
    # regex=True is required explicitly: since pandas 2.0 str.replace treats the
    # pattern as a literal string by default, which would leave the rank in place.
    records[f'{side}_name'] = (
        records[side]
        .str.replace(rank_pattern, '', regex=True)
        .str.replace('\xa0', '', regex=False)
    )

# Calculate a rank_diff score. The more negative this is, the more of an upset it is.
# It is NaN whenever either team has no rank.
records['rank_diff'] = records['loser_rank'] - records['winner_rank']

# Add a pts_diff between the two scores, as we can use margin of victory to see how
# close a game was.
records['pts_diff'] = records['winner_pts'] - records['loser_pts']

# We no longer need the CSV's saved index column or the raw winner/loser strings, so
# drop them, then index the games by (year, week_number, row).
records = (
    records
    .drop(columns=['Unnamed: 0', 'winner', 'loser'])
    .set_index(['year', 'week_number', 'row'])
)

In [27]:
# Restrict the analysis to the 2009 through 2019 seasons. `records` is indexed by
# (year, week_number, row), so this label-based slice selects on the first index
# level (year) and includes both endpoints.
records_recent = records.loc[slice(2009, 2019)]



In [31]:
# Per-team averages over the games each team LOST. Group once and reuse the groupby.
losses_by_team = records_recent.groupby('loser_name')

# numeric_only=True is required: the frame still carries text columns (dates, notes,
# team names), and on pandas >= 2.0 mean() raises a TypeError on them instead of
# silently skipping them.
losers = losses_by_team.mean(numeric_only=True)

# Number of losses in which rank_diff is not NaN, i.e. both teams were ranked.
losers['num ranked games'] = losses_by_team['rank_diff'].count()

# Drop teams with a NaN in any averaged column (for example, teams that never lost a
# game in which both sides were ranked).
losers = losers.dropna()

# Most negative average rank_diff first: teams that, on average, lost to opponents
# ranked numerically higher (worse) than themselves.
losers.sort_values('rank_diff', ascending=True)


Unnamed: 0_level_0,winner_pts,loser_pts,winner_rank,loser_rank,rank_diff,pts_diff,num ranked games
loser_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Houston,40.377778,26.288889,22.285714,13.800000,-17.000000,14.088889,1
Utah State,33.534483,19.551724,17.076923,14.000000,-7.000000,13.982759,1
Alabama,32.076923,23.692308,8.153846,2.384615,-5.769231,8.384615,13
Ohio State,29.650000,18.550000,11.200000,4.846154,-5.142857,11.100000,7
Navy,34.358491,22.094340,11.636364,20.750000,-5.000000,12.264151,1
...,...,...,...,...,...,...,...
California,37.957746,20.549296,10.925926,19.857143,13.333333,17.408451,3
Boston College,29.478261,14.652174,10.920000,20.666667,15.000000,14.826087,1
Connecticut,34.962500,16.850000,16.888889,25.000000,16.000000,18.112500,2
Maryland,36.986842,17.973684,12.793103,25.000000,17.000000,19.013158,1


In [5]:
# Average stats over each team's losses, limited to teams with more than
# `min_ranked_games` losses in which both sides were ranked (rank_diff is not NaN).
min_ranked_games = 5
losses_by_team = records_recent.groupby('loser_name')
ranked_loss_counts = losses_by_team['rank_diff'].count()

# numeric_only=True keeps mean() from raising a TypeError on the text columns under
# pandas >= 2.0. Sorting ascending puts the most negative average rank_diff first.
(
    losses_by_team.mean(numeric_only=True)
    .loc[ranked_loss_counts > min_ranked_games]
    .sort_values('rank_diff', ascending=True)
    .head()
)

Unnamed: 0_level_0,winner_pts,loser_pts,winner_home,winner_rank,loser_rank,rank_diff,pts_diff
loser_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alabama,28.177419,19.532258,0.596774,9.95122,7.857143,-2.73913,8.645161
Miami (FL),30.658228,17.316456,0.64557,13.842105,10.935484,-2.631579,13.341772
Oklahoma,33.38,21.3,0.8,9.914286,7.586957,-2.375,12.08
Georgia,30.597015,19.626866,0.686567,11.489362,11.06,-1.525,10.970149
North Carolina,34.504065,19.813008,0.585366,14.56,18.083333,-1.5,14.691057


In [15]:
# Average stats over each team's wins, limited to teams with more than
# `min_ranked_games` wins in which both sides were ranked (rank_diff is not NaN).
min_ranked_games = 5
wins_by_team = records_recent.groupby('winner_name')
ranked_win_counts = wins_by_team['rank_diff'].count()

# numeric_only=True keeps mean() from raising a TypeError on the text columns under
# pandas >= 2.0. Sorting descending puts the largest average rank_diff first.
(
    wins_by_team.mean(numeric_only=True)
    .loc[ranked_win_counts > min_ranked_games]
    .sort_values('rank_diff', ascending=False)
)

Unnamed: 0_level_0,winner_pts,loser_pts,winner_home,winner_rank,loser_rank,rank_diff,pts_diff
winner_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Texas,39.545455,16.778409,0.625,8.129771,14.372093,7.129032,22.767045
Ohio State,36.966667,15.304762,0.671429,7.219388,12.923077,6.716667,21.661905
Alabama,35.927835,11.628866,0.716495,3.825806,11.676056,6.545455,24.298969
Southern California,37.685083,17.220994,0.60221,8.283582,13.942308,6.357143,20.464088
Oklahoma,42.554502,18.279621,0.672986,8.155779,13.308824,5.953125,24.274882
West Virginia,37.775641,18.49359,0.653846,13.473684,15.347826,5.461538,19.282051
Georgia,35.256684,15.962567,0.679144,10.066667,14.895833,5.333333,19.294118
Missouri,39.29078,18.148936,0.702128,15.387755,16.5,5.076923,21.141844
Miami (FL),36.689441,15.478261,0.621118,8.33,12.885714,4.923077,21.21118
Oregon,43.778409,21.215909,0.642045,9.40625,13.644444,4.868421,22.5625


In [7]:
# Games Iowa State lost with a positive rank_diff, i.e. both teams were ranked and
# the winner held the numerically lower rank.
lost_by_iowa_state = records_recent['loser_name'] == 'Iowa State'
winner_ranked_better = records_recent['rank_diff'] > 0
records_recent.loc[lost_by_iowa_state & winner_ranked_better]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,winner_pts,loser_pts,game_date,game_time,game_day,game_loc,notes,winner_home,winner_rank,winner_name,loser_rank,loser_name,rank_diff,pts_diff
year,week_number,row,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2002,9,431,49.0,3.0,"Oct 19, 2002",,Sat,,,True,2.0,Oklahoma,9.0,Iowa State,7.0,46.0
2002,10,494,21.0,10.0,"Oct 26, 2002",,Sat,,,True,7.0,Texas,17.0,Iowa State,10.0,11.0
2002,12,574,58.0,7.0,"Nov 9, 2002",,Sat,,,True,12.0,Kansas State,21.0,Iowa State,9.0,51.0
2017,12,675,49.0,42.0,"Nov 11, 2017",12:00 PM,Sat,@,"Jack Trice Stadium - Ames, Iowa",False,12.0,Oklahoma State,24.0,Iowa State,12.0,7.0
2018,13,752,24.0,10.0,"Nov 17, 2018",8:00 PM,Sat,,Darrell K Royal-Texas Memorial Stadium - Austi...,True,13.0,Texas,18.0,Iowa State,5.0,14.0
2018,19,867,28.0,26.0,"Dec 28, 2018",9:00 PM,Fri,,"Alamo Bowl (Alamodome - San Antonio, Texas)",True,12.0,Washington State,25.0,Iowa State,13.0,2.0


In [9]:
# Games Alabama lost with a negative rank_diff, i.e. both teams were ranked and
# the winner held the numerically higher rank (an upset by ranking).
lost_by_alabama = records_recent['loser_name'] == 'Alabama'
winner_ranked_worse = records_recent['rank_diff'] < 0
records_recent.loc[lost_by_alabama & winner_ranked_worse]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,winner_pts,loser_pts,game_date,game_time,game_day,game_loc,notes,winner_home,winner_rank,winner_name,loser_rank,loser_name,rank_diff,pts_diff
year,week_number,row,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000,4,158,21.0,0.0,"Sep 16, 2000",,Sat,@,,False,25.0,Southern Mississippi,15.0,Alabama,-10.0,21.0
2005,11,562,16.0,13.0,"Nov 12, 2005",,Sat,@,,False,5.0,Louisiana State,4.0,Alabama,-1.0,3.0
2005,12,603,28.0,18.0,"Nov 19, 2005",,Sat,,,True,11.0,Auburn,8.0,Alabama,-3.0,10.0
2007,4,218,26.0,23.0,"Sep 22, 2007",,Sat,@,,False,22.0,Georgia,16.0,Alabama,-6.0,3.0
2008,15,768,31.0,20.0,"Dec 6, 2008",,Sat,,"Georgia Dome - Atlanta, Georgia",True,2.0,Florida,1.0,Alabama,-1.0,11.0
2008,15,769,31.0,20.0,"Dec 6, 2008",,Sat,,"SEC Championship (Atlanta, GA)",True,2.0,Florida,1.0,Alabama,-1.0,11.0
2008,19,816,31.0,17.0,"Jan 2, 2009",,Fri,,"Sugar Bowl (New Orleans, LA)",True,7.0,Utah,4.0,Alabama,-3.0,14.0
2010,6,359,35.0,21.0,"Oct 9, 2010",,Sat,,,True,19.0,South Carolina,1.0,Alabama,-18.0,14.0
2010,10,558,24.0,21.0,"Nov 6, 2010",,Sat,,,True,12.0,Louisiana State,5.0,Alabama,-7.0,3.0
2012,11,657,29.0,24.0,"Nov 10, 2012",,Sat,@,,False,15.0,Texas A&M,1.0,Alabama,-14.0,5.0
