<h2>Import libraries

In [136]:
import pandas as pd
import numpy as np
# `plt` is the conventional alias for the pyplot interface; aliasing the
# top-level `matplotlib` package instead leaves plt.plot / plt.subplots undefined.
import matplotlib.pyplot as plt
import seaborn as sns

<h2> Import data file & display data

In [137]:
# Read the Heisman CSV; the relative path is resolved against the kernel's
# current working directory.
heisman_df = pd.read_csv(filepath_or_buffer="Heisman Info.csv")

# Render the freshly loaded frame for a visual sanity check.
display(heisman_df)

Unnamed: 0,Year,Winner,Position,Win,Loss,Yards,TD
0,2000,Chris Weinke,QB,11,2,4167,33
1,2001,Eric Crouch,QB,11,2,2688,26
2,2002,Carson Palmer,QB,11,2,3942,33
3,2003,Jason White,QB,12,2,3846,40
4,2004,Matt Leinart,QB,13,0,3322,33
5,2005,Reggie Bush,RB,12,1,2218,18
6,2006,Troy Smith,QB,12,1,2542,30
7,2007,Tim Tebow,QB,9,4,4181,55
8,2008,Sam Bradford,QB,12,2,4720,50
9,2009,Mark Ingram,RB,14,0,1992,20


<h2> Check data types of fields to ensure they are the desired type.

In [138]:
# Show the dtype pandas inferred for every column so it can be compared by eye
# with what the later numeric aggregations (means of Win/Loss/Yards/TD) need.
heisman_df.dtypes

Year         int64
Winner      object
Position    object
Win          int64
Loss         int64
Yards        int64
TD           int64
dtype: object

<h2> Limit dataset to QBs only, as the position has a 75% chance of winning this year

In [139]:
# Keep only the rows whose Position is exactly "QB". The result is bound back to
# heisman_df, so every later cell works on quarterbacks only.
heisman_df = heisman_df[heisman_df["Position"] == "QB"]
display(heisman_df)

Unnamed: 0,Year,Winner,Position,Win,Loss,Yards,TD
0,2000,Chris Weinke,QB,11,2,4167,33
1,2001,Eric Crouch,QB,11,2,2688,26
2,2002,Carson Palmer,QB,11,2,3942,33
3,2003,Jason White,QB,12,2,3846,40
4,2004,Matt Leinart,QB,13,0,3322,33
6,2006,Troy Smith,QB,12,1,2542,30
7,2007,Tim Tebow,QB,9,4,4181,55
8,2008,Sam Bradford,QB,12,2,4720,50
10,2010,Cam Newton,QB,14,0,4327,50
11,2011,Robert Griffin III,QB,10,3,4992,47


<h2>Validate data only has QBs

In [140]:
# List the distinct Position values left in the frame after filtering.
heisman_df["Position"].unique()

array(['QB'], dtype=object)

<h2>Create dataframe for outlier group

In [141]:
# Outlier group: the rows for the four hand-picked seasons listed below.
qb_outliers = heisman_df[heisman_df["Year"].isin([2007, 2011, 2016, 2022])]
display(qb_outliers)

Unnamed: 0,Year,Winner,Position,Win,Loss,Yards,TD
7,2007,Tim Tebow,QB,9,4,4181,55
11,2011,Robert Griffin III,QB,10,3,4992,47
16,2016,Lamar Jackson,QB,9,4,5114,51
22,2022,Caleb Willaims,QB,11,3,4919,52


<h2>Create dataframe for standard group, excluding Jayden Daniels

In [142]:
# Standard group: QB rows from every season except the four outlier years and
# 2023 (the season kept aside for the Daniels-only frame).
qb_standard = heisman_df[
    ~heisman_df["Year"].isin([2007, 2011, 2016, 2022, 2023])
    & (heisman_df["Position"] == "QB")
]
display(qb_standard)

Unnamed: 0,Year,Winner,Position,Win,Loss,Yards,TD
0,2000,Chris Weinke,QB,11,2,4167,33
1,2001,Eric Crouch,QB,11,2,2688,26
2,2002,Carson Palmer,QB,11,2,3942,33
3,2003,Jason White,QB,12,2,3846,40
4,2004,Matt Leinart,QB,13,0,3322,33
6,2006,Troy Smith,QB,12,1,2542,30
8,2008,Sam Bradford,QB,12,2,4720,50
10,2010,Cam Newton,QB,14,0,4327,50
12,2012,Johnny Manziel,QB,11,2,5116,47
13,2013,Jamies Winson,QB,14,0,4057,40


<h2>Get avg win/loss record for standard group

In [143]:
# Mean wins and mean losses of the standard group, one row per Position.
qb_win_loss_standard = qb_standard.groupby(["Position"])[["Win", "Loss"]].mean()
display(qb_win_loss_standard)

Unnamed: 0_level_0,Win,Loss
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
QB,12.4,1.4


<h2>Get avg win/loss record for outlier group

In [144]:
# Mean wins and mean losses of the outlier group, one row per Position.
qb_win_loss_outliers = qb_outliers.groupby(["Position"])[["Win", "Loss"]].mean()
display(qb_win_loss_outliers)

Unnamed: 0_level_0,Win,Loss
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
QB,9.75,3.5


<h2>Get average yards for outlier group, excluding Tebow

In [145]:
# Average passing yards of the outlier group with the 2007 season left out.
# Round to the nearest whole yard *before* casting: a bare .astype(int)
# truncates toward zero, so a mean of e.g. 5008.9 would be reported as 5008.
qb_yard_avg_outliers = (
    qb_outliers.query('Year != 2007')
    .groupby(["Position"])
    .agg({"Yards": "mean"})
    .round()
    .astype(int)
)
display(qb_yard_avg_outliers)

Unnamed: 0_level_0,Yards
Position,Unnamed: 1_level_1
QB,5008


<h2>Get average TDs for outlier group, including Tebow

In [146]:
# Average touchdowns of the whole outlier group (2007 included here).
# Round to the nearest whole touchdown *before* casting: a bare .astype(int)
# truncates toward zero, so a mean of e.g. 51.75 would be reported as 51.
qb_td_avg_outliers = (
    qb_outliers.groupby(["Position"])
    .agg({"TD": "mean"})
    .round()
    .astype(int)
)
display(qb_td_avg_outliers)

Unnamed: 0_level_0,TD
Position,Unnamed: 1_level_1
QB,51


<h2>Merge yards data with TD data for outlier group

In [147]:
# Put the outlier group's average yards and average TDs side by side by
# inner-merging the two one-column summaries on their shared "Position" key.
qb_yds_td_avg = pd.merge(
    qb_yard_avg_outliers,
    qb_td_avg_outliers,
    how="inner",
    on=["Position"],
)
display(qb_yds_td_avg)

Unnamed: 0_level_0,Yards,TD
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
QB,5008,51


<h2>Create dataframe for Daniels only

In [148]:
# Pull the 2023 season row(s) and keep just the three columns needed for the
# comparison against the outlier-group averages.
jd_df = heisman_df.loc[heisman_df["Year"] == 2023, ["Position", "Yards", "TD"]]
display(jd_df)

Unnamed: 0,Position,Yards,TD
23,QB,4946,50
