In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the team table from the Kaggle dataset and preview its first rows
nfl_teams = pd.read_csv("/kaggle/input/nfl-scores-and-betting-data/nfl_teams.csv")
nfl_teams.head()

In [None]:
# Load the game scores / betting lines table and preview its first rows
scores = pd.read_csv("/kaggle/input/nfl-scores-and-betting-data/spreadspoke_scores.csv")
scores.head()

In [None]:
# Summary statistics of the raw scores table
scores.describe()

In [None]:
# Column dtypes and (rows, columns) shape of the raw scores table
scores.dtypes, scores.shape

In [None]:
# keep only games that have a favorite, a spread and an over-under line recorded
# (rows with a missing value in any of the three columns are dropped)
scores_with_fav = scores.dropna(subset = ["team_favorite_id", "spread_favorite", "over_under_line"], axis = 0)
scores_with_fav.shape

In [None]:
# turning over-under line to a numeric field; values that cannot be parsed
# become NaN (errors="coerce").
# .assign returns a new frame instead of writing a column into a frame that was
# derived from `scores`, which avoids pandas' chained-assignment warning.
scores_with_fav = scores_with_fav.assign(
    over_under_line=pd.to_numeric(scores_with_fav["over_under_line"], errors="coerce")
)
scores_with_fav.dtypes

In [None]:
# dropping all games whose over-under line is missing
scores_with_fav = scores_with_fav.dropna(subset = ["over_under_line"], axis = 0)
scores_with_fav.shape

In [None]:
# Preview the cleaned table
scores_with_fav.head()

In [None]:
# Show any rows of the raw scores table that are exact duplicates of an earlier row
# (the author's note: no duplicated data)
scores.loc[scores.duplicated()]

In [None]:
# adding total points (home score + away score) to df
# .assign builds a new frame, so no column is written into a frame derived from
# another one (no SettingWithCopyWarning) and re-running the cell is harmless.
scores_with_fav = scores_with_fav.assign(
    total_pts=scores_with_fav["score_home"] + scores_with_fav["score_away"]
)
# preview only the first rows instead of echoing the whole table
scores_with_fav.head()

In [None]:
def get_team_games(team, df):
    """Return separate dataframes for a given team's home and away games.

    Parameters
    ----------
    team : str
        Text to look for in the ``team_home`` / ``team_away`` columns. It is
        matched as a literal, case-sensitive substring (not as a regular
        expression), so names containing characters such as ``.`` are safe.
    df : pandas.DataFrame
        Frame with string columns ``team_home`` and ``team_away``.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(team_home, team_away)``: the rows where the team is the home side
        and the rows where it is the away side. Both are independent copies,
        so adding columns to them neither touches ``df`` nor triggers
        ``SettingWithCopyWarning``.
    """
    # na=False: a missing team name counts as "no match" instead of producing
    # a NaN in the mask, which .loc would refuse to index with.
    home_mask = df["team_home"].str.contains(team, regex=False, na=False)
    away_mask = df["team_away"].str.contains(team, regex=False, na=False)

    return df.loc[home_mask].copy(), df.loc[away_mask].copy()

## Colts Analysis

In [None]:
# Split the cleaned games into Colts home games and Colts away games and compare their sizes
# NOTE(review): the match is on the substring "Colts", so any team name containing it is included — confirm that is intended
colts_home, colts_away = get_team_games("Colts", scores_with_fav)
colts_home.shape, colts_away.shape

In [None]:
# Stack home and away games into one frame with a fresh 0..n-1 index.
# Author's recorded counts: 696 games of Colts data, 347 home, 349 away
# (these depend on the dataset version that was loaded)
colts_scores = pd.concat([colts_home, colts_away], ignore_index = True)
colts_scores.head()

In [None]:
# how many times were the Colts favored in games?
# Selects the rows whose favorite id is "IND"; the author recorded 320/696 games
colts_favored = colts_scores.loc[colts_scores["team_favorite_id"] == "IND"]
display(colts_favored.head(), colts_favored.shape)

In [None]:
# games that the colts are favored in when they are home -> 212/347 (author's recorded count)
# i.e. the favored games whose home team name contains "Colts"
colts_favored_home = colts_favored.loc[colts_favored["team_home"].str.contains("Colts")]
display(colts_favored_home.head(), colts_favored_home.shape)

In [None]:
# Point margin from the home side's perspective (home score minus away score)
# for the games where the Colts were favored at home.
# Writing the column with df["pts_diff"] = ... on a frame obtained through .loc
# is what raised SettingWithCopyWarning; .assign returns a new frame instead,
# so the warning goes away and the parent frame is never touched.
colts_favored_home = colts_favored_home.assign(
    pts_diff=colts_favored_home["score_home"] - colts_favored_home["score_away"]
)

In [None]:
# when the colts were favored at home, how often did they cover? 94/212
# "covered" = margin strictly larger than the size of the spread,
# "pushed"  = margin exactly equal to the size of the spread
colts_home_line = colts_favored_home.spread_favorite.abs()
colts_covered_hm = colts_favored_home.pts_diff.gt(colts_home_line)
colts_pushed_hm = colts_favored_home.pts_diff.eq(colts_home_line)
colts_covered_hm.sum(), colts_pushed_hm.sum()

In [None]:
# games that the colts are favored in when they are away -> 108/349 (author's recorded count)
colts_favored_rd = colts_favored.loc[colts_favored["team_away"].str.contains("Colts")]
display(colts_favored_rd.head(), colts_favored_rd.shape)

# adding in pts diff to colts road games (away score minus home score).
# .assign returns a new frame rather than writing into the .loc selection above,
# which would raise SettingWithCopyWarning.
colts_favored_rd = colts_favored_rd.assign(
    pts_diff=colts_favored_rd["score_away"] - colts_favored_rd["score_home"]
)
colts_favored_rd.head()

In [None]:
# when the colts were favored on the road, how often did they cover? 61/108
# "covered" = margin strictly larger than the size of the spread,
# "pushed"  = margin exactly equal to the size of the spread
colts_road_line = colts_favored_rd.spread_favorite.abs()
colts_covered_awy = colts_favored_rd.pts_diff.gt(colts_road_line)
colts_pushed_awy = colts_favored_rd.pts_diff.eq(colts_road_line)
colts_covered_awy.sum(), colts_pushed_awy.sum()

In [None]:
# Summary of Colts games: one tuple per table column, ordered as
# (total games, games favored, covers when favored, pushes when favored)
summary_rows = ["Total Games", "Favored", "Covered", "Pushed"]

colts_hm = (
    len(colts_home),
    len(colts_favored_home),
    colts_covered_hm.sum(),
    colts_pushed_hm.sum(),
)
colts_awy = (
    len(colts_away),
    len(colts_favored_rd),
    colts_covered_awy.sum(),
    colts_pushed_awy.sum(),
)
total = (
    len(colts_scores),
    len(colts_favored),
    colts_covered_hm.sum() + colts_covered_awy.sum(),
    colts_pushed_hm.sum() + colts_pushed_awy.sum(),
)

# Summary table for the games the Colts were favored in: one labelled Series per column
d_colts_fav = {
    column: pd.Series(values, index = summary_rows)
    for column, values in (("Colts Home", colts_hm), ("Colts Away", colts_awy), ("Total", total))
}

covered_sum = pd.DataFrame(d_colts_fav)
covered_sum

## Colts Analysis (2016-2020)
Let's analyze statistics from the past 5 seasons, 2016-2020.  Specifically, I'm going to focus on the average number of points
per game scored when the Colts are at home and when they are on the road.

In [None]:
# NOTE: this rebinds colts_home / colts_away, which previously held the
# all-seasons frames, to the season-filtered frames used in this section.
colts_home, colts_away = get_team_games("Colts", scores_with_fav)

# Keep the 2016-2020 seasons only (both bounds inclusive). The upper bound keeps
# the window at the five seasons this section is about even when the dataset
# contains later seasons.
# 83 games in 5 seasons (author's recorded count)
colts_home = colts_home[colts_home.schedule_season.between(2016, 2020)]
colts_away = colts_away[colts_away.schedule_season.between(2016, 2020)]
colts_home.shape, colts_away.shape

In [None]:
# Preview the season-filtered Colts home games
colts_home.head()

In [None]:
# Min / max / mean / std of the numeric columns of the Colts home games.
# Text columns (such as team_home) are excluded first: mean and std are not
# defined for them, and recent pandas versions raise a TypeError instead of
# silently skipping such columns.
colts_home.select_dtypes(include = "number").agg(['min', 'max', 'mean', 'std'])

In [None]:
# Preview the season-filtered Colts away games
colts_away.head()

In [None]:
# Points-per-game summary for the Colts and their opponents.
# Rows are labelled by where the COLTS played: "Home" uses Colts home games,
# "Away" uses Colts away games, and "Avg" is the plain (unweighted) mean of
# those two figures.
colts_ppg_home = colts_home.score_home.mean()
colts_ppg_away = colts_away.score_away.mean()
opp_ppg_in_colts_home = colts_home.score_away.mean()
opp_ppg_in_colts_away = colts_away.score_home.mean()

colts_avg_ppg = (colts_ppg_home + colts_ppg_away) / 2
opp_avg_ppg = (opp_ppg_in_colts_home + opp_ppg_in_colts_away) / 2

colts_sum_stats = (round(colts_ppg_home, 2), round(colts_ppg_away, 2), round(colts_avg_ppg, 2))
opponent_sum_stats = (round(opp_ppg_in_colts_home, 2), round(opp_ppg_in_colts_away, 2), round(opp_avg_ppg, 2))

# Assemble the two columns into the summary table
ppg_rows = ["Home", "Away", "Avg"]
d = {
    "Colts": pd.Series(colts_sum_stats, index = ppg_rows),
    "Opponent": pd.Series(opponent_sum_stats, index = ppg_rows),
}
colts_sum_table = pd.DataFrame(d)
colts_sum_table

In [None]:
# create visualization of summary table
# (matplotlib / seaborn are already imported and configured in the notebook's
# first cell, so they are not re-imported here)
colts_ax = colts_sum_table.plot(kind = "barh", figsize = (10,8), color = ['#1338BE', 'black'])
colts_ax.set_xlabel("Points per game")
# The frames behind this table are filtered to seasons after 2015, so the title
# names the 2016-2020 window rather than 2011-2020.
colts_ax.set_title("Colts Avg Pts/G from 2016-2020", fontsize = 16, fontweight = 'bold')

In [None]:
# All season-filtered Colts games (home and away) in one frame with a fresh index
colts_recent = pd.concat([colts_home, colts_away], ignore_index = True)
# how many times did a recent Colts game go "over"? 79/169 (author's recorded result)
# NOTE(review): 169 games does not match the "83 games in 5 seasons" noted where the
# frames are filtered — the recorded count looks stale; re-check after re-running.
# The comparison is strict, so a total equal to the line is not counted as "over".
colts_game_over = colts_recent["total_pts"] > colts_recent["over_under_line"]
colts_recent.shape[0], colts_game_over.sum()

## Bengals Analysis

In [None]:
# getting all bengals games, 337 home games, 337 away games (author's recorded counts)
bengals_home, bengals_away = get_team_games("Bengals", scores_with_fav)

# combining into one dataset with a fresh 0..n-1 index
bengals_scores = pd.concat([bengals_home, bengals_away], ignore_index = True)

In [None]:
# adding point differential (from the Bengals' perspective) to all bengals games:
# home score minus away score at home, away score minus home score on the road.
# .assign returns new frames rather than writing a column into frames that were
# selected from a larger table, which avoids SettingWithCopyWarning.
bengals_home = bengals_home.assign(
    pts_diff=bengals_home["score_home"] - bengals_home["score_away"]
)
bengals_away = bengals_away.assign(
    pts_diff=bengals_away["score_away"] - bengals_away["score_home"]
)

display(bengals_home.head(), bengals_away.head())

In [None]:
# out of 337 home games, bengals are favored in 183 (author's recorded counts)
# favored = the game's favorite id is "CIN"
bengals_favored_home = bengals_home.loc[bengals_home["team_favorite_id"] == "CIN"]
display(bengals_favored_home.head(), bengals_favored_home.shape)

In [None]:
# out of 337 away games, bengals are favored in 81 of them (author's recorded counts)
# favored = the game's favorite id is "CIN"
bengals_favored_away = bengals_away.loc[bengals_away["team_favorite_id"] == "CIN"]
display(bengals_favored_away.head(), bengals_favored_away.shape)

In [None]:
# how often do the bengals cover when they're favored at home
# "covered" = margin strictly larger than the size of the spread,
# "pushed"  = margin exactly equal to the size of the spread
bengals_home_line = bengals_favored_home.spread_favorite.abs()
bengals_covered_hm = bengals_favored_home.pts_diff.gt(bengals_home_line)
bengals_pushed_hm = bengals_favored_home.pts_diff.eq(bengals_home_line)
bengals_covered_hm.sum(), bengals_pushed_hm.sum()


In [None]:
# how often do the bengals cover when they're favored on the road
# "covered" = margin strictly larger than the size of the spread,
# "pushed"  = margin exactly equal to the size of the spread
bengals_road_line = bengals_favored_away.spread_favorite.abs()
bengals_covered_rd = bengals_favored_away.pts_diff.gt(bengals_road_line)
bengals_pushed_rd = bengals_favored_away.pts_diff.eq(bengals_road_line)
bengals_covered_rd.sum(), bengals_pushed_rd.sum()

In [None]:
# Every game the Bengals were favored in (home first, then away), re-indexed from 0
bengals_favored = pd.concat([bengals_favored_home, bengals_favored_away], ignore_index = True)

# Summary of Bengals games: one tuple per table column, ordered as
# (total games, games favored, covers when favored, pushes when favored)
summary_rows = ["Total Games", "Favored", "Covered", "Pushed"]

bengals_hm = (
    len(bengals_home),
    len(bengals_favored_home),
    bengals_covered_hm.sum(),
    bengals_pushed_hm.sum(),
)
bengals_awy = (
    len(bengals_away),
    len(bengals_favored_away),
    bengals_covered_rd.sum(),
    bengals_pushed_rd.sum(),
)
total = (
    len(bengals_scores),
    len(bengals_favored),
    bengals_covered_hm.sum() + bengals_covered_rd.sum(),
    bengals_pushed_hm.sum() + bengals_pushed_rd.sum(),
)

# Summary table for the games the Bengals were favored in: one labelled Series per column
d_bengals_fav = {
    column: pd.Series(values, index = summary_rows)
    for column, values in (("Bengals Home", bengals_hm), ("Bengals Away", bengals_awy), ("Total", total))
}

covered_sum = pd.DataFrame(d_bengals_fav)
covered_sum

## Bengals Analysis (2016-2020)

In [None]:
# getting the 2016-2020 seasons (both bounds inclusive). The upper bound keeps
# the window at the five seasons named in the section title even when the
# dataset contains later seasons.
# NOTE: this rebinds bengals_home / bengals_away, which previously held the
# all-seasons frames, to the season-filtered frames used in this section.
bengals_home, bengals_away = get_team_games("Bengals", scores_with_fav)
bengals_home = bengals_home.loc[bengals_home.schedule_season.between(2016, 2020)]
bengals_away = bengals_away.loc[bengals_away.schedule_season.between(2016, 2020)]
bengals_away.head()

In [None]:
# Points-per-game summary for the Bengals and their opponents.
# Rows are labelled by where the BENGALS played: "Home" uses Bengals home games,
# "Away" uses Bengals away games, and "Avg" is the plain (unweighted) mean of
# those two figures.
bengals_ppg_home = bengals_home.score_home.mean()
bengals_ppg_away = bengals_away.score_away.mean()
opp_ppg_in_bengals_home = bengals_home.score_away.mean()
opp_ppg_in_bengals_away = bengals_away.score_home.mean()

bengals_avg = (bengals_ppg_home + bengals_ppg_away)/2
opponent_avg = (opp_ppg_in_bengals_home + opp_ppg_in_bengals_away)/2

bengals_summary = (round(bengals_ppg_home, 2), round(bengals_ppg_away, 2), round(bengals_avg, 2))
opponent_summary = (round(opp_ppg_in_bengals_home, 2), round(opp_ppg_in_bengals_away, 2), round(opponent_avg, 2))

# Assemble the two columns into the summary table
ppg_rows = ["Home", "Away", "Avg"]
bengals_dict = {
    "Bengals": pd.Series(bengals_summary, index = ppg_rows),
    "Opponent": pd.Series(opponent_summary, index = ppg_rows),
}
bengals_sum_table = pd.DataFrame(bengals_dict)
bengals_sum_table

In [None]:
# Horizontal bar chart of the Bengals / opponent points-per-game table;
# the title is set on the axes returned by the plot call
bengals_ax = bengals_sum_table.plot(
    kind = "barh",
    figsize = (10,8),
    color = ['#FC6A03', 'black'],
)
bengals_ax.set_title("Points Scored in Bengals Games (2016-2020)", fontsize = 16, fontweight = 'bold')

# Conclusion

Betting on either the Colts or the Bengals when they are favored would be a losing proposition, because neither of these teams has covered in more than half of such games.  This seems to be even more true for the lowly Bengals.  The Colts have covered in 48% of all games that they have been favored in, while the Bengals have only covered in 46% of the games they have been favored in.

Over the past 5 years, the Bengals have had a rough stretch.  They have not had a single winning season while losing by an average of almost 5 points a game (including losing by an average of 6.58 points a game on the road).  

The Colts have been relatively decent over the past 5 years, posting 2 winning seasons and one 8-8 season.  Over the same 5 year time frame they have been winning games by an average of 0.5 points per game.