# NFL Super Bowl & MVP analysis

## Imports

In [1]:
# Import Dependencies
import numpy as np
import datetime as dt
from datetime import datetime
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
import requests
import gmaps

In [2]:
# Import Configurations

# Import DB User id & password 
from Configs import  db_user
from Configs import db_password

# Import API Key

from Configs import g_key
# Register the Google API key with gmaps before any map is built
gmaps.configure(api_key=g_key)

In [3]:
# Import Functions 
from Functions import int_to_roman

## Script notes

In [4]:
# Script Notes: This script will:
#
# Read the csv file superbowl.csv in Resources directory to fetch NFL superbowl history into a DataFrame
# Sort & Transform the DataFrame
# Read the nfl_players.json file in Resources directory to fetch the information about NFL players into a DataFrame
# Clean-up, Index & Transform the DataFrame
#
# For each Superbowl match:
#    Fetch Latitude & Longitude information using Google API
#    Add Latitude & Longitude information to the superbowl DataFrame
#
# Load the superbowl & players DataFrames to Postgres nfl_db
#
# Load DataFrame(s) to Postgres SQL tables
# Join the tables and save as a view
# Display data from the view to confirm data loaded
#

## Load superbowl data from CSV file 

In [5]:
# Read the Superbowl history CSV into a DataFrame

# Relative path of the Superbowl input CSV file
suberbowl_file = "../Resources/superbowl.csv"

# Parse the CSV file
superbowl_df = pd.read_csv(filepath_or_buffer=suberbowl_file)

# Preview the first rows
superbowl_df.head()

Unnamed: 0,Date,SB,Winner,Winner Pts,Loser,Loser Pts,MVP,Stadium,City,State
0,Feb 2 2020,LIV (54),Kansas City Chiefs,31,San Francisco 49ers,20,Patrick Mahomes,Hard Rock Stadium,Miami Gardens,Florida
1,Feb 3 2019,LIII (53),New England Patriots,13,Los Angeles Rams,3,Julian Edelman,Mercedes-Benz Stadium,Atlanta,Georgia
2,Feb 4 2018,LII (52),Philadelphia Eagles,41,New England Patriots,33,Nick Foles,U.S. Bank Stadium,Minneapolis,Minnesota
3,Feb 5 2017,LI (51),New England Patriots,34,Atlanta Falcons,28,Tom Brady,NRG Stadium,Houston,Texas
4,Feb 7 2016,50,Denver Broncos,24,Carolina Panthers,10,Von Miller,Levi's Stadium,Santa Clara,California


### Data clean-up & transformation (superbowl)

In [6]:
# Cleanup & Transform the superbowl DataFrame

# Work on a copy so the raw superbowl_df stays untouched
superbowl_t_df = superbowl_df.copy()

# Note: not every row of the SB column carries both the arabic and the roman number of the game,
# so a numeric id is derived from the chronological order and converted to a roman numeral for the name.

# Parse the CSV date text into real datetimes, then sort oldest game first
superbowl_t_df['date'] = pd.to_datetime(superbowl_t_df['Date'])
superbowl_t_df.sort_values('date', ascending=True, inplace=True)

# Number the games 1..N in date order and build the matching "Superbowl <roman>" names
id_list = list(range(1, len(superbowl_t_df) + 1))
id_roman_list = ["Superbowl " + str(int_to_roman(game_no)) for game_no in id_list]
superbowl_t_df['id'] = id_list
superbowl_t_df['superbowl'] = id_roman_list

# Add a column for score ("<winner pts> - <loser pts>")
superbowl_t_df['score'] = superbowl_t_df['Winner Pts'].astype(str) + ' - ' + superbowl_t_df['Loser Pts'].astype(str)

# Clean up the MVP column to remove the "+" marker.
# regex=False: "+" is a regex metacharacter, so it must be matched literally
# regardless of the pandas version's default for `regex`.
superbowl_t_df['MVP'] = superbowl_t_df['MVP'].str.replace('+', '', regex=False)

# Drop the raw columns that have been replaced above (by name, so a change
# in the CSV column order cannot silently drop the wrong columns)
superbowl_t_df.drop(columns=["Date", "SB", "Winner Pts", "Loser Pts"], inplace=True)

# Rename columns to match the columns in the postgres table
superbowl_t_df.rename(
    columns={
        "Winner": "winner",
        "Loser": "loser",
        "MVP": "mvp",
        "Stadium": "stadium",
        "date": "superbowl_date",
        "City": "city",
        "State": "state",
    },
    inplace=True,
)

# Rearrange the columns to match the table definition
# (.copy() so the new columns below are added to an independent frame, not a slice)
cols = ['id', 'superbowl', 'superbowl_date', 'winner', 'loser', 'score', 'mvp', 'city', 'state', 'stadium']
superbowl_t_df = superbowl_t_df[cols].copy()

# Placeholder coordinate columns; NaN (not "") keeps them numeric and
# is written to the database as NULL if a lookup never fills them in
superbowl_t_df["lat"] = np.nan
superbowl_t_df["lng"] = np.nan

# Set index as id
superbowl_t_df.set_index("id", inplace=True)

# Display the DataFrame
superbowl_t_df.head()

Unnamed: 0_level_0,superbowl,superbowl_date,winner,loser,score,mvp,city,state,stadium,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Superbowl I,1967-01-15,Green Bay Packers,Kansas City Chiefs,35 - 10,Bart Starr,Los Angeles,California,Memorial Coliseum,,
2,Superbowl II,1968-01-14,Green Bay Packers,Oakland Raiders,33 - 14,Bart Starr,Miami,Florida,Orange Bowl,,
3,Superbowl III,1969-01-12,New York Jets,Baltimore Colts,16 - 7,Joe Namath,Miami,Florida,Orange Bowl,,
4,Superbowl IV,1970-01-11,Kansas City Chiefs,Minnesota Vikings,23 - 7,Len Dawson,New Orleans,Louisiana,Tulane Stadium,,
5,Superbowl V,1971-01-17,Baltimore Colts,Dallas Cowboys,16 - 13,Chuck Howley,Miami,Florida,Orange Bowl,,


### Get Latitude & Longitude information (Google Maps API)

In [7]:
# Base URL to call the Google Geocoding API
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# params dict to call Google API - "address" is updated for each new city
params = {"key": g_key}

# Coordinates already looked up, keyed by address, so that a city hosting
# several Superbowls is requested from the API only once
geocode_cache = {}

# Loop through superbowl_t_df and run a lat/lng search for each host city
for index, row in superbowl_t_df.iterrows():
    address = f"{row['city']},{row['state']}"

    if address not in geocode_cache:
        params['address'] = address

        # timeout keeps a stalled connection from hanging the notebook;
        # raise_for_status surfaces HTTP errors instead of parsing an error page
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()

        results = payload.get("results", [])
        if results:
            location = results[0]["geometry"]["location"]
            geocode_cache[address] = (location["lat"], location["lng"])
        else:
            # No match (or the request was rejected): report it and leave the
            # coordinates empty rather than failing with an IndexError
            print(f"No geocoding result for {address} (status: {payload.get('status')})")
            geocode_cache[address] = (None, None)

    # Add to DataFrame
    lat, lng = geocode_cache[address]
    superbowl_t_df.loc[index, "lat"] = lat
    superbowl_t_df.loc[index, "lng"] = lng

# Display the DataFrame to confirm lat/lng appear
superbowl_t_df.head()

Unnamed: 0_level_0,superbowl,superbowl_date,winner,loser,score,mvp,city,state,stadium,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Superbowl I,1967-01-15,Green Bay Packers,Kansas City Chiefs,35 - 10,Bart Starr,Los Angeles,California,Memorial Coliseum,34.0522,-118.244
2,Superbowl II,1968-01-14,Green Bay Packers,Oakland Raiders,33 - 14,Bart Starr,Miami,Florida,Orange Bowl,25.7617,-80.1918
3,Superbowl III,1969-01-12,New York Jets,Baltimore Colts,16 - 7,Joe Namath,Miami,Florida,Orange Bowl,25.7617,-80.1918
4,Superbowl IV,1970-01-11,Kansas City Chiefs,Minnesota Vikings,23 - 7,Len Dawson,New Orleans,Louisiana,Tulane Stadium,29.9511,-90.0715
5,Superbowl V,1971-01-17,Baltimore Colts,Dallas Cowboys,16 - 13,Chuck Howley,Miami,Florida,Orange Bowl,25.7617,-80.1918


## Load player data from JSON file 

In [8]:
# Read the NFL player information from the JSON file into a DataFrame

# Relative path of the player info JSON file
player_file = "../Resources/nfl_players.json"

# Parse the JSON file
players_df = pd.read_json(path_or_buf=player_file)

# Preview the first rows
players_df.head()

Unnamed: 0,player_id,name,position,height,weight,current_team,birth_date,birth_place,death_date,college,high_school,draft_team,draft_round,draft_position,draft_year,current_salary,hof_induction_year
0,1809,Robert Blackmon,DB,6-0,208.0,,1967-05-12,"Bay City, TX",,Baylor,"Van Vleck, TX",Seattle Seahawks,2.0,34.0,1990.0,,
1,23586,Dean Wells,LB,6-3,248.0,,1970-07-20,"Louisville, KY",,Kentucky,"Holy Cross, KY",Seattle Seahawks,4.0,85.0,1993.0,,
2,355,Kiko Alonso,ILB,6-3,238.0,Miami Dolphins,1990-08-14,"Newton, MA",,Oregon,"Los Gatos, CA",Buffalo Bills,2.0,46.0,2013.0,1075000.0,
3,18182,Steve Ramsey,QB,6-2,210.0,,1948-04-22,"Dallas, TX",1999-10-15,North Texas,"W.W. Samuell, TX",New Orleans Saints,5.0,126.0,1970.0,,
4,16250,Cory Nelms,CB,6-0,195.0,,1988-02-27,"Neptune, NJ",,Miami (FL),"Neptune, NJ",,,,,,


### Data clean-up & transformation (players)

In [9]:
# Cleanup & Transform the players DataFrame

# Retain only the columns needed; .copy() gives an independent frame so the
# raw players_df is never modified (a separate full copy beforehand is unnecessary)
players_t_df = players_df[["player_id", "name", "position", "height", "weight", "current_team", "birth_date"]].copy()

# Placeholder value for every player.
# NOTE(review): presumably meant to hold `height` (e.g. "6-0") converted to inches - confirm
players_t_df["height_inch"] = 0

# Rearrange the columns to match the table
cols = ["player_id", "name", "position", "height", "height_inch", "weight", "current_team", "birth_date"]
players_t_df = players_t_df[cols]

# Use player_id as the index
players_t_df.set_index("player_id", inplace=True)

# Display the DataFrame
players_t_df.head()

Unnamed: 0_level_0,name,position,height,height_inch,weight,current_team,birth_date
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1809,Robert Blackmon,DB,6-0,0,208.0,,1967-05-12
23586,Dean Wells,LB,6-3,0,248.0,,1970-07-20
355,Kiko Alonso,ILB,6-3,0,238.0,Miami Dolphins,1990-08-14
18182,Steve Ramsey,QB,6-2,0,210.0,,1948-04-22
16250,Cory Nelms,CB,6-0,0,195.0,,1988-02-27


## Load data from Dataframe(s) to Postgres DB

### Establish connection to database 

In [10]:
from urllib.parse import quote_plus

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user id or password cannot be mistaken for URL delimiters.
# NOTE(review): assumes Configs stores the raw (not already URL-encoded) values - confirm
rds_connection_string = f"{quote_plus(db_user)}:{quote_plus(db_password)}@localhost:5432/nfl_db"

# Engine for the local Postgres nfl_db database
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [19]:
# List the tables in the database through the inspector;
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0
sqlalchemy.inspect(engine).get_table_names()

['superbowl', 'players']

### Use pandas to load CSV & API converted Superbowl DataFrame into database

In [None]:
# Append the transformed superbowl rows, index included, to the superbowl table
superbowl_t_df.to_sql(
    'superbowl',
    engine,
    if_exists='append',
    index=True,
)

### Use pandas to load JSON converted Players DataFrame into database

In [None]:
# Append the transformed players rows, index included, to the players table
players_t_df.to_sql(
    'players',
    engine,
    if_exists='append',
    index=True,
)

### Confirm data has been added by querying the view

In [24]:
# Query the SQL view (join of the superbowl & players tables) into a DataFrame
superbowl_players_df = pd.read_sql_query(
    sql='select * from superbowl_player_view',
    con=engine,
)

In [25]:
# Display up to the first 100 rows of the DataFrame
superbowl_players_df.head(n=100)

Unnamed: 0,id,superbowl,superbowl_date,winner,loser,score,mvp,city,state,stadium,lat,lng,player_id,name,position,height,height_inch,weight,current_team,birth_date
0,1,Superbowl I,1967-01-15,Green Bay Packers,Kansas City Chiefs,35 - 10,Bart Starr,Los Angeles,California,Memorial Coliseum,34.052234,-118.243685,21143.0,Bart Starr,QB,6-1,0.0,197.0,,1934-01-09
1,2,Superbowl II,1968-01-14,Green Bay Packers,Oakland Raiders,33 - 14,Bart Starr,Miami,Florida,Orange Bowl,25.76168,-80.19179,21143.0,Bart Starr,QB,6-1,0.0,197.0,,1934-01-09
2,3,Superbowl III,1969-01-12,New York Jets,Baltimore Colts,16 - 7,Joe Namath,Miami,Florida,Orange Bowl,25.76168,-80.19179,16174.0,Joe Namath,QB,6-2,0.0,200.0,,1943-05-31
3,4,Superbowl IV,1970-01-11,Kansas City Chiefs,Minnesota Vikings,23 - 7,Len Dawson,New Orleans,Louisiana,Tulane Stadium,29.951066,-90.071532,5361.0,Len Dawson,QB,6-0,0.0,190.0,,1935-06-20
4,5,Superbowl V,1971-01-17,Baltimore Colts,Dallas Cowboys,16 - 13,Chuck Howley,Miami,Florida,Orange Bowl,25.76168,-80.19179,10333.0,Chuck Howley,LB,6-3,0.0,228.0,,1936-06-28
5,6,Superbowl VI,1972-01-16,Dallas Cowboys,Miami Dolphins,24 - 3,Roger Staubach,New Orleans,Louisiana,Tulane Stadium,29.951066,-90.071532,21155.0,Roger Staubach,QB,6-3,0.0,197.0,,1942-02-05
6,7,Superbowl VII,1973-01-14,Miami Dolphins,Washington Redskins,14 - 7,Jake Scott,Los Angeles,California,Memorial Coliseum,34.052234,-118.243685,,,,,,,,
7,8,Superbowl VIII,1974-01-13,Miami Dolphins,Minnesota Vikings,24 - 7,Larry Csonka,Houston,Texas,Rice Stadium,29.760427,-95.369803,,,,,,,,
8,9,Superbowl IX,1975-01-12,Pittsburgh Steelers,Minnesota Vikings,16 - 6,Franco Harris,New Orleans,Louisiana,Tulane Stadium,29.951066,-90.071532,9167.0,Franco Harris,RB,6-2,0.0,230.0,,1950-03-07
9,10,Superbowl X,1976-01-18,Pittsburgh Steelers,Dallas Cowboys,21 - 17,Lynn Swann,Miami,Florida,Orange Bowl,25.76168,-80.19179,21633.0,Lynn Swann,WR,5-11,0.0,180.0,,1952-03-07


In [None]:
### Show where most NFL Super Bowls took place from 1967 to 2020

In [16]:
# Latitude / longitude pairs of the Superbowl host cities
locations = superbowl_t_df.loc[:, ["lat", "lng"]]

# Layout properties for the map figure
figure_layout = dict(
    width='1000px',
    height='400px',
    border='1px solid black',
    padding='1px',
    margin='0 auto 0 auto',
)

# Create the empty map figure with that layout
fig = gmaps.figure(layout=figure_layout)

In [17]:
# Plot the Super Bowl locations from 1967 to 2020 as markers

# Build the marker layer from the lat/lng locations
markers = gmaps.marker_layer(locations)

# Attach the layer to the figure, then evaluate the figure so the notebook renders it
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(border='1px solid black', height='300px', margin='0 auto 0 auto', padding='1px', wi…

#### The gmaps map is saved in the Output directory and displayed below

<img src="../Output/superbowl_gmap.png">