# Pro Football Reference Web Scraper

To acquire the defense data that I need, I will build a web scraper to scrape the data from Pro Football Reference.

In [42]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt

#### Defense Scraping Function

The web scraping function, `get_table`, takes the year of the data you want as its input.  After using urlopen and Beautiful Soup to parse the page, the table is converted into a dataframe using Pandas.  Once the dataframe is created, 3 additional steps are used to preprocess the data.  The first step is to drop all the rows except the 32 individual team rows.  The second step is to rename the columns that share a name so that each column is unique.  The final step is to add a column that records the year used for the dataframe.  Finally, the cleaned dataframe is returned.

In [43]:
# Create function to create dataframe
def get_table(year, n_teams=32):
    '''Scrape one season of team defense stats from Pro Football Reference.

    Parameters
    ----------
    year : int
        The season to import; it is used to build the
        ``/years/<year>/opp.htm`` page URL.
    n_teams : int, optional
        How many team rows to keep from the top of the table (default 32).

    Returns
    -------
    data : df
        One row per team, labelled 1..n_teams in page order.  The scraped
        values are still text, the repeated column headers are renamed so
        every column is unique, and a ``Year`` column holds ``year``.
    '''

    # URL of page
    url = 'https://www.pro-football-reference.com'
    # Open the page inside a context manager so the connection is closed as
    # soon as the HTML has been parsed.
    with urlopen(url + '/years/' + str(year) + '/opp.htm') as html:
        # Name the parser explicitly: otherwise bs4 picks whichever parser is
        # installed (and warns), so results could differ between machines.
        stats_page = BeautifulSoup(html, 'html.parser')

    # Skip the very first <tr>; the next one carries the column headers and
    # the ones after it carry the stats.  Fetch the rows only once.
    table_rows = stats_page.find_all('tr')[1:]

    # Collect table headers
    column_headers = [th.get_text() for th in table_rows[0].find_all('th')]

    # Get stats from each row (the header row has no <td> cells, so it
    # contributes an empty list that becomes an all-missing first row)
    def_stats = [[td.get_text() for td in row.find_all('td')]
                 for row in table_rows]

    # Create DataFrame from our scraped data; the first header is dropped
    # because its cell is a <th>, not a <td>, so it has no matching data column
    data = pd.DataFrame(def_stats, columns=column_headers[1:])

    # Drop the empty header row (position 0) and everything after the teams.
    # .copy() gives an independent frame that is safe to modify below.
    data = data.iloc[1:n_teams + 1].copy()

    # Several headers repeat (Att, Yds, TD, 1stD), so rename them by their
    # position counted from the end of the table.
    renamed_by_offset = {
        -17: 'Att_Pass',
        -16: 'Yds_Pass',
        -15: 'TDs_Pass',
        -12: '1stD_Pass',
        -11: 'Att_Rush',
        -10: 'Yds_Rush',
        -9: 'TDs_Rush',
        -7: '1stD_Rush',
        -5: 'Yds_Penalty',
    }
    # Work on a plain list instead of mutating the Index's backing array.
    new_columns = list(data.columns)
    for offset, new_name in renamed_by_offset.items():
        new_columns[offset] = new_name
    data.columns = new_columns

    # Create column with the year
    data['Year'] = year
    return data

#### Pull and name each dataframe

Here I will run the function on the 3 years of data that I would like to pull from Pro Football Reference.

In [44]:
# Scrape the 2017, 2018 and 2019 seasons, one dataframe per year
df_2017, df_2018, df_2019 = [get_table(season) for season in (2017, 2018, 2019)]

In [45]:
# Preview the first 10 rows of the 2017 data instead of dumping the whole frame
df_2017.head(10)

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TDs_Rush,Y/A,1stD_Rush,Pen,Yds_Penalty,1stPy,Sc%,TO%,EXP,Year
1,Minnesota Vikings,16,252,4415,956,4.6,19,5,260,324,...,10,3.7,59,105,928,33,27.9,10.1,145.07,2017
2,Jacksonville Jaguars,16,268,4578,993,4.6,33,12,257,289,...,9,4.3,91,112,1050,23,23.9,15.6,252.15,2017
3,Los Angeles Chargers,16,272,5254,997,5.3,27,9,282,329,...,11,4.9,99,109,996,30,28.5,15.1,120.25,2017
4,Philadelphia Eagles,16,295,4904,976,5.0,31,12,272,363,...,7,3.8,62,101,917,33,27.1,15.4,162.09,2017
5,New England Patriots,16,296,5856,1022,5.7,18,6,325,367,...,6,4.7,98,111,1035,23,30.2,9.9,12.09,2017
6,Baltimore Ravens,16,303,5201,1045,5.0,34,12,296,333,...,12,4.1,87,96,807,30,29.7,17.4,181.55,2017
7,Pittsburgh Steelers,16,308,4910,940,5.2,22,6,270,297,...,14,4.4,89,110,1041,24,30.3,12.4,31.72,2017
8,Atlanta Falcons,16,315,5094,994,5.1,16,8,323,362,...,9,4.1,97,103,880,29,36.0,9.1,127.47,2017
9,Chicago Bears,16,320,5106,1003,5.1,22,14,298,343,...,10,4.0,85,118,910,33,33.1,11.2,129.87,2017
10,New Orleans Saints,16,326,5384,1002,5.4,25,5,308,329,...,11,4.4,92,108,850,44,33.3,13.6,64.3,2017


In [46]:
# Preview the first 10 rows of the 2018 data instead of dumping the whole frame
df_2018.head(10)

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TDs_Rush,Y/A,1stD_Rush,Pen,Yds_Penalty,1stPy,Sc%,TO%,EXP,Year
1,Chicago Bears,16,283,4795,1004,4.8,36,9,278,377,...,5,3.8,67,114,1022,25,28.6,19.5,95.5,2018
2,Baltimore Ravens,16,287,4687,974,4.8,17,5,286,337,...,11,3.7,82,113,1017,30,29.1,9.5,39.88,2018
3,Tennessee Titans,16,303,5334,1001,5.3,17,6,298,337,...,9,4.3,98,120,984,18,33.1,9.9,-22.13,2018
4,Houston Texans,16,316,5490,1022,5.4,29,14,298,385,...,8,3.4,76,117,945,22,31.6,13.9,-10.29,2018
5,Jacksonville Jaguars,16,316,4983,970,5.1,17,6,288,312,...,16,4.3,89,92,735,37,33.1,8.3,4.66,2018
6,Dallas Cowboys,16,324,5268,981,5.4,20,11,309,367,...,12,3.8,89,89,681,27,36.5,12.0,-52.66,2018
7,New England Patriots,16,325,5746,1002,5.7,28,10,322,370,...,7,4.9,93,91,838,21,32.6,15.0,-32.66,2018
8,Los Angeles Chargers,16,329,5339,981,5.4,20,7,311,351,...,11,4.3,101,103,828,32,33.7,11.8,-38.61,2018
9,Minnesota Vikings,16,341,4955,993,5.0,20,8,303,315,...,13,4.1,107,106,834,36,33.7,10.5,23.15,2018
10,Indianapolis Colts,16,344,5435,997,5.5,26,11,315,384,...,12,3.9,87,131,1115,25,35.5,14.5,-37.23,2018


In [47]:
# Preview the first 10 rows of the 2019 data instead of dumping the whole frame
df_2019.head(10)

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TDs_Rush,Y/A,1stD_Rush,Pen,Yds_Penalty,1stPy,Sc%,TO%,EXP,Year
1,New England Patriots,16,225,4414,948,4.7,36,11,261,303,...,7,4.2,72,107,920,39,19.4,17.3,165.75,2019
2,Buffalo Bills,16,259,4772,985,4.8,23,9,295,348,...,12,4.3,93,94,815,33,23.6,12.4,39.85,2019
3,Baltimore Ravens,16,282,4809,921,5.2,25,12,276,318,...,12,4.4,74,97,795,39,32.9,14.6,16.61,2019
4,Chicago Bears,16,298,5186,1017,5.1,19,9,306,362,...,16,3.9,86,113,923,30,31.5,10.7,-4.15,2019
5,Minnesota Vikings,16,303,5465,1053,5.2,31,14,324,394,...,8,4.3,78,83,713,31,34.5,17.0,-7.88,2019
6,Pittsburgh Steelers,16,303,4866,1030,4.7,38,18,304,314,...,7,3.8,110,115,1118,30,29.9,19.0,85.78,2019
7,Kansas City Chiefs,16,308,5594,1043,5.4,23,7,344,352,...,14,4.9,115,116,844,39,34.6,13.6,-65.69,2019
8,San Francisco 49ers,16,310,4509,968,4.7,27,15,285,318,...,11,4.5,105,116,957,30,29.0,14.2,77.41,2019
9,Green Bay Packers,16,313,5642,998,5.7,25,8,310,326,...,15,4.7,99,97,968,20,34.5,14.1,-63.65,2019
10,Denver Broncos,16,316,5392,1003,5.4,17,7,305,348,...,9,4.2,96,121,1041,34,37.3,8.4,-35.98,2019


#### Join Tables

The 3 dataframes need to be joined into a single dataframe.  This will be performed using the pd.concat method.

In [48]:
# Stack the three season frames on top of each other; each frame keeps its own
# 1-32 row labels, so the labels repeat once per season
season_frames = [df_2017, df_2018, df_2019]
def_df = pd.concat(season_frames)
def_df

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TDs_Rush,Y/A,1stD_Rush,Pen,Yds_Penalty,1stPy,Sc%,TO%,EXP,Year
1,Minnesota Vikings,16,252,4415,956,4.6,19,5,260,324,...,10,3.7,59,105,928,33,27.9,10.1,145.07,2017
2,Jacksonville Jaguars,16,268,4578,993,4.6,33,12,257,289,...,9,4.3,91,112,1050,23,23.9,15.6,252.15,2017
3,Los Angeles Chargers,16,272,5254,997,5.3,27,9,282,329,...,11,4.9,99,109,996,30,28.5,15.1,120.25,2017
4,Philadelphia Eagles,16,295,4904,976,5.0,31,12,272,363,...,7,3.8,62,101,917,33,27.1,15.4,162.09,2017
5,New England Patriots,16,296,5856,1022,5.7,18,6,325,367,...,6,4.7,98,111,1035,23,30.2,9.9,12.09,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,Arizona Cardinals,16,442,6432,1080,6.0,17,10,375,421,...,9,4.4,94,114,1041,38,42.6,9.5,-174.55,2019
29,Tampa Bay Buccaneers,16,449,5503,1073,5.1,28,16,331,408,...,11,3.3,70,111,971,39,39.6,13.5,12.23,2019
30,New York Giants,16,451,6037,1061,5.7,16,6,343,369,...,19,3.9,100,117,922,32,39.7,8.7,-105.11,2019
31,Carolina Panthers,16,470,5992,1042,5.8,21,7,355,347,...,31,5.2,128,109,940,30,41.4,9.4,-116.88,2019


In [49]:
# Verify that we have 96 rows of data (3 seasons x 32 teams), then list the columns
assert len(def_df) == 96, f'expected 96 team-season rows, found {len(def_df)}'
def_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96 entries, 1 to 32
Data columns (total 28 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Tm           96 non-null     object
 1   G            96 non-null     object
 2   PF           96 non-null     object
 3   Yds          96 non-null     object
 4   Ply          96 non-null     object
 5   Y/P          96 non-null     object
 6   TO           96 non-null     object
 7   FL           96 non-null     object
 8   1stD         96 non-null     object
 9   Cmp          96 non-null     object
 10  Att_Pass     96 non-null     object
 11  Yds_Pass     96 non-null     object
 12  TDs_Pass     96 non-null     object
 13  Int          96 non-null     object
 14  NY/A         96 non-null     object
 15  1stD_Pass    96 non-null     object
 16  Att_Rush     96 non-null     object
 17  Yds_Rush     96 non-null     object
 18  TDs_Rush     96 non-null     object
 19  Y/A          96 non-null     ob

#### Replace Team Names Function

The other dataframe that we will be using has all of the team names as 3-letter abbreviations.  To enable the two dataframes to be merged, we will need to abbreviate all the team names in this dataframe.  This will be accomplished through running a dictionary of the names and abbreviations through a replace function.

In [50]:
# Lookup from the full team name (as scraped into the Tm column) to its abbreviation.
# NOTE(review): 'Oakland Raiders' -> 'LV' and 'Los Angeles Rams' -> 'LA' presumably
# follow the abbreviations used by the other dataframe - confirm before merging.
team_dic = {
    'Minnesota Vikings': 'MIN',
    'Jacksonville Jaguars': 'JAX',
    'Los Angeles Chargers': 'LAC',
    'Philadelphia Eagles': 'PHI',
    'New England Patriots': 'NE',
    'Baltimore Ravens': 'BAL',
    'Pittsburgh Steelers': 'PIT',
    'Atlanta Falcons': 'ATL',
    'Chicago Bears': 'CHI',
    'New Orleans Saints': 'NO',
    'Carolina Panthers': 'CAR',
    'Los Angeles Rams': 'LA',
    'Dallas Cowboys': 'DAL',
    'Seattle Seahawks': 'SEA',
    'Cincinnati Bengals': 'CIN',
    'Tennessee Titans': 'TEN',
    'Buffalo Bills': 'BUF',
    'Arizona Cardinals': 'ARI',
    'Oakland Raiders': 'LV',
    'Detroit Lions': 'DET',
    'New York Jets': 'NYJ',
    'Denver Broncos': 'DEN',
    'Tampa Bay Buccaneers': 'TB',
    'San Francisco 49ers': 'SF',
    'Green Bay Packers': 'GB',
    'New York Giants': 'NYG',
    'Washington Redskins': 'WAS',
    'Miami Dolphins': 'MIA',
    'Indianapolis Colts': 'IND',
    'Cleveland Browns': 'CLE',
    'Houston Texans': 'HOU',
    'Kansas City Chiefs': 'KC',
}

In [51]:
def replace_all(dic, df=None):
    '''Replaces each full team name in the Tm column with its abbreviation

    Parameters
    ----------
    dic : dict
        Maps each value to look for in the Tm column to its replacement.
        Values in Tm that are not keys of ``dic`` are left unchanged.
    df : df, optional
        The dataframe to update.  When omitted, the notebook-level ``def_df``
        is used, which keeps existing ``replace_all(dic)`` calls working.

    Returns
    -------
    df : df
        The same dataframe object that was updated (``def_df`` by default),
        with its Tm column rewritten in place.'''

    if df is None:
        # Fall back to the frame built earlier in the notebook
        df = def_df

    # A single dict-based replace matches every key against the original
    # values, so one replacement can never be re-replaced by a later pair
    # (which the one-pair-at-a-time loop allowed).
    df['Tm'] = df['Tm'].replace(dic)
    return df

In [52]:
# Run function to replace team names; replace_all hands back the def_df it
# updated, so rebinding the name keeps the same object and displays it
def_df = replace_all(team_dic)
def_df

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TDs_Rush,Y/A,1stD_Rush,Pen,Yds_Penalty,1stPy,Sc%,TO%,EXP,Year
1,MIN,16,252,4415,956,4.6,19,5,260,324,...,10,3.7,59,105,928,33,27.9,10.1,145.07,2017
2,JAX,16,268,4578,993,4.6,33,12,257,289,...,9,4.3,91,112,1050,23,23.9,15.6,252.15,2017
3,LAC,16,272,5254,997,5.3,27,9,282,329,...,11,4.9,99,109,996,30,28.5,15.1,120.25,2017
4,PHI,16,295,4904,976,5.0,31,12,272,363,...,7,3.8,62,101,917,33,27.1,15.4,162.09,2017
5,NE,16,296,5856,1022,5.7,18,6,325,367,...,6,4.7,98,111,1035,23,30.2,9.9,12.09,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,ARI,16,442,6432,1080,6.0,17,10,375,421,...,9,4.4,94,114,1041,38,42.6,9.5,-174.55,2019
29,TB,16,449,5503,1073,5.1,28,16,331,408,...,11,3.3,70,111,971,39,39.6,13.5,12.23,2019
30,NYG,16,451,6037,1061,5.7,16,6,343,369,...,19,3.9,100,117,922,32,39.7,8.7,-105.11,2019
31,CAR,16,470,5992,1042,5.8,21,7,355,347,...,31,5.2,128,109,940,30,41.4,9.4,-116.88,2019


#### Feature Engineering

We first need to convert all of our feature columns into numeric so that we can create new ones using division.

In [53]:
# Cast every column except the team name to a numeric dtype; with
# errors='coerce' any value that cannot be parsed becomes NaN instead of raising
cols = def_df.columns.drop('Tm')
def_df[cols] = def_df[cols].apply(lambda column: pd.to_numeric(column, errors='coerce'))
def_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96 entries, 1 to 32
Data columns (total 28 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Tm           96 non-null     object 
 1   G            96 non-null     int64  
 2   PF           96 non-null     int64  
 3   Yds          96 non-null     int64  
 4   Ply          96 non-null     int64  
 5   Y/P          96 non-null     float64
 6   TO           96 non-null     int64  
 7   FL           96 non-null     int64  
 8   1stD         96 non-null     int64  
 9   Cmp          96 non-null     int64  
 10  Att_Pass     96 non-null     int64  
 11  Yds_Pass     96 non-null     int64  
 12  TDs_Pass     96 non-null     int64  
 13  Int          96 non-null     int64  
 14  NY/A         96 non-null     float64
 15  1stD_Pass    96 non-null     int64  
 16  Att_Rush     96 non-null     int64  
 17  Yds_Rush     96 non-null     int64  
 18  TDs_Rush     96 non-null     int64  
 19  Y/A       

The stats that we currently have are almost all season totals.  To get a better sense of how good a defense is compared with any other defense, and of the probability of an event occurring on a single play, it is better to express these stats on a per-play basis.  I created 7 individual features to achieve this:

* **TD_PA** = pass touchdowns allowed / pass attempts faced
* **INT_PA** = interceptions / pass attempts faced
* **1D_PA** = 1st downs allowed through pass plays / pass attempts faced
* **Comp_Pct** = pass completions allowed / pass attempts faced
* **TD_RA** = rush touchdowns allowed / rush attempts faced
* **1D_RA** = 1st downs allowed through run plays / rush attempts faced
* **Fum_Play** = total fumbles recovered / total plays faced

I also created 1 additional feature that shows the percentage of the time that a defense faced a pass play.  

* **Pass_Pct** = pass attempts faced / (pass attempts faced + run attempts faced)



In [54]:
# Per-attempt / per-play rate features, each defined as numerator column / denominator column
ratio_features = [
    ('TD_PA', 'TDs_Pass', 'Att_Pass'),
    ('INT_PA', 'Int', 'Att_Pass'),
    ('1D_PA', '1stD_Pass', 'Att_Pass'),
    ('Comp_Pct', 'Cmp', 'Att_Pass'),
    ('TD_RA', 'TDs_Rush', 'Att_Rush'),
    ('1D_RA', '1stD_Rush', 'Att_Rush'),
    ('Fum_Play', 'FL', 'Ply'),
]
for feature, numerator, denominator in ratio_features:
    def_df[feature] = def_df[numerator] / def_df[denominator]

# Share of faced attempts that were passes (pass attempts over pass + rush attempts)
total_attempts = def_df['Att_Pass'] + def_df['Att_Rush']
def_df['Pass_Pct'] = def_df['Att_Pass'] / total_attempts
def_df

Unnamed: 0,Tm,G,PF,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,EXP,Year,TD_PA,INT_PA,1D_PA,Comp_Pct,TD_RA,1D_RA,Fum_Play,Pass_Pct
1,MIN,16,252,4415,956,4.6,19,5,260,324,...,145.07,2017,0.023423,0.025225,0.302703,0.583784,0.027473,0.162088,0.005230,0.603917
2,JAX,16,268,4578,993,4.6,33,12,257,289,...,252.15,2017,0.033399,0.041257,0.280943,0.567780,0.020979,0.212121,0.012085,0.542644
3,LAC,16,272,5254,997,5.3,27,9,282,329,...,120.25,2017,0.032443,0.034351,0.291985,0.627863,0.025581,0.230233,0.009027,0.549266
4,PHI,16,295,4904,976,5.0,31,12,272,363,...,162.09,2017,0.039933,0.031614,0.294509,0.603993,0.020772,0.183976,0.012295,0.640725
5,NE,16,296,5856,1022,5.7,18,6,325,367,...,12.09,2017,0.040678,0.020339,0.345763,0.622034,0.015385,0.251282,0.005871,0.602041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,ARI,16,442,6432,1080,6.0,17,10,375,421,...,-174.55,2019,0.063228,0.011647,0.404326,0.700499,0.020501,0.214123,0.009259,0.577885
29,TB,16,449,5503,1073,5.1,28,16,331,408,...,12.23,2019,0.045181,0.018072,0.334337,0.614458,0.030387,0.193370,0.014911,0.647173
30,NYG,16,451,6037,1061,5.7,16,6,343,369,...,-105.11,2019,0.053957,0.017986,0.379496,0.663669,0.040512,0.213220,0.005655,0.542439
31,CAR,16,470,5992,1042,5.8,21,7,355,347,...,-116.88,2019,0.038603,0.025735,0.362132,0.637868,0.069663,0.287640,0.006718,0.550051


#### Select Columns for Final DF

We are now ready to select the columns for the final dataframe.  The final DF will include:

* **Tm** - Team name
* **Year** - season's year
* **PF** - points allowed by defense
* **Fum_Play** - fumbles recovered by the defense per play faced
* **Comp_Pct** - completion percentage allowed
* **TD_PA** - touchdowns allowed per pass attempt
* **INT_PA** - interceptions per pass attempt
* **NY/A** - yards allowed per pass attempt
* **1D_PA** - 1st downs allowed per pass attempt
* **TD_RA** - touchdowns allowed per rush attempt
* **Y/A** - yards allowed per rush attempt
* **1D_RA** - 1st downs allowed per rush attempt
* **Pass_Pct** - percentage of the time the defense faces a pass (as opposed to a run play)

We will drop everything else as it is included in our created features.


In [55]:
# Keep only the identifiers, points allowed and the rate features for the final DF
final_columns = [
    'Tm', 'Year', 'PF',
    'Fum_Play', 'Comp_Pct',
    'TD_PA', 'INT_PA', 'NY/A', '1D_PA',
    'TD_RA', 'Y/A', '1D_RA',
    'Pass_Pct',
]
def_df_final = def_df[final_columns]
def_df_final

Unnamed: 0,Tm,Year,PF,Fum_Play,Comp_Pct,TD_PA,INT_PA,NY/A,1D_PA,TD_RA,Y/A,1D_RA,Pass_Pct
1,MIN,2017,252,0.005230,0.583784,0.023423,0.025225,5.2,0.302703,0.027473,3.7,0.162088,0.603917
2,JAX,2017,268,0.012085,0.567780,0.033399,0.041257,4.8,0.280943,0.020979,4.3,0.212121,0.542644
3,LAC,2017,272,0.009027,0.627863,0.032443,0.034351,5.6,0.291985,0.025581,4.9,0.230233,0.549266
4,PHI,2017,295,0.012295,0.603993,0.039933,0.031614,5.7,0.294509,0.020772,3.8,0.183976,0.640725
5,NE,2017,296,0.005871,0.622034,0.040678,0.020339,6.4,0.345763,0.015385,4.7,0.251282,0.602041
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,ARI,2019,442,0.009259,0.700499,0.063228,0.011647,7.0,0.404326,0.020501,4.4,0.214123,0.577885
29,TB,2019,449,0.014911,0.614458,0.045181,0.018072,6.1,0.334337,0.030387,3.3,0.193370,0.647173
30,NYG,2019,451,0.005655,0.663669,0.053957,0.017986,7.1,0.379496,0.040512,3.9,0.213220,0.542439
31,CAR,2019,470,0.006718,0.637868,0.038603,0.025735,6.2,0.362132,0.069663,5.2,0.287640,0.550051


#### Add Prefix to Final DF Columns

To distinguish this DF from our offensive stats DF, we will add the prefix Def_ to every column.

In [56]:
# Prepend 'Def_' to every column name (Tm and Year included).
# NOTE: running this cell a second time prepends the prefix again.
def_df_final = def_df_final.rename(columns=lambda name: 'Def_' + name)
def_df_final

Unnamed: 0,Def_Tm,Def_Year,Def_PF,Def_Fum_Play,Def_Comp_Pct,Def_TD_PA,Def_INT_PA,Def_NY/A,Def_1D_PA,Def_TD_RA,Def_Y/A,Def_1D_RA,Def_Pass_Pct
1,MIN,2017,252,0.005230,0.583784,0.023423,0.025225,5.2,0.302703,0.027473,3.7,0.162088,0.603917
2,JAX,2017,268,0.012085,0.567780,0.033399,0.041257,4.8,0.280943,0.020979,4.3,0.212121,0.542644
3,LAC,2017,272,0.009027,0.627863,0.032443,0.034351,5.6,0.291985,0.025581,4.9,0.230233,0.549266
4,PHI,2017,295,0.012295,0.603993,0.039933,0.031614,5.7,0.294509,0.020772,3.8,0.183976,0.640725
5,NE,2017,296,0.005871,0.622034,0.040678,0.020339,6.4,0.345763,0.015385,4.7,0.251282,0.602041
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,ARI,2019,442,0.009259,0.700499,0.063228,0.011647,7.0,0.404326,0.020501,4.4,0.214123,0.577885
29,TB,2019,449,0.014911,0.614458,0.045181,0.018072,6.1,0.334337,0.030387,3.3,0.193370,0.647173
30,NYG,2019,451,0.005655,0.663669,0.053957,0.017986,7.1,0.379496,0.040512,3.9,0.213220,0.542439
31,CAR,2019,470,0.006718,0.637868,0.038603,0.025735,6.2,0.362132,0.069663,5.2,0.287640,0.550051


#### Save clean DF

In [57]:
# Save DF to csv file
#def_df_final.to_csv(r'C:\Users\Garth Torok\MattTorok\capstone\Data\def_df.csv', index=False)