# Week 14 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [3]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [4]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [5]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [6]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [7]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [8]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [9]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [10]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [11]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [12]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [13]:
no_games = 14

In [14]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Kyler Murray,ARI,QB,317.9,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4
1,Patrick Mahomes II,KC,QB,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3
2,Russell Wilson,SEA,QB,298.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0
3,Josh Allen,BUF,QB,295.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1
4,Aaron Rodgers,GB,QB,285.8,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5


In [15]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_14,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Kyler Murray,ARI,QB,336.4,18.5,ARI,QB,317.9,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4
1,Patrick Mahomes II,KC,QB,334.2,21.6,KC,QB,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3
2,Russell Wilson,SEA,QB,321.6,23.1,SEA,QB,298.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0
3,Aaron Rodgers,GB,QB,316.7,30.9,GB,QB,285.8,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5
4,Josh Allen,BUF,QB,314.9,19.3,BUF,QB,295.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1


In [16]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_14', 'Week_1', 'Week_2',
       'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9',
       'Week_10', 'Week_11', 'Week_12', 'Week_13'],
      dtype='object')

In [17]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 
                           'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 
                           'Week_14']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Kyler Murray,ARI,QB,336.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5
1,Patrick Mahomes II,KC,QB,334.2,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6
2,Russell Wilson,SEA,QB,321.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1
3,Aaron Rodgers,GB,QB,316.7,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9
4,Josh Allen,BUF,QB,314.9,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3


In [18]:
print(fantasy_pts.shape)

(690, 18)


### Pickle Combined DataFrame for Next Week

In [19]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [20]:
print(df.shape)
df.head()

(601, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Dalvin Cook,MIN,RB,25,12,12,0,0,0,0,0,273,1352,4.95,14,47,37,322,8.7,1,4,3,15,3.0,,275.9,1,1
1,Derrick Henry,TEN,RB,26,13,13,0,0,0,0,0,297,1532,5.16,14,29,17,109,6.41,0,1,1,14,,,254.6,2,2
2,Tyreek Hill,KAN,WR,26,13,13,0,0,0,0,0,12,122,10.17,2,119,77,1158,15.04,14,1,0,16,,,262.5,1,3
3,Alvin Kamara,NOR,RB,25,13,8,0,0,0,0,0,154,723,4.69,10,96,77,699,9.08,4,1,0,14,,,264.7,3,4
4,Travis Kelce,KAN,TE,31,13,13,1,2,4,0,0,0,0,,0,120,90,1250,13.89,9,1,1,9,,,222.2,1,5


In [21]:
print(fantasy_pts.shape)
fantasy_pts.head()

(690, 18)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Kyler Murray,ARI,QB,336.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5
1,Patrick Mahomes II,KC,QB,334.2,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6
2,Russell Wilson,SEA,QB,321.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1
3,Aaron Rodgers,GB,QB,316.7,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9
4,Josh Allen,BUF,QB,314.9,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3


In [22]:
check_names(df, fantasy_pts)

In [23]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [24]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [25]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [26]:
print(offense.shape)
offense.head(10)

(601, 43)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Dalvin Cook,MIN,RB,25,12,12,0,0,0,0,0,273,1352,4.95,14,47,37,322,8.7,1,4,3,15,3.0,,275.9,1,1,257.4,20.8,15.1,23.9,26.6,14.9,,,46.6,37.2,11.2,20.0,6.2,17.9,17.0
1,Derrick Henry,TEN,RB,26,13,13,0,0,0,0,0,297,1532,5.16,14,29,17,109,6.41,0,1,1,14,,,254.6,2,2,246.1,13.1,8.4,25.0,,18.3,38.4,13.2,17.2,6.8,10.9,19.2,36.5,4.9,34.2
2,Tyreek Hill,KAN,WR,26,13,13,0,0,0,0,0,12,122,10.17,2,119,77,1158,15.04,14,1,0,16,,,262.5,1,3,224.0,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8,24.1,,16.0,44.9,8.8,23.1
3,Alvin Kamara,NOR,RB,25,13,8,0,0,0,0,0,154,723,4.69,10,96,77,699,9.08,4,1,0,14,,,264.7,3,4,226.2,18.7,29.4,31.7,17.9,11.9,,14.8,16.3,10.9,27.8,10.5,5.2,15.7,15.4
4,Travis Kelce,KAN,TE,31,13,13,1,2,4,0,0,0,0,,0,120,90,1250,13.89,9,1,1,9,,,222.2,1,5,177.2,11.0,15.0,8.7,7.0,16.8,16.5,3.1,16.9,15.9,,18.9,8.2,19.6,19.6
5,Patrick Mahomes,KAN,QB,25,13,13,340,497,4208,33,5,52,250,4.81,2,1,0,0,,0,3,1,2,,2.0,334.3,1,6,334.2,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6
6,Kyler Murray,ARI,QB,23,13,13,309,461,3231,23,10,115,712,6.19,10,0,0,0,,0,7,3,10,,,336.4,2,7,336.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5
7,Davante Adams,GNB,WR,28,11,11,0,0,0,0,0,0,0,,0,121,91,1144,12.57,14,1,1,14,,,241.9,2,8,196.4,27.6,3.6,,,,6.1,31.6,23.3,23.3,10.6,16.6,12.1,24.1,17.5
8,Aaron Rodgers,GNB,QB,37,13,13,312,448,3685,39,4,29,100,3.45,2,1,1,-6,-6.0,0,3,2,2,,,317.3,3,9,316.7,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9
9,James Robinson,JAX,RB,22,13,13,0,0,0,0,0,224,1035,4.62,7,57,46,326,7.09,2,3,1,9,1.0,,213.1,4,10,190.1,9.0,18.0,24.9,10.7,5.0,11.3,27.7,,15.9,11.2,9.4,21.9,16.8,8.3


In [27]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                       24
Week_1                   191
Week_2                   194
Week_3                   195
Week_4                   221
Week_5                   250
Week_6        

In [28]:
offense.fillna('0.0', inplace = True)

In [29]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 
              'Week_14']

In [45]:
fill_blanks(offense, off_integers)

In [46]:
update_int_dtype(offense, off_integers)

In [47]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [48]:
update_float_dtype(offense, off_floats)

In [49]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Dalvin Cook,MIN,RB,25,12,12,0,0,0,0,0,273,1352,13.5,14,47,37,322,15.0,1,4,3,15,3,0,275.9,1,1,257.4,20.8,15.1,23.9,26.6,14.9,0.0,0.0,46.6,37.2,11.2,20.0,6.2,17.9,17.0
1,Derrick Henry,TEN,RB,26,13,13,0,0,0,0,0,297,1532,6.6,14,29,17,109,10.1,0,1,1,14,0,0,254.6,2,2,246.1,13.1,8.4,25.0,0.0,18.3,38.4,13.2,17.2,6.8,10.9,19.2,36.5,4.9,34.2
2,Tyreek Hill,KAN,WR,26,13,13,0,0,0,0,0,12,122,11.7,2,119,77,1158,15.4,14,1,0,16,0,0,262.5,1,3,224.0,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8,24.1,0.0,16.0,44.9,8.8,23.1
3,Alvin Kamara,NOR,RB,25,13,8,0,0,0,0,0,154,723,10.9,10,96,77,699,9.8,4,1,0,14,0,0,264.7,3,4,226.2,18.7,29.4,31.7,17.9,11.9,0.0,14.8,16.3,10.9,27.8,10.5,5.2,15.7,15.4
4,Travis Kelce,KAN,TE,31,13,13,1,2,4,0,0,0,0,0.0,0,120,90,1250,21.9,9,1,1,9,0,0,222.2,1,5,177.2,11.0,15.0,8.7,7.0,16.8,16.5,3.1,16.9,15.9,0.0,18.9,8.2,19.6,19.6


In [50]:
team_dict = {'LAR': 'Los Angeles Rams', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Rams
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


## Defense Stats

In [51]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Pittsburgh Steelers,13,237,811,4.9,3940,314,1312,4.2,7,452,5.3,253,11.6,2628,19,35,19,54.3%,25,17,8,45
1,Miami Dolphins,13,245,821,5.7,4710,341,1563,4.6,14,446,6.6,281,12.2,3147,16,38,22,57.9%,25,16,9,34
2,Los Angeles Rams,13,246,815,4.6,3716,315,1224,3.9,10,458,5.0,286,9.7,2492,14,37,22,59.5%,21,13,8,42
3,New Orleans Saints,13,265,780,5.0,3879,312,1159,3.7,7,432,5.8,262,11.2,2720,22,36,24,66.7%,20,13,7,36
4,Baltimore Ravens,13,273,862,5.3,4545,329,1477,4.5,12,505,5.8,328,10.0,3068,19,38,25,65.8%,19,8,11,28


In [52]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [53]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [54]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Pittsburgh Steelers,13,237,811,4.9,3940,314,1312,4.2,7,452,5.3,253,11.6,2628,19,35,19,54.3%,25,17,8,45,Pittsburgh Steelers,PIT,DST,133.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0
1,Miami Dolphins,13,245,821,5.7,4710,341,1563,4.6,14,446,6.6,281,12.2,3147,16,38,22,57.9%,25,16,9,34,Miami Dolphins,MIA,DST,126.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0,10.0
2,Los Angeles Rams,13,246,815,4.6,3716,315,1224,3.9,10,458,5.0,286,9.7,2492,14,37,22,59.5%,21,13,8,42,Los Angeles Rams,LAR,DST,123.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0,21.0
3,New Orleans Saints,13,265,780,5.0,3879,312,1159,3.7,7,432,5.8,262,11.2,2720,22,36,24,66.7%,20,13,7,36,New Orleans Saints,NO,DST,99.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0,16.0,14.0,6.0,2.0
4,Baltimore Ravens,13,273,862,5.3,4545,329,1477,4.5,12,505,5.8,328,10.0,3068,19,38,25,65.8%,19,8,11,28,Baltimore Ravens,BAL,DST,110.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0,0.0


In [55]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [56]:
defense_df.shape

(32, 38)

In [57]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
Week_6                       4
Week_7                       4
Week_8                       4
Week_9  

In [58]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Pittsburgh Steelers,13,237,811,4.9,3940,314,1312,4.2,7,452,5.3,253,11.6,2628,19,35,19,54.3%,25,17,8,45,133.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0
1,Miami Dolphins,13,245,821,5.7,4710,341,1563,4.6,14,446,6.6,281,12.2,3147,16,38,22,57.9%,25,16,9,34,126.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0,10.0
2,Los Angeles Rams,13,246,815,4.6,3716,315,1224,3.9,10,458,5.0,286,9.7,2492,14,37,22,59.5%,21,13,8,42,123.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0,21.0
3,New Orleans Saints,13,265,780,5.0,3879,312,1159,3.7,7,432,5.8,262,11.2,2720,22,36,24,66.7%,20,13,7,36,99.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0,16.0,14.0,6.0,2.0
4,Baltimore Ravens,13,273,862,5.3,4545,329,1477,4.5,12,505,5.8,328,10.0,3068,19,38,25,65.8%,19,8,11,28,110.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0,0.0
5,Washington Football Team,13,275,819,5.0,4075,338,1393,4.1,10,441,5.6,280,10.6,2682,19,36,18,50%,17,12,5,40,105.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,,5.0,0.0,12.0,15.0,3.0,23.0
6,New England Patriots,13,279,779,5.7,4453,366,1612,4.4,12,394,6.9,260,11.4,2841,17,38,24,63.2%,19,15,4,19,98.0,11.0,6.0,15.0,4.0,,7.0,4.0,3.0,3.0,5.0,0.0,5.0,31.0,4.0
7,Kansas City Chiefs,13,281,839,5.6,4665,358,1669,4.7,11,457,6.2,285,11.0,2996,21,36,27,75%,20,15,5,24,98.0,7.0,5.0,7.0,20.0,-1.0,3.0,18.0,7.0,1.0,,1.0,5.0,5.0,14.0
8,Chicago Bears,13,291,856,5.3,4517,375,1501,4.0,9,451,6.3,283,11.4,3016,20,44,23,52.3%,14,7,7,30,80.0,3.0,12.0,4.0,2.0,6.0,11.0,9.0,1.0,4.0,12.0,,-4.0,3.0,17.0
9,New York Giants,13,291,837,5.3,4458,335,1319,3.9,11,469,6.3,315,10.8,3139,18,46,23,50%,20,10,10,33,90.0,4.0,9.0,-2.0,5.0,11.0,14.0,5.0,4.0,13.0,4.0,,9.0,13.0,1.0


In [59]:
defense_df.fillna('0.0', inplace=True)

In [60]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'TTL', 'Week_1', 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion',
              'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 
              'Week_12', 'Week_13', 'Week_14']



In [61]:
remove_comma(defense_df, def_int)

In [62]:
update_int_dtype(defense_df, def_int)

In [63]:
adjust_float(defense_df, def_floats)

In [64]:
update_float_dtype(defense_df, def_floats)

In [65]:
update_percent(defense_df, 'RZ_Percent')

In [66]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [67]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Pittsburgh Steelers,13,237,811,4.9,3940,314,1312,4.2,7,452,5.3,253,11.6,2628,19,35,19,54.3,25,17,8,45,133.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0
1,Miami Dolphins,13,245,821,5.7,4710,341,1563,4.6,14,446,6.6,281,12.2,3147,16,38,22,57.9,25,16,9,34,126.0,2.0,2.0,12.0,3.0,12.0,15.0,0.0,17.0,8.0,4.0,5.0,14.0,14.0,10.0
2,Los Angeles Rams,13,246,815,4.6,3716,315,1224,3.9,10,458,5.0,286,9.7,2492,14,37,22,59.5,21,13,8,42,123.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,0.0,13.0,5.0,15.0,11.0,21.0
3,New Orleans Saints,13,265,780,5.0,3879,312,1159,3.7,7,432,5.8,262,11.2,2720,22,36,24,66.7,20,13,7,36,99.0,15.0,4.0,-3.0,4.0,3.0,0.0,1.0,7.0,16.0,14.0,16.0,14.0,6.0,2.0
4,Baltimore Ravens,13,273,862,5.3,4545,329,1477,4.5,12,505,5.8,328,10.0,3068,19,38,25,65.8,19,8,11,28,110.0,15.0,15.0,1.0,6.0,26.0,7.0,0.0,4.0,14.0,1.0,3.0,8.0,4.0,0.0


## Kicking Stats

In [68]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113


In [69]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124,146.0,9.0,17.0,8.0,,13.0,20.0,2.0,13.0,12.0,,13.0,21.0,12.0,6.0
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120,146.0,4.0,11.0,7.0,18.0,22.0,6.0,,4.0,14.0,14.0,10.0,12.0,14.0,10.0
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116,125.0,8.0,15.0,12.0,14.0,11.0,8.0,,3.0,5.0,11.0,15.0,2.0,7.0,14.0
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116,127.0,12.0,12.0,8.0,13.0,11.0,,9.0,10.0,7.0,15.0,7.0,6.0,8.0,9.0
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113,129.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0,,11.0,10.0,8.0
5,Ryan Succop,K,TB,13,26,92.3,50,1-1,7-7,8-8,7-8,1-2,41,92.7,110,119.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0,6.0,6.0,,9.0
6,Harrison Butker,K,KC,13,25,92.0,58,2-2,6-6,9-9,3-5,3-3,46,87.0,109,118.0,10.0,13.0,4.0,8.0,6.0,8.0,14.0,5.0,11.0,,5.0,9.0,17.0,8.0
7,Joey Slye,K,CAR,13,33,78.8,56,0-0,10-11,8-8,7-8,1-6,31,93.5,107,116.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0,8.0,6.0,10.0,10.0,,9.0
8,Justin Tucker,K,BAL,13,24,91.7,55,0-0,6-6,5-6,8-8,3-4,40,97.5,105,119.0,9.0,16.0,9.0,8.0,10.0,16.0,,8.0,7.0,5.0,10.0,2.0,10.0,9.0
9,Wil Lutz,K,NO,13,25,80.0,53,0-0,5-6,8-8,6-8,1-3,44,100.0,104,112.0,10.0,6.0,13.0,5.0,15.0,,11.0,14.0,8.0,10.0,6.0,8.0,3.0,3.0


In [70]:
dash = kicking_df['LNG'][39]
dash

'—'

In [71]:
kicking_df.fillna('0.0', inplace = True)

In [73]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 
              'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 'Week_14']


In [74]:
replace_dash(kicking_df, kick_int, dash)

In [75]:
replace_dash(kicking_df, kick_float, dash)

In [76]:
update_int_dtype(kicking_df, kick_int)

In [77]:
adjust_float(kicking_df, kick_float)

In [78]:
update_float_dtype(kicking_df, kick_float)

In [79]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124,146.0,9.0,17.0,8.0,0.0,13.0,20.0,2.0,13.0,12.0,0.0,13.0,21.0,12.0,6.0
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120,146.0,4.0,11.0,7.0,18.0,22.0,6.0,0.0,4.0,14.0,14.0,10.0,12.0,14.0,10.0
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116,125.0,8.0,15.0,12.0,14.0,11.0,8.0,0.0,3.0,5.0,11.0,15.0,2.0,7.0,14.0
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116,127.0,12.0,12.0,8.0,13.0,11.0,0.0,9.0,10.0,7.0,15.0,7.0,6.0,8.0,9.0
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113,129.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0,0.0,11.0,10.0,8.0


In [80]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
Week_6      float64
Week_7      float64
Week_8      float64
Week_9      float64
Week_10     float64
Week_11     float64
Week_12     float64
Week_13     float64
Week_14     float64
dtype: object

## Pickle Cleaned DataFrames

In [66]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')