# Week 13 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [3]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [4]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [5]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [6]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [7]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [8]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [9]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [10]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [11]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [12]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [13]:
no_games = 13

In [14]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12
0,Kyler Murray,ARI,QB,300.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9
1,Patrick Mahomes II,KC,QB,293.3,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3
2,Russell Wilson,SEA,QB,282.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4
3,Josh Allen,BUF,QB,265.5,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5
4,Deshaun Watson,HOU,QB,262.4,21.8,15.7,18.1,20.9,26.9,32.0,24.2,,24.2,14.1,31.4,33.1


In [15]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_13,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12
0,Kyler Murray,ARI,QB,317.9,17.4,ARI,QB,300.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9
1,Patrick Mahomes II,KC,QB,312.6,19.3,KC,QB,293.3,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3
2,Russell Wilson,SEA,QB,298.5,16.0,SEA,QB,282.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4
3,Josh Allen,BUF,QB,295.6,30.1,BUF,QB,265.5,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5
4,Aaron Rodgers,GB,QB,285.8,23.5,GB,QB,262.3,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6


In [16]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_13', 'Week_1', 'Week_2',
       'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9',
       'Week_10', 'Week_11', 'Week_12'],
      dtype='object')

In [17]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 
                           'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Kyler Murray,ARI,QB,317.9,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4
1,Patrick Mahomes II,KC,QB,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3
2,Russell Wilson,SEA,QB,298.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0
3,Josh Allen,BUF,QB,295.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1
4,Aaron Rodgers,GB,QB,285.8,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5


In [18]:
print(fantasy_pts.shape)

(680, 17)


### Pickle Combined DataFrame for Next Week

In [19]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [20]:
print(df.shape)
df.head()

(594, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Dalvin Cook,MIN,RB,25,11,11,0,0,0,0,0,251,1250,4.98,13,43,35,314,8.97,1,4,3,14,3.0,,257.9,1,1
1,Derrick Henry,TEN,RB,26,12,12,0,0,0,0,0,271,1317,4.86,12,27,15,102,6.8,0,1,1,12,,,219.4,2,2
2,Alvin Kamara,NOR,RB,25,12,7,0,0,0,0,0,143,673,4.71,9,86,70,655,9.36,4,1,0,13,,,245.8,3,3
3,Tyreek Hill,KAN,WR,26,12,12,0,0,0,0,0,11,90,8.18,1,111,74,1079,14.58,13,1,0,14,,,237.9,1,4
4,Patrick Mahomes,KAN,QB,25,12,12,316,463,3815,31,2,47,241,5.13,2,1,0,0,,0,2,1,2,,2.0,312.7,1,5


In [21]:
print(fantasy_pts.shape)
fantasy_pts.head()

(680, 17)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Kyler Murray,ARI,QB,317.9,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4
1,Patrick Mahomes II,KC,QB,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3
2,Russell Wilson,SEA,QB,298.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0
3,Josh Allen,BUF,QB,295.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1
4,Aaron Rodgers,GB,QB,285.8,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5


In [22]:
check_names(df, fantasy_pts)

In [23]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [24]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [25]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [26]:
print(offense.shape)
offense.head(10)

(594, 42)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Dalvin Cook,MIN,RB,25,11,11,0,0,0,0,0,251,1250,4.98,13,43,35,314,8.97,1,4,3,14,3.0,,257.9,1,1,240.4,20.8,15.1,23.9,26.6,14.9,,,46.6,37.2,11.2,20.0,6.2,17.9
1,Derrick Henry,TEN,RB,26,12,12,0,0,0,0,0,271,1317,4.86,12,27,15,102,6.8,0,1,1,12,,,219.4,2,2,211.9,13.1,8.4,25.0,,18.3,38.4,13.2,17.2,6.8,10.9,19.2,36.5,4.9
2,Alvin Kamara,NOR,RB,25,12,7,0,0,0,0,0,143,673,4.71,9,86,70,655,9.36,4,1,0,13,,,245.8,3,3,210.8,18.7,29.4,31.7,17.9,11.9,,14.8,16.3,10.9,27.8,10.5,5.2,15.7
3,Tyreek Hill,KAN,WR,26,12,12,0,0,0,0,0,11,90,8.18,1,111,74,1079,14.58,13,1,0,14,,,237.9,1,4,200.9,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8,24.1,,16.0,44.9,8.8
4,Patrick Mahomes,KAN,QB,25,12,12,316,463,3815,31,2,47,241,5.13,2,1,0,0,,0,2,1,2,,2.0,312.7,1,5,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3
5,Kyler Murray,ARI,QB,23,12,12,285,426,2987,22,10,102,665,6.52,10,0,0,0,,0,6,3,10,,,318.0,2,6,317.9,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4
6,Travis Kelce,KAN,TE,31,12,12,1,2,4,0,0,0,0,,0,111,82,1114,13.59,8,1,1,8,,,198.6,1,7,157.6,11.0,15.0,8.7,7.0,16.8,16.5,3.1,16.9,15.9,,18.9,8.2,19.6
7,James Robinson,JAX,RB,22,12,12,0,0,0,0,0,212,968,4.57,7,53,42,310,7.38,2,3,1,9,1.0,,202.8,4,8,181.8,9.0,18.0,24.9,10.7,5.0,11.3,27.7,,15.9,11.2,9.4,21.9,16.8
8,Davante Adams,GNB,WR,28,10,10,0,0,0,0,0,0,0,,0,111,84,1029,12.25,13,1,1,13,,,220.9,2,9,178.9,27.6,3.6,,,,6.1,31.6,23.3,23.3,10.6,16.6,12.1,24.1
9,Russell Wilson,SEA,QB,32,12,12,305,436,3479,32,11,68,424,6.24,1,0,0,0,,0,7,4,1,,1.0,298.6,3,10,298.5,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0


In [27]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                       26
Week_1                   185
Week_2                   187
Week_3                   189
Week_4                   215
Week_5                   245
Week_6        

In [28]:
offense.fillna('0.0', inplace = True)

In [29]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13']

In [30]:
update_int_dtype(offense, off_integers)

In [31]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [32]:
update_float_dtype(offense, off_floats)

In [33]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Dalvin Cook,MIN,RB,25,11,11,0,0,0,0,0,251,1250,13.8,13,43,35,314,17.7,1,4,3,14,3,0,257.9,1,1,240.4,20.8,15.1,23.9,26.6,14.9,0.0,0.0,46.6,37.2,11.2,20.0,6.2,17.9
1,Derrick Henry,TEN,RB,26,12,12,0,0,0,0,0,271,1317,12.6,12,27,15,102,14.0,0,1,1,12,0,0,219.4,2,2,211.9,13.1,8.4,25.0,0.0,18.3,38.4,13.2,17.2,6.8,10.9,19.2,36.5,4.9
2,Alvin Kamara,NOR,RB,25,12,7,0,0,0,0,0,143,673,11.1,9,86,70,655,12.6,4,1,0,13,0,0,245.8,3,3,210.8,18.7,29.4,31.7,17.9,11.9,0.0,14.8,16.3,10.9,27.8,10.5,5.2,15.7
3,Tyreek Hill,KAN,WR,26,12,12,0,0,0,0,0,11,90,9.8,1,111,74,1079,19.8,13,1,0,14,0,0,237.9,1,4,200.9,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8,24.1,0.0,16.0,44.9,8.8
4,Patrick Mahomes,KAN,QB,25,12,12,316,463,3815,31,2,47,241,6.3,2,1,0,0,0.0,0,2,1,2,0,2,312.7,1,5,312.6,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,0.0,22.5,31.3,19.3


In [34]:
team_dict = {'LAR': 'Los Angeles Rams', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Rams
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


## Defense Stats

In [35]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Pittsburgh Steelers,12,211,740,4.9,3606,287,1208,4.2,7,409,5.3,229,11.8,2398,17,30,17,56.7%,23,16,7,44
1,Miami Dolphins,12,212,760,5.6,4262,317,1469,4.6,13,412,6.3,257,11.9,2793,14,36,21,58.3%,21,13,8,31
2,Baltimore Ravens,12,231,786,5.2,4052,301,1339,4.4,8,457,5.6,299,9.8,2713,17,34,21,61.8%,18,7,11,28
3,New Orleans Saints,12,241,714,4.9,3466,276,913,3.3,5,402,5.8,245,11.3,2553,21,33,22,66.7%,19,13,6,36
4,Los Angeles Rams,12,243,757,4.6,3496,286,1117,3.9,10,435,5.1,272,9.7,2379,14,33,22,66.7%,20,12,8,36


In [36]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [37]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [38]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Pittsburgh Steelers,12,211,740,4.9,3606,287,1208,4.2,7,409,5.3,229,11.8,2398,17,30,17,56.7%,23,16,7,44,Pittsburgh Steelers,PIT,DST,127.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0
1,Miami Dolphins,12,212,760,5.6,4262,317,1469,4.6,13,412,6.3,257,11.9,2793,14,36,21,58.3%,21,13,8,31,Miami Dolphins,MIA,DST,116.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0
2,Baltimore Ravens,12,231,786,5.2,4052,301,1339,4.4,8,457,5.6,299,9.8,2713,17,34,21,61.8%,18,7,11,28,Baltimore Ravens,BAL,DST,110.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0
3,New Orleans Saints,12,241,714,4.9,3466,276,913,3.3,5,402,5.8,245,11.3,2553,21,33,22,66.7%,19,13,6,36,New Orleans Saints,NO,DST,97.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0,16.0,14.0,6.0
4,Los Angeles Rams,12,243,757,4.6,3496,286,1117,3.9,10,435,5.1,272,9.7,2379,14,33,22,66.7%,20,12,8,36,Los Angeles Rams,LAR,DST,102.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0


In [39]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [40]:
defense_df.shape

(32, 37)

In [41]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
Week_6                       4
Week_7                       4
Week_8                       4
Week_9  

In [42]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Pittsburgh Steelers,12,211,740,4.9,3606,287,1208,4.2,7,409,5.3,229,11.8,2398,17,30,17,56.7%,23,16,7,44,127.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0
1,Miami Dolphins,12,212,760,5.6,4262,317,1469,4.6,13,412,6.3,257,11.9,2793,14,36,21,58.3%,21,13,8,31,116.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0
2,Baltimore Ravens,12,231,786,5.2,4052,301,1339,4.4,8,457,5.6,299,9.8,2713,17,34,21,61.8%,18,7,11,28,110.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0
3,New Orleans Saints,12,241,714,4.9,3466,276,913,3.3,5,402,5.8,245,11.3,2553,21,33,22,66.7%,19,13,6,36,97.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0,16.0,14.0,6.0
4,Los Angeles Rams,12,243,757,4.6,3496,286,1117,3.9,10,435,5.1,272,9.7,2379,14,33,22,66.7%,20,12,8,36,102.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0
5,Kansas City Chiefs,12,254,762,5.6,4298,334,1589,4.8,10,408,6.3,257,11.0,2709,19,33,25,75.8%,19,14,5,20,84.0,7.0,5.0,7.0,20.0,-1.0,3.0,18.0,7.0,1.0,,1.0,5.0,5.0
6,New England Patriots,12,255,717,5.8,4135,330,1426,4.3,11,369,7.0,244,11.6,2709,16,35,22,62.9%,18,14,4,18,94.0,11.0,6.0,15.0,4.0,,7.0,4.0,3.0,3.0,5.0,0.0,5.0,31.0
7,Washington Football Team,12,260,743,5.0,3731,311,1285,4.1,9,396,5.7,255,10.6,2446,18,34,16,47.1%,14,11,3,36,82.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,,5.0,0.0,12.0,15.0,3.0
8,New York Giants,12,265,758,5.4,4068,292,1160,4.0,10,434,6.2,291,10.9,2908,17,39,21,53.8%,20,10,10,32,89.0,4.0,9.0,-2.0,5.0,11.0,14.0,5.0,4.0,13.0,4.0,,9.0,13.0
9,Indianapolis Colts,12,273,728,5.3,3828,316,1211,3.8,14,384,6.4,244,11.5,2617,15,38,25,65.8%,19,13,6,28,115.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0,5.0,10.0,8.0,-3.0,12.0


In [43]:
defense_df.fillna('0.0', inplace=True)

In [44]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'TTL', 'Week_1', 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion',
              'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 
              'Week_12', 'Week_13']



In [45]:
remove_comma(defense_df, def_int)

In [46]:
update_int_dtype(defense_df, def_int)

In [47]:
adjust_float(defense_df, def_floats)

In [48]:
update_float_dtype(defense_df, def_floats)

In [49]:
update_percent(defense_df, 'RZ_Percent')

In [50]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [51]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13
0,Pittsburgh Steelers,12,211,740,4.9,3606,287,1208,4.2,7,409,5.3,229,11.8,2398,17,30,17,56.7,23,16,7,44,127.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0
1,Miami Dolphins,12,212,760,5.6,4262,317,1469,4.6,13,412,6.3,257,11.9,2793,14,36,21,58.3,21,13,8,31,116.0,2.0,2.0,12.0,3.0,12.0,15.0,0.0,17.0,8.0,4.0,5.0,14.0,14.0
2,Baltimore Ravens,12,231,786,5.2,4052,301,1339,4.4,8,457,5.6,299,9.8,2713,17,34,21,61.8,18,7,11,28,110.0,15.0,15.0,1.0,6.0,26.0,7.0,0.0,4.0,14.0,1.0,3.0,8.0,4.0
3,New Orleans Saints,12,241,714,4.9,3466,276,913,3.3,5,402,5.8,245,11.3,2553,21,33,22,66.7,19,13,6,36,97.0,15.0,4.0,-3.0,4.0,3.0,0.0,1.0,7.0,16.0,14.0,16.0,14.0,6.0
4,Los Angeles Rams,12,243,757,4.6,3496,286,1117,3.9,10,435,5.1,272,9.7,2379,14,33,22,66.7,20,12,8,36,102.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,0.0,13.0,5.0,15.0,11.0


## Kicking Stats

In [54]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Younghoe Koo,K,ATL,10,30,96.7,54,0-0,7-7,9-9,7-8,6-6,25,88.0,109
1,Ryan Succop,K,TB,12,24,91.7,50,0-0,7-7,8-8,6-7,1-2,38,94.7,102
2,Daniel Carlson,K,LV,11,25,92.0,54,0-0,11-11,6-6,2-4,4-4,32,96.9,100
3,Wil Lutz,K,NO,11,22,90.9,53,0-0,5-6,8-8,6-6,1-2,38,100.0,98
4,Jason Sanders,K,MIA,11,25,96.0,56,0-0,5-5,3-3,8-9,8-8,26,100.0,98


In [55]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12
0,Younghoe Koo,K,ATL,10,30,96.7,54,0-0,7-7,9-9,7-8,6-6,25,88.0,109,128.0,9.0,17.0,8.0,,13.0,20.0,2.0,13.0,12.0,,13.0,21.0
1,Ryan Succop,K,TB,12,24,91.7,50,0-0,7-7,8-8,6-7,1-2,38,94.7,102,110.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0,6.0,6.0
2,Daniel Carlson,K,LV,11,25,92.0,54,0-0,11-11,6-6,2-4,4-4,32,96.9,100,110.0,12.0,12.0,8.0,13.0,11.0,,9.0,10.0,7.0,15.0,7.0,6.0
3,Wil Lutz,K,NO,11,22,90.9,53,0-0,5-6,8-8,6-6,1-2,38,100.0,98,106.0,10.0,6.0,13.0,5.0,15.0,,11.0,14.0,8.0,10.0,6.0,8.0
4,Jason Sanders,K,MIA,11,25,96.0,56,0-0,5-5,3-3,8-9,8-8,26,100.0,98,122.0,4.0,11.0,7.0,18.0,22.0,6.0,,4.0,14.0,14.0,10.0,12.0
5,Joey Slye,K,CAR,12,31,77.4,56,0-0,9-10,7-7,7-8,1-6,28,92.9,98,107.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0,8.0,6.0,10.0,10.0
6,Rodrigo Blankenship,K,IND,11,26,88.5,44,0-0,5-5,10-11,8-9,0-1,29,93.1,96,104.0,8.0,15.0,12.0,14.0,11.0,8.0,,3.0,5.0,11.0,15.0,2.0
7,Tyler Bass,K,BUF,11,27,77.8,58,1-1,4-4,4-7,8-9,4-6,33,97.0,95,111.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0,,11.0
8,Graham Gano,K,NYG,11,26,96.2,55,0-0,4-4,10-10,7-7,4-5,17,100.0,92,107.0,4.0,7.0,13.0,9.0,20.0,8.0,3.0,5.0,13.0,10.0,,15.0
9,Justin Tucker,K,BAL,11,20,95.0,55,0-0,5-5,4-4,8-8,2-3,31,100.0,88,100.0,9.0,16.0,9.0,8.0,10.0,16.0,,8.0,7.0,5.0,10.0,2.0


In [56]:
dash = kicking_df['LNG'][40]
dash

'—'

In [57]:
kicking_df.fillna('0.0', inplace = True)

In [58]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 
              'Week_9', 'Week_10', 'Week_11', 'Week_12']


In [59]:
replace_dash(kicking_df, kick_int, dash)

In [60]:
replace_dash(kicking_df, kick_float, dash)

In [61]:
update_int_dtype(kicking_df, kick_int)

In [62]:
adjust_float(kicking_df, kick_float)

In [63]:
update_float_dtype(kicking_df, kick_float)

In [64]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12
0,Younghoe Koo,K,ATL,10,30,96.7,54,0-0,7-7,9-9,7-8,6-6,25,88.0,109,128.0,9.0,17.0,8.0,0.0,13.0,20.0,2.0,13.0,12.0,0.0,13.0,21.0
1,Ryan Succop,K,TB,12,24,91.7,50,0-0,7-7,8-8,6-7,1-2,38,94.7,102,110.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0,6.0,6.0
2,Daniel Carlson,K,LV,11,25,92.0,54,0-0,11-11,6-6,2-4,4-4,32,96.9,100,110.0,12.0,12.0,8.0,13.0,11.0,0.0,9.0,10.0,7.0,15.0,7.0,6.0
3,Wil Lutz,K,NO,11,22,90.9,53,0-0,5-6,8-8,6-6,1-2,38,100.0,98,106.0,10.0,6.0,13.0,5.0,15.0,0.0,11.0,14.0,8.0,10.0,6.0,8.0
4,Jason Sanders,K,MIA,11,25,96.0,56,0-0,5-5,3-3,8-9,8-8,26,100.0,98,122.0,4.0,11.0,7.0,18.0,22.0,6.0,0.0,4.0,14.0,14.0,10.0,12.0


In [65]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
Week_6      float64
Week_7      float64
Week_8      float64
Week_9      float64
Week_10     float64
Week_11     float64
Week_12     float64
dtype: object

## Pickle Cleaned DataFrames

In [66]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')