# Week 15 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [3]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [4]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [5]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [6]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [7]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [8]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [9]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [10]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [11]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [12]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [13]:
no_games = 15

In [14]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Kyler Murray,ARI,QB,336.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5
1,Patrick Mahomes II,KC,QB,334.2,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6
2,Russell Wilson,SEA,QB,321.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1
3,Aaron Rodgers,GB,QB,316.7,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9
4,Josh Allen,BUF,QB,314.9,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3


In [15]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_15,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Kyler Murray,ARI,QB,370.5,34.1,ARI,QB,336.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5
1,Patrick Mahomes II,KC,QB,360.1,25.9,KC,QB,334.2,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6
2,Josh Allen,BUF,QB,352.6,37.7,BUF,QB,314.9,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3
3,Aaron Rodgers,GB,QB,335.0,18.3,GB,QB,316.7,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9
4,Russell Wilson,SEA,QB,334.6,13.0,SEA,QB,321.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1


In [16]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_15', 'Week_1', 'Week_2',
       'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9',
       'Week_10', 'Week_11', 'Week_12', 'Week_13', 'Week_14'],
      dtype='object')

In [17]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 
                           'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 
                           'Week_14', 'Week_15']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Kyler Murray,ARI,QB,370.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5,34.1
1,Patrick Mahomes II,KC,QB,360.1,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6,25.9
2,Josh Allen,BUF,QB,352.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3,37.7
3,Aaron Rodgers,GB,QB,335.0,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9,18.3
4,Russell Wilson,SEA,QB,334.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1,13.0


In [18]:
print(fantasy_pts.shape)

(706, 19)


### Pickle Combined DataFrame for Next Week

In [19]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [20]:
print(df.shape)
df.head()

(608, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Dalvin Cook*,MIN,RB,25,13,13,0,0,0,0,0,297,1484,5.0,15,52,42,349,8.31,1,4,3,16,3.0,,300.3,1,1
1,Derrick Henry *,TEN,RB,26,14,14,0,0,0,0,0,321,1679,5.23,15,31,19,114,6.0,0,1,1,15,1.0,,278.8,2,2
2,Alvin Kamara*,NOR,RB,25,14,9,0,0,0,0,0,165,777,4.71,10,102,80,739,9.24,5,1,0,15,,,281.6,3,3
3,Tyreek Hill*,KAN,WR,26,14,14,0,0,0,0,0,13,123,9.46,2,129,83,1211,14.59,15,1,0,17,,,276.9,1,4
4,Kyler Murray *,ARI,QB,23,14,14,336,497,3637,26,11,123,741,6.02,11,0,0,0,,0,9,4,11,,,370.6,1,5


In [21]:
print(fantasy_pts.shape)
fantasy_pts.head()

(706, 19)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Kyler Murray,ARI,QB,370.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9,20.3,8.9,17.4,18.5,34.1
1,Patrick Mahomes II,KC,QB,360.1,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,,22.5,31.3,19.3,21.6,25.9
2,Josh Allen,BUF,QB,352.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4,,16.5,30.1,19.3,37.7
3,Aaron Rodgers,GB,QB,335.0,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4,22.7,25.6,23.5,30.9,18.3
4,Russell Wilson,SEA,QB,334.6,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9,20.1,14.4,16.0,23.1,13.0


In [22]:
check_names(df, fantasy_pts)

In [23]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [24]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [25]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [26]:
print(offense.shape)
offense.head(10)

(608, 44)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Dalvin Cook*,MIN,RB,25,13,13,0,0,0,0,0,297,1484,5.0,15,52,42,349,8.31,1,4,3,16,3.0,,300.3,1,1,,,,,,,,,,,,,,,,
1,Derrick Henry *,TEN,RB,26,14,14,0,0,0,0,0,321,1679,5.23,15,31,19,114,6.0,0,1,1,15,1.0,,278.8,2,2,,,,,,,,,,,,,,,,
2,Alvin Kamara*,NOR,RB,25,14,9,0,0,0,0,0,165,777,4.71,10,102,80,739,9.24,5,1,0,15,,,281.6,3,3,,,,,,,,,,,,,,,,
3,Tyreek Hill*,KAN,WR,26,14,14,0,0,0,0,0,13,123,9.46,2,129,83,1211,14.59,15,1,0,17,,,276.9,1,4,,,,,,,,,,,,,,,,
4,Kyler Murray *,ARI,QB,23,14,14,336,497,3637,26,11,123,741,6.02,11,0,0,0,,0,9,4,11,,,370.6,1,5,,,,,,,,,,,,,,,,
5,Travis Kelce*,KAN,TE,31,14,14,1,2,4,0,0,0,0,,0,132,98,1318,13.45,10,1,1,10,1.0,,241.0,1,6,,,,,,,,,,,,,,,,
6,Patrick Mahomes *,KAN,QB,25,14,14,366,544,4462,36,5,59,287,4.86,2,1,0,0,,0,5,2,2,,3.0,360.2,2,7,,,,,,,,,,,,,,,,
7,Josh Allen *,BUF,QB,24,14,14,351,511,4000,30,9,96,383,3.99,8,1,1,12,12.0,1,9,6,9,,,353.0,3,8,,,,,,,,,,,,,,,,
8,James Robinson,JAX,RB,22,14,14,0,0,0,0,0,240,1070,4.46,7,60,49,344,7.02,3,3,1,10,1.0,,225.9,4,9,201.4,9.0,18.0,24.9,10.7,5.0,11.3,27.7,,15.9,11.2,9.4,21.9,16.8,8.3,11.3
9,Davante Adams*,GNB,WR,28,12,12,0,0,0,0,0,0,0,,0,131,98,1186,12.1,14,1,1,14,,,249.6,2,10,,,,,,,,,,,,,,,,


In [27]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                       50
Week_1                   225
Week_2                   228
Week_3                   228
Week_4                   252
Week_5                   279
Week_6        

In [28]:
offense.fillna('0.0', inplace = True)

In [30]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 
              'Week_14', 'Week_15']

In [31]:
fill_blanks(offense, off_integers)

In [32]:
update_int_dtype(offense, off_integers)

In [33]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [34]:
update_float_dtype(offense, off_floats)

In [35]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Dalvin Cook*,MIN,RB,25,13,13,0,0,0,0,0,297,1484,5.0,15,52,42,349,11.1,1,4,3,16,3,0,300.3,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Derrick Henry *,TEN,RB,26,14,14,0,0,0,0,0,321,1679,7.3,15,31,19,114,6.0,0,1,1,15,1,0,278.8,2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Alvin Kamara*,NOR,RB,25,14,9,0,0,0,0,0,165,777,11.1,10,102,80,739,11.4,5,1,0,15,0,0,281.6,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Tyreek Hill*,KAN,WR,26,14,14,0,0,0,0,0,13,123,13.6,2,129,83,1211,19.9,15,1,0,17,0,0,276.9,1,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Kyler Murray *,ARI,QB,23,14,14,336,497,3637,26,11,123,741,6.2,11,0,0,0,0.0,0,9,4,11,0,0,370.6,1,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
team_dict = {'LAR': 'Los Angeles Rams', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Rams
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


## Defense Stats

In [37]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Miami Dolphins,14,257,873,5.7,5013,363,1680,4.6,14,473,6.5,298,12.2,3333,16,39,22,56.4%,26,16,10,37
1,Pittsburgh Steelers,14,264,867,4.8,4170,355,1464,4.1,9,465,5.3,260,11.6,2706,20,39,21,53.8%,25,17,8,47
2,Los Angeles Rams,14,269,879,4.6,4005,346,1317,3.8,11,489,5.0,308,9.7,2688,15,41,24,58.5%,21,13,8,44
3,Baltimore Ravens,14,287,918,5.2,4812,351,1539,4.4,12,534,5.8,350,10.0,3273,21,41,27,65.9%,20,8,12,33
4,Washington Football Team,14,295,872,5.0,4377,364,1574,4.3,11,468,5.5,298,10.3,2803,20,37,19,51.4%,18,13,5,40


In [38]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [39]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [40]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Miami Dolphins,14,257,873,5.7,5013,363,1680,4.6,14,473,6.5,298,12.2,3333,16,39,22,56.4%,26,16,10,37,Miami Dolphins,MIA,DST,135.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0,10.0,9.0
1,Pittsburgh Steelers,14,264,867,4.8,4170,355,1464,4.1,9,465,5.3,260,11.6,2706,20,39,21,53.8%,25,17,8,47,Pittsburgh Steelers,PIT,DST,135.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0,2.0
2,Los Angeles Rams,14,269,879,4.6,4005,346,1317,3.8,11,489,5.0,308,9.7,2688,15,41,24,58.5%,21,13,8,44,Los Angeles Rams,LAR,DST,125.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0,21.0,2.0
3,Baltimore Ravens,14,287,918,5.2,4812,351,1539,4.4,12,534,5.8,350,10.0,3273,21,41,27,65.9%,20,8,12,33,Baltimore Ravens,BAL,DST,120.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0,0.0,10.0
4,Washington Football Team,14,295,872,5.0,4377,364,1574,4.3,11,468,5.5,298,10.3,2803,20,37,19,51.4%,18,13,5,40,Washington Football Team,WAS,DST,108.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,,5.0,0.0,12.0,15.0,3.0,23.0,3.0


In [41]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [42]:
defense_df.shape

(32, 39)

In [43]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
Week_6                       4
Week_7                       4
Week_8                       4
Week_9  

In [44]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Miami Dolphins,14,257,873,5.7,5013,363,1680,4.6,14,473,6.5,298,12.2,3333,16,39,22,56.4%,26,16,10,37,135.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0,5.0,14.0,14.0,10.0,9.0
1,Pittsburgh Steelers,14,264,867,4.8,4170,355,1464,4.1,9,465,5.3,260,11.6,2706,20,39,21,53.8%,25,17,8,47,135.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0,2.0
2,Los Angeles Rams,14,269,879,4.6,4005,346,1317,3.8,11,489,5.0,308,9.7,2688,15,41,24,58.5%,21,13,8,44,125.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0,5.0,15.0,11.0,21.0,2.0
3,Baltimore Ravens,14,287,918,5.2,4812,351,1539,4.4,12,534,5.8,350,10.0,3273,21,41,27,65.9%,20,8,12,33,120.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0,3.0,8.0,4.0,0.0,10.0
4,Washington Football Team,14,295,872,5.0,4377,364,1574,4.3,11,468,5.5,298,10.3,2803,20,37,19,51.4%,18,13,5,40,108.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,,5.0,0.0,12.0,15.0,3.0,23.0,3.0
5,New Orleans Saints,14,297,872,4.9,4290,353,1338,3.8,8,479,5.7,288,11.1,2952,25,41,28,68.3%,21,13,8,40,106.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0,16.0,14.0,6.0,2.0,7.0
6,New England Patriots,14,301,849,5.7,4836,408,1862,4.6,15,420,6.7,280,11.1,2974,17,42,27,64.3%,20,16,4,21,102.0,11.0,6.0,15.0,4.0,,7.0,4.0,3.0,3.0,5.0,0.0,5.0,31.0,4.0,4.0
7,Kansas City Chiefs,14,310,891,5.6,4950,375,1729,4.6,12,491,6.2,300,11.2,3221,24,39,30,76.9%,21,16,5,25,100.0,7.0,5.0,7.0,20.0,-1.0,3.0,18.0,7.0,1.0,,1.0,5.0,5.0,14.0,2.0
8,New York Giants,14,311,900,5.4,4850,365,1425,3.9,12,501,6.4,342,10.8,3425,20,49,26,53.1%,20,10,10,34,92.0,4.0,9.0,-2.0,5.0,11.0,14.0,5.0,4.0,13.0,4.0,,9.0,13.0,1.0,2.0
9,Chicago Bears,14,318,922,5.3,4924,403,1659,4.1,10,486,6.3,307,11.4,3265,22,48,25,52.1%,15,8,7,33,85.0,3.0,12.0,4.0,2.0,6.0,11.0,9.0,1.0,4.0,12.0,,-4.0,3.0,17.0,5.0


In [45]:
defense_df.fillna('0.0', inplace=True)

In [46]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'TTL', 'Week_1', 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion',
              'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10', 'Week_11', 
              'Week_12', 'Week_13', 'Week_14', 'Week_15']



In [47]:
remove_comma(defense_df, def_int)

In [48]:
update_int_dtype(defense_df, def_int)

In [49]:
adjust_float(defense_df, def_floats)

In [50]:
update_float_dtype(defense_df, def_floats)

In [51]:
update_percent(defense_df, 'RZ_Percent')

In [52]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [53]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14,Week_15
0,Miami Dolphins,14,257,873,5.7,5013,363,1680,4.6,14,473,6.5,298,12.2,3333,16,39,22,56.4,26,16,10,37,135.0,2.0,2.0,12.0,3.0,12.0,15.0,0.0,17.0,8.0,4.0,5.0,14.0,14.0,10.0,9.0
1,Pittsburgh Steelers,14,264,867,4.8,4170,355,1464,4.1,9,465,5.3,260,11.6,2706,20,39,21,53.8,25,17,8,47,135.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0,17.0,14.0,3.0,6.0,2.0
2,Los Angeles Rams,14,269,879,4.6,4005,346,1317,3.8,11,489,5.0,308,9.7,2688,15,41,24,58.5,21,13,8,44,125.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,0.0,13.0,5.0,15.0,11.0,21.0,2.0
3,Baltimore Ravens,14,287,918,5.2,4812,351,1539,4.4,12,534,5.8,350,10.0,3273,21,41,27,65.9,20,8,12,33,120.0,15.0,15.0,1.0,6.0,26.0,7.0,0.0,4.0,14.0,1.0,3.0,8.0,4.0,0.0,10.0
4,Washington Football Team,14,295,872,5.0,4377,364,1574,4.3,11,468,5.5,298,10.3,2803,20,37,19,51.4,18,13,5,40,108.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,0.0,5.0,0.0,12.0,15.0,3.0,23.0,3.0


## Kicking Stats

In [68]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113


In [69]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124,146.0,9.0,17.0,8.0,,13.0,20.0,2.0,13.0,12.0,,13.0,21.0,12.0,6.0
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120,146.0,4.0,11.0,7.0,18.0,22.0,6.0,,4.0,14.0,14.0,10.0,12.0,14.0,10.0
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116,125.0,8.0,15.0,12.0,14.0,11.0,8.0,,3.0,5.0,11.0,15.0,2.0,7.0,14.0
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116,127.0,12.0,12.0,8.0,13.0,11.0,,9.0,10.0,7.0,15.0,7.0,6.0,8.0,9.0
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113,129.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0,,11.0,10.0,8.0
5,Ryan Succop,K,TB,13,26,92.3,50,1-1,7-7,8-8,7-8,1-2,41,92.7,110,119.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0,6.0,6.0,,9.0
6,Harrison Butker,K,KC,13,25,92.0,58,2-2,6-6,9-9,3-5,3-3,46,87.0,109,118.0,10.0,13.0,4.0,8.0,6.0,8.0,14.0,5.0,11.0,,5.0,9.0,17.0,8.0
7,Joey Slye,K,CAR,13,33,78.8,56,0-0,10-11,8-8,7-8,1-6,31,93.5,107,116.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0,8.0,6.0,10.0,10.0,,9.0
8,Justin Tucker,K,BAL,13,24,91.7,55,0-0,6-6,5-6,8-8,3-4,40,97.5,105,119.0,9.0,16.0,9.0,8.0,10.0,16.0,,8.0,7.0,5.0,10.0,2.0,10.0,9.0
9,Wil Lutz,K,NO,13,25,80.0,53,0-0,5-6,8-8,6-8,1-3,44,100.0,104,112.0,10.0,6.0,13.0,5.0,15.0,,11.0,14.0,8.0,10.0,6.0,8.0,3.0,3.0


In [70]:
dash = kicking_df['LNG'][39]
dash

'—'

In [71]:
kicking_df.fillna('0.0', inplace = True)

In [73]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 
              'Week_9', 'Week_10', 'Week_11', 'Week_12', 'Week_13', 'Week_14']


In [74]:
replace_dash(kicking_df, kick_int, dash)

In [75]:
replace_dash(kicking_df, kick_float, dash)

In [76]:
update_int_dtype(kicking_df, kick_int)

In [77]:
adjust_float(kicking_df, kick_float)

In [78]:
update_float_dtype(kicking_df, kick_float)

In [79]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10,Week_11,Week_12,Week_13,Week_14
0,Younghoe Koo,K,ATL,12,34,97.1,54,0-0,8-8,10-10,8-9,7-7,28,89.3,124,146.0,9.0,17.0,8.0,0.0,13.0,20.0,2.0,13.0,12.0,0.0,13.0,21.0,12.0,6.0
1,Jason Sanders,K,MIA,13,32,93.8,56,1-1,7-7,4-4,10-12,8-8,30,100.0,120,146.0,4.0,11.0,7.0,18.0,22.0,6.0,0.0,4.0,14.0,14.0,10.0,12.0,14.0,10.0
2,Rodrigo Blankenship,K,IND,13,30,90.0,44,0-0,7-7,11-12,9-10,0-1,37,94.6,116,125.0,8.0,15.0,12.0,14.0,11.0,8.0,0.0,3.0,5.0,11.0,15.0,2.0,7.0,14.0
3,Daniel Carlson,K,LV,13,28,92.9,54,0-0,13-13,6-6,3-5,4-4,39,97.4,116,127.0,12.0,12.0,8.0,13.0,11.0,0.0,9.0,10.0,7.0,15.0,7.0,6.0,8.0,9.0
4,Tyler Bass,K,BUF,13,31,80.6,58,1-1,6-6,6-9,8-9,4-6,40,95.0,113,129.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0,0.0,11.0,10.0,8.0


In [80]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
Week_6      float64
Week_7      float64
Week_8      float64
Week_9      float64
Week_10     float64
Week_11     float64
Week_12     float64
Week_13     float64
Week_14     float64
dtype: object

## Pickle Cleaned DataFrames

In [81]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')