# Week 11 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [14]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [15]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [16]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [17]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [18]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [19]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [20]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [21]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [22]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [23]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [24]:
no_games = 10

In [25]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9
0,Kyler Murray,ARI,QB,240.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9
1,Patrick Mahomes II,KC,QB,238.9,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9
2,Russell Wilson,SEA,QB,236.1,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1
3,Josh Allen,BUF,QB,220.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0
4,Aaron Rodgers,GB,QB,187.6,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9


In [26]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_10,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9
0,Kyler Murray,ARI,QB,271.3,30.9,ARI,QB,240.4,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9
1,Josh Allen,BUF,QB,249.0,28.4,BUF,QB,220.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0
2,Russell Wilson,SEA,QB,248.0,11.9,SEA,QB,236.1,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1
3,Patrick Mahomes II,KC,QB,239.5,,KC,QB,238.9,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9
4,Aaron Rodgers,GB,QB,214.0,26.4,GB,QB,187.6,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9


In [27]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_10', 'Week_1', 'Week_2',
       'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9'],
      dtype='object')

In [28]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 
                           'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Kyler Murray,ARI,QB,271.3,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9
1,Josh Allen,BUF,QB,249.0,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4
2,Russell Wilson,SEA,QB,248.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9
3,Patrick Mahomes II,KC,QB,239.5,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,
4,Aaron Rodgers,GB,QB,214.0,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4


In [29]:
print(fantasy_pts.shape)

(658, 14)


### Pickle Combined DataFrame for Next Week

In [30]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [31]:
print(df.shape)
df.head()

(569, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Dalvin Cook,MIN,RB,25,8,8,0,0,0,0,0,174,954,5.48,12,25,20,189,9.45,1,2,1,13,3.0,,206.3,1,1
1,Alvin Kamara,NOR,RB,25,9,6,0,0,0,0,0,104,486,4.67,7,80,67,648,9.67,4,1,0,11,,,212.9,2,2
2,Kyler Murray,ARI,QB,23,9,9,212,311,2375,17,8,87,604,6.94,10,0,0,0,,0,4,2,10,,,271.4,1,3
3,Derrick Henry,TEN,RB,26,9,9,0,0,0,0,0,201,946,4.71,8,20,11,87,7.91,0,0,0,8,,,156.8,3,4
4,Josh Allen,BUF,QB,24,10,10,249,364,2871,21,7,72,279,3.88,5,1,1,12,12.0,1,4,4,6,,,249.4,2,5


In [32]:
print(fantasy_pts.shape)
fantasy_pts.head()

(658, 14)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Kyler Murray,ARI,QB,271.3,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9
1,Josh Allen,BUF,QB,249.0,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4
2,Russell Wilson,SEA,QB,248.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9
3,Patrick Mahomes II,KC,QB,239.5,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,
4,Aaron Rodgers,GB,QB,214.0,30.8,18.2,24.5,29.6,,5.8,27.3,22.5,28.9,26.4


In [33]:
check_names(df, fantasy_pts)

In [34]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [35]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [36]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [37]:
print(offense.shape)
offense.head(10)

(569, 39)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Dalvin Cook,MIN,RB,25,8,8,0,0,0,0,0,174,954,5.48,12,25,20,189,9.45,1,2,1,13,3.0,,206.3,1,1,196.3,20.8,15.1,23.9,26.6,14.9,,,46.6,37.2,11.2
1,Alvin Kamara,NOR,RB,25,9,6,0,0,0,0,0,104,486,4.67,7,80,67,648,9.67,4,1,0,11,,,212.9,2,2,179.4,18.7,29.4,31.7,17.9,11.9,,14.8,16.3,10.9,27.8
2,Kyler Murray,ARI,QB,23,9,9,212,311,2375,17,8,87,604,6.94,10,0,0,0,,0,4,2,10,,,271.4,1,3,271.3,27.3,33.1,24.7,23.1,27.3,28.9,38.1,,37.9,30.9
3,Derrick Henry,TEN,RB,26,9,9,0,0,0,0,0,201,946,4.71,8,20,11,87,7.91,0,0,0,8,,,156.8,3,4,151.3,13.1,8.4,25.0,,18.3,38.4,13.2,17.2,6.8,10.9
4,Josh Allen,BUF,QB,24,10,10,249,364,2871,21,7,72,279,3.88,5,1,1,12,12.0,1,4,4,6,,,249.4,2,5,249.0,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4
5,Patrick Mahomes,KAN,QB,25,9,9,220,329,2687,25,1,35,171,4.89,2,0,0,0,,0,1,0,2,,2.0,239.6,3,6,239.5,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6,30.9,
6,Russell Wilson,SEA,QB,32,9,9,233,334,2789,28,10,45,325,7.22,1,0,0,0,,0,4,3,1,,1.0,248.1,4,7,248.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7,24.1,11.9
7,Travis Kelce,KAN,TE,31,9,9,0,0,0,0,0,0,0,,0,80,58,769,13.26,6,1,1,6,,,139.9,1,8,110.9,11.0,15.0,8.7,7.0,16.8,16.5,3.1,16.9,15.9,
8,Tyreek Hill,KAN,WR,26,9,9,0,0,0,0,0,8,62,7.75,1,72,44,650,14.77,9,1,0,10,,,153.2,1,9,131.2,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8,24.1,
9,D.K. Metcalf,SEA,WR,23,9,9,0,0,0,0,0,0,0,,0,72,45,816,18.13,8,1,1,8,,,150.1,2,10,127.6,15.5,15.2,15.0,10.6,21.3,,2.3,28.1,16.8,2.8


In [38]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                       22
Week_1                   162
Week_2                   164
Week_3                   165
Week_4                   191
Week_5                   221
Week_6        

In [39]:
offense.fillna('0.0', inplace = True)

In [42]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10']

In [41]:
update_int_dtype(offense, off_integers)

In [43]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [44]:
update_float_dtype(offense, off_floats)

In [45]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Dalvin Cook,MIN,RB,25,8,8,0,0,0,0,0,174,954,9.8,12,25,20,189,13.5,1,2,1,13,3,0,206.3,1,1,196.3,20.8,15.1,23.9,26.6,14.9,0.0,0.0,46.6,37.2,11.2
1,Alvin Kamara,NOR,RB,25,9,6,0,0,0,0,0,104,486,10.7,7,80,67,648,15.7,4,1,0,11,0,0,212.9,2,2,179.4,18.7,29.4,31.7,17.9,11.9,0.0,14.8,16.3,10.9,27.8
2,Kyler Murray,ARI,QB,23,9,9,212,311,2375,17,8,87,604,15.4,10,0,0,0,0.0,0,4,2,10,0,0,271.4,1,3,271.3,27.3,33.1,24.7,23.1,27.3,28.9,38.1,0.0,37.9,30.9
3,Derrick Henry,TEN,RB,26,9,9,0,0,0,0,0,201,946,11.1,8,20,11,87,16.1,0,0,0,8,0,0,156.8,3,4,151.3,13.1,8.4,25.0,0.0,18.3,38.4,13.2,17.2,6.8,10.9
4,Josh Allen,BUF,QB,24,10,10,249,364,2871,21,7,72,279,11.8,5,1,1,12,12.0,1,4,4,6,0,0,249.4,2,5,249.0,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5,36.0,28.4


In [46]:
team_dict = {'LAR': 'Los Angeles Rams', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Rams
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


In [47]:
offense.dtypes

Player                    object
Team                      object
Position                  object
Age                        int64
Games                      int64
GamesStarted               int64
CompletedPasses            int64
PassesAttempted            int64
PassingYds                 int64
PassingTDs                 int64
Interceptions              int64
RushingAttempts            int64
RushingYds                 int64
RushingYdspAtt           float64
RushingTDs                 int64
Targeted                   int64
Receptions                 int64
ReceivingYds               int64
YdspReception            float64
ReceivingTDs               int64
Fumbles                    int64
LostFumbles                int64
TtlTDs                     int64
TwoPTConversions           int64
TwoPTConversionPasses      int64
FDFantasyPts             float64
PositionRank              object
OverallRank               object
TTL                      float64
Week_1                   float64
Week_2    

## Defense Stats

In [48]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Baltimore Ravens,9,165,570,5.1,2907,219,987,41.5,7,326,5.9,169,9.0,1920,12,21,16,70.4%,14,4,10,25
1,Los Angeles Rams,9,168,558,4.7,2668,214,871,36.1,7,313,5.6,193,8.7,1797,9,26,15,61.1%,13,8,5,31
2,Pittsburgh Steelers,9,171,565,5.1,2863,221,961,37.0,5,308,6.3,145,11.2,1902,15,24,14,63.1%,17,11,6,36
3,Indianapolis Colts,9,177,541,4.8,2614,234,826,30.9,7,286,6.2,142,9.8,1788,11,25,16,64.9%,13,11,2,21
4,Miami Dolphins,9,182,594,5.8,3347,244,1153,42.9,11,328,7.0,168,10.5,2194,13,28,20,69.6%,15,8,7,22


In [49]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [50]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [51]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Baltimore Ravens,9,165,570,5.1,2907,219,987,41.5,7,326,5.9,169,9.0,1920,12,21,16,70.4%,14,4,10,25,Baltimore Ravens,BAL,DST,95.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0
1,Los Angeles Rams,9,168,558,4.7,2668,214,871,36.1,7,313,5.6,193,8.7,1797,9,26,15,61.1%,13,8,5,31,Los Angeles Rams,LAR,DST,71.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0
2,Pittsburgh Steelers,9,171,565,5.1,2863,221,961,37.0,5,308,6.3,145,11.2,1902,15,24,14,63.1%,17,11,6,36,Pittsburgh Steelers,PIT,DST,93.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0
3,Indianapolis Colts,9,177,541,4.8,2614,234,826,30.9,7,286,6.2,142,9.8,1788,11,25,16,64.9%,13,11,2,21,Indianapolis Colts,IND,DST,98.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0,5.0,10.0
4,Miami Dolphins,9,182,594,5.8,3347,244,1153,42.9,11,328,7.0,168,10.5,2194,13,28,20,69.6%,15,8,7,22,Miami Dolphins,MIA,DST,81.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0


In [52]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [53]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Baltimore Ravens,9,165,570,5.1,2907,219,987,41.5,7,326,5.9,169,9.0,1920,12,21,16,70.4%,14,4,10,25,95.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0
1,Los Angeles Rams,9,168,558,4.7,2668,214,871,36.1,7,313,5.6,193,8.7,1797,9,26,15,61.1%,13,8,5,31,71.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0
2,Pittsburgh Steelers,9,171,565,5.1,2863,221,961,37.0,5,308,6.3,145,11.2,1902,15,24,14,63.1%,17,11,6,36,93.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0
3,Indianapolis Colts,9,177,541,4.8,2614,234,826,30.9,7,286,6.2,142,9.8,1788,11,25,16,64.9%,13,11,2,21,98.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0,5.0,10.0
4,Miami Dolphins,9,182,594,5.8,3347,244,1153,42.9,11,328,7.0,168,10.5,2194,13,28,20,69.6%,15,8,7,22,81.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0


In [54]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
Week_6                       4
Week_7                       4
Week_8                       4
Week_9  

In [55]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Baltimore Ravens,9,165,570,5.1,2907,219,987,41.5,7,326,5.9,169,9.0,1920,12,21,16,70.4%,14,4,10,25,95.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0,14.0,1.0
1,Los Angeles Rams,9,168,558,4.7,2668,214,871,36.1,7,313,5.6,193,8.7,1797,9,26,15,61.1%,13,8,5,31,71.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,,13.0
2,Pittsburgh Steelers,9,171,565,5.1,2863,221,961,37.0,5,308,6.3,145,11.2,1902,15,24,14,63.1%,17,11,6,36,93.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0
3,Indianapolis Colts,9,177,541,4.8,2614,234,826,30.9,7,286,6.2,142,9.8,1788,11,25,16,64.9%,13,11,2,21,98.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0,5.0,10.0
4,Miami Dolphins,9,182,594,5.8,3347,244,1153,42.9,11,328,7.0,168,10.5,2194,13,28,20,69.6%,15,8,7,22,81.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0,8.0,4.0
5,Kansas City Chiefs,9,183,589,5.4,3187,262,1246,43.6,9,308,6.2,150,9.9,1941,11,24,18,69.4%,14,9,5,19,73.0,7.0,5.0,7.0,20.0,-1.0,3.0,18.0,7.0,1.0,
6,Chicago Bears,10,209,658,5.1,3401,287,1152,41.5,6,350,6.5,189,10.8,2249,12,34,15,48.1%,11,6,5,21,64.0,3.0,12.0,4.0,2.0,6.0,11.0,9.0,1.0,4.0,12.0
7,Arizona Cardinals,9,210,609,5.5,3330,238,1082,40.5,5,349,6.6,209,10.2,2248,17,31,16,56%,12,8,4,22,52.0,4.0,9.0,4.0,1.0,7.0,15.0,7.0,,2.0,3.0
8,New England Patriots,9,211,520,6.1,3180,262,1163,39.4,8,245,8.3,143,12.5,2017,14,28,17,58.8%,15,11,4,13,58.0,11.0,6.0,15.0,4.0,,7.0,4.0,3.0,3.0,5.0
9,New Orleans Saints,9,213,549,5.2,2774,208,691,28.4,5,317,6.7,171,10.2,2083,20,28,21,68.7%,13,9,4,24,61.0,15.0,4.0,-3.0,4.0,3.0,,1.0,7.0,16.0,14.0


In [56]:
defense_df.fillna('0.0', inplace=True)

In [57]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'TTL', 'Week_1', 
              'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 'Week_9', 'Week_10']

# 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion',

In [58]:
remove_comma(defense_df, def_int)

In [59]:
update_int_dtype(defense_df, def_int)

In [60]:
adjust_float(defense_df, def_floats)

In [61]:
defense_df['Rushing_Yds_p_Att'] = round(defense_df['Rushing_Yds']/defense_df['Rushing_Att'], 2)
defense_df['Passing_Yds_p_Att'] = round(defense_df['Passing_Yds']/defense_df['Passing_Att'], 2)
defense_df['Yds_p_Completion'] = round(defense_df['Passing_Yds']/defense_df['Completions'], 2)

In [62]:
update_float_dtype(defense_df, def_floats)

In [63]:
update_percent(defense_df, 'RZ_Percent')

In [64]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [65]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Baltimore Ravens,9,165,570,5.1,2907,219,987,4.5,7,326,5.9,169,11.4,1920,12,21,16,70.4,14,4,10,25,95.0,15.0,15.0,1.0,6.0,26.0,7.0,0.0,4.0,14.0,1.0
1,Los Angeles Rams,9,168,558,4.7,2668,214,871,4.1,7,313,5.7,193,9.3,1797,9,26,15,61.1,13,8,5,31,71.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0,0.0,13.0
2,Pittsburgh Steelers,9,171,565,5.1,2863,221,961,4.3,5,308,6.2,145,13.1,1902,15,24,14,63.1,17,11,6,36,93.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0,7.0,12.0
3,Indianapolis Colts,9,177,541,4.8,2614,234,826,3.5,7,286,6.2,142,12.6,1788,11,25,16,64.9,13,11,2,21,98.0,4.0,15.0,26.0,7.0,5.0,4.0,0.0,15.0,5.0,10.0
4,Miami Dolphins,9,182,594,5.8,3347,244,1153,4.7,11,328,6.7,168,13.1,2194,13,28,20,69.6,15,8,7,22,81.0,2.0,2.0,12.0,3.0,12.0,15.0,0.0,17.0,8.0,4.0


In [66]:
defense_df.dtypes

Team                          object
GP                             int64
Ttl_Pts_Allowed                int64
Ttl_Offense_Plays_Allowed      int64
Yds_p_Play                   float64
Ttl_Yds                        int64
Rushing_Att                    int64
Rushing_Yds                    int64
Rushing_Yds_p_Att            float64
Rushing_TDs                    int64
Passing_Att                    int64
Passing_Yds_p_Att            float64
Completions                    int64
Yds_p_Completion             float64
Passing_Yds                    int64
Passing_TDs                    int64
RZ_Att                         int64
RZ_TD                          int64
RZ_Percent                   float64
Ttl_Turnovers                  int64
Interceptions                  int64
Fumbles                        int64
Sacks                          int64
TTL                          float64
Week_1                       float64
Week_2                       float64
Week_3                       float64
W

## Kicking Stats

In [80]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Ryan Succop,K,TB,10,22,90.9,50,0-0,6-6,7-7,6-7,1-2,32,93.8,90
1,Daniel Carlson,K,LV,9,22,90.9,54,0-0,9-9,5-5,2-4,4-4,28,96.4,87
2,Tyler Bass,K,BUF,10,25,76.0,58,1-1,4-4,4-7,6-7,4-6,30,96.7,86
3,Wil Lutz,K,NO,9,19,94.7,53,0-0,4-5,8-8,5-5,1-1,31,100.0,85
4,Jason Sanders,K,MIA,9,21,95.2,56,0-0,5-5,3-3,7-8,5-5,23,100.0,83


In [81]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Ryan Succop,K,TB,10,22,90.9,50,0-0,6-6,7-7,6-7,1-2,32,93.8,90,98.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0
1,Daniel Carlson,K,LV,9,22,90.9,54,0-0,9-9,5-5,2-4,4-4,28,96.4,87,97.0,12.0,12.0,8.0,13.0,11.0,,9.0,10.0,7.0,15.0
2,Tyler Bass,K,BUF,10,25,76.0,58,1-1,4-4,4-7,6-7,4-6,30,96.7,86,100.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0
3,Wil Lutz,K,NO,9,19,94.7,53,0-0,4-5,8-8,5-5,1-1,31,100.0,85,92.0,10.0,6.0,13.0,5.0,15.0,,11.0,14.0,8.0,10.0
4,Jason Sanders,K,MIA,9,21,95.2,56,0-0,5-5,3-3,7-8,5-5,23,100.0,83,100.0,4.0,11.0,7.0,18.0,22.0,6.0,,4.0,14.0,14.0
5,Younghoe Koo,K,ATL,8,22,95.5,54,0-0,5-5,6-6,7-8,3-3,21,85.7,81,94.0,9.0,17.0,8.0,,13.0,20.0,2.0,13.0,12.0,
6,Joey Slye,K,CAR,10,25,80.0,48,0-0,8-8,6-6,6-7,0-4,23,91.3,81,87.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0,8.0,6.0
7,Rodrigo Blankenship,K,IND,9,21,90.5,44,0-0,5-5,7-8,7-8,0-0,25,92.0,80,87.0,8.0,15.0,12.0,14.0,11.0,8.0,,3.0,5.0,11.0
8,Graham Gano,K,NYG,10,22,95.5,55,0-0,4-4,8-8,5-5,4-5,16,100.0,79,92.0,4.0,7.0,13.0,9.0,20.0,8.0,3.0,5.0,13.0,10.0
9,Randy Bullock,K,CIN,9,21,90.5,55,0-0,3-3,8-9,6-7,2-2,21,100.0,78,88.0,10.0,13.0,12.0,17.0,3.0,12.0,10.0,7.0,,4.0


In [82]:
dash = kicking_df['LNG'][38]
dash

'—'

In [83]:
kicking_df.fillna('0.0', inplace = True)

In [84]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8', 
              'Week_9', 'Week_10']


In [85]:
replace_dash(kicking_df, kick_int, dash)

In [86]:
replace_dash(kicking_df, kick_float, dash)

In [87]:
update_int_dtype(kicking_df, kick_int)

In [88]:
adjust_float(kicking_df, kick_float)

In [89]:
update_float_dtype(kicking_df, kick_float)

In [90]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8,Week_9,Week_10
0,Ryan Succop,K,TB,10,22,90.9,50,0-0,6-6,7-7,6-7,1-2,32,93.8,90,98.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0,4.0,17.0
1,Daniel Carlson,K,LV,9,22,90.9,54,0-0,9-9,5-5,2-4,4-4,28,96.4,87,97.0,12.0,12.0,8.0,13.0,11.0,0.0,9.0,10.0,7.0,15.0
2,Tyler Bass,K,BUF,10,25,76.0,58,1-1,4-4,4-7,6-7,4-6,30,96.7,86,100.0,9.0,7.0,5.0,6.0,5.0,6.0,23.0,6.0,15.0,18.0
3,Wil Lutz,K,NO,9,19,94.7,53,0-0,4-5,8-8,5-5,1-1,31,100.0,85,92.0,10.0,6.0,13.0,5.0,15.0,0.0,11.0,14.0,8.0,10.0
4,Jason Sanders,K,MIA,9,21,95.2,56,0-0,5-5,3-3,7-8,5-5,23,100.0,83,100.0,4.0,11.0,7.0,18.0,22.0,6.0,0.0,4.0,14.0,14.0


In [91]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
Week_6      float64
Week_7      float64
Week_8      float64
Week_9      float64
Week_10     float64
dtype: object

## Pickle Cleaned DataFrames

In [92]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')