# Week 9 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [3]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [4]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [5]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [6]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [7]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [8]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [9]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [10]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [11]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [12]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [13]:
no_games = 8

In [14]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7
0,Kyler Murray,ARI,QB,202.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1
1,Russell Wilson,SEA,QB,183.3,31.8,34.4,36.8,21.9,25.5,,32.9
2,Patrick Mahomes II,KC,QB,171.4,20.4,27.5,40.0,20.2,30.7,20.6,12.0
3,Josh Allen,BUF,QB,171.1,28.2,34.5,32.2,25.4,18.3,16.1,16.4
4,Deshaun Watson,HOU,QB,159.6,21.8,15.7,18.1,20.9,26.9,32.0,24.2


In [15]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_8,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7
0,Russell Wilson,SEA,QB,212.0,28.7,SEA,QB,183.3,31.8,34.4,36.8,21.9,25.5,,32.9
1,Patrick Mahomes II,KC,QB,208.0,36.6,KC,QB,171.4,20.4,27.5,40.0,20.2,30.7,20.6,12.0
2,Kyler Murray,ARI,QB,202.5,,ARI,QB,202.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1
3,Josh Allen,BUF,QB,184.6,13.5,BUF,QB,171.1,28.2,34.5,32.2,25.4,18.3,16.1,16.4
4,Tom Brady,TB,QB,174.3,19.1,TB,QB,155.2,22.5,9.7,23.9,33.5,14.1,14.6,36.9


In [16]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_8', 'Week_1', 'Week_2',
       'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7'],
      dtype='object')

In [17]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 
                           'Week_6', 'Week_7', 'Week_8']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Russell Wilson,SEA,QB,212.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7
1,Patrick Mahomes II,KC,QB,208.0,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6
2,Kyler Murray,ARI,QB,202.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,
3,Josh Allen,BUF,QB,184.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5
4,Tom Brady,TB,QB,174.3,22.5,9.7,23.9,33.5,14.1,14.6,36.9,19.1


In [18]:
print(fantasy_pts.shape)

(642, 12)


### Pickle Combined DataFrame for Next Week

In [19]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [20]:
print(df.shape)
df.head()

(542, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Dalvin Cook,MIN,RB,25,6,6,0,0,0,0,0,122,652,5.34,10,19,14,127,9.07,1,1,1,11,3.0,,154.9,1,1
1,Alvin Kamara,NOR,RB,25,7,5,0,0,0,0,0,87,431,4.95,4,66,55,556,10.11,3,1,0,7,,,168.2,2,2
2,Derrick Henry,TEN,RB,26,7,7,0,0,0,0,0,161,775,4.81,8,18,10,81,8.1,0,0,0,8,,,138.6,3,3
3,Patrick Mahomes,KAN,QB,25,8,8,190,284,2315,21,1,34,165,4.85,2,0,0,0,,0,1,0,2,,2.0,208.1,1,4
4,Russell Wilson,SEA,QB,32,7,7,183,256,2151,26,6,35,260,7.43,0,0,0,0,,0,1,0,0,,1.0,212.0,2,5


In [21]:
print(fantasy_pts.shape)
fantasy_pts.head()

(642, 12)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Russell Wilson,SEA,QB,212.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7
1,Patrick Mahomes II,KC,QB,208.0,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6
2,Kyler Murray,ARI,QB,202.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,
3,Josh Allen,BUF,QB,184.6,28.2,34.5,32.2,25.4,18.3,16.1,16.4,13.5
4,Tom Brady,TB,QB,174.3,22.5,9.7,23.9,33.5,14.1,14.6,36.9,19.1


In [22]:
check_names(df, fantasy_pts)

In [23]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [24]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [25]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [26]:
print(offense.shape)
offense.head(10)

(542, 37)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Dalvin Cook,MIN,RB,25,6,6,0,0,0,0,0,122,652,5.34,10,19,14,127,9.07,1,1,1,11,3.0,,154.9,1,1,147.9,20.8,15.1,23.9,26.6,14.9,,,46.6
1,Alvin Kamara,NOR,RB,25,7,5,0,0,0,0,0,87,431,4.95,4,66,55,556,10.11,3,1,0,7,,,168.2,2,2,140.7,18.7,29.4,31.7,17.9,11.9,,14.8,16.3
2,Derrick Henry,TEN,RB,26,7,7,0,0,0,0,0,161,775,4.81,8,18,10,81,8.1,0,0,0,8,,,138.6,3,3,133.6,13.1,8.4,25.0,,18.3,38.4,13.2,17.2
3,Patrick Mahomes,KAN,QB,25,8,8,190,284,2315,21,1,34,165,4.85,2,0,0,0,,0,1,0,2,,2.0,208.1,1,4,208.0,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6
4,Russell Wilson,SEA,QB,32,7,7,183,256,2151,26,6,35,260,7.43,0,0,0,0,,0,1,0,0,,1.0,212.0,2,5,212.0,31.8,34.4,36.8,21.9,25.5,,32.9,28.7
5,Travis Kelce,KAN,TE,31,8,8,0,0,0,0,0,0,0,,0,68,48,610,12.71,6,1,1,6,,,119.0,1,6,95.0,11.0,15.0,8.7,7.0,16.8,16.5,3.1,16.9
6,Kyler Murray,ARI,QB,23,7,7,169,253,1847,13,7,65,437,6.72,7,0,0,0,,0,3,1,7,,,202.6,3,7,202.5,27.3,33.1,24.7,23.1,27.3,28.9,38.1,
7,D.K. Metcalf,SEA,WR,23,7,7,0,0,0,0,0,0,0,,0,59,36,680,18.89,7,1,1,7,,,126.0,1,8,108.0,15.5,15.2,15.0,10.6,21.3,,2.3,28.1
8,Tyreek Hill,KAN,WR,26,8,8,0,0,0,0,0,7,54,7.71,1,54,35,537,15.34,7,1,0,8,,,124.6,2,9,107.1,10.6,16.8,16.2,12.4,15.3,2.5,11.5,21.8
9,Calvin Ridley,ATL,WR,26,8,8,0,0,0,0,0,3,14,4.67,0,68,43,657,15.28,6,0,0,6,1.0,,126.6,3,10,105.1,24.9,22.9,11.7,0.0,13.6,12.9,14.9,4.2


In [27]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                       12
Week_1                   136
Week_2                   138
Week_3                   139
Week_4                   165
Week_5                   194
Week_6        

In [28]:
offense.fillna('0.0', inplace = True)

In [30]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5', 'Week_6', 'Week_7', 'Week_8']

In [31]:
update_int_dtype(offense, off_integers)

In [32]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [33]:
update_float_dtype(offense, off_floats)

In [34]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Dalvin Cook,MIN,RB,25,6,6,0,0,0,0,0,122,652,8.4,10,19,14,127,9.7,1,1,1,11,3,0,154.9,1,1,147.9,20.8,15.1,23.9,26.6,14.9,0.0,0.0,46.6
1,Alvin Kamara,NOR,RB,25,7,5,0,0,0,0,0,87,431,13.5,4,66,55,556,11.1,3,1,0,7,0,0,168.2,2,2,140.7,18.7,29.4,31.7,17.9,11.9,0.0,14.8,16.3
2,Derrick Henry,TEN,RB,26,7,7,0,0,0,0,0,161,775,12.1,8,18,10,81,9.0,0,0,0,8,0,0,138.6,3,3,133.6,13.1,8.4,25.0,0.0,18.3,38.4,13.2,17.2
3,Patrick Mahomes,KAN,QB,25,8,8,190,284,2315,21,1,34,165,12.5,2,0,0,0,0.0,0,1,0,2,0,2,208.1,1,4,208.0,20.4,27.5,40.0,20.2,30.7,20.6,12.0,36.6
4,Russell Wilson,SEA,QB,32,7,7,183,256,2151,26,6,35,260,11.3,0,0,0,0,0.0,0,1,0,0,0,1,212.0,2,5,212.0,31.8,34.4,36.8,21.9,25.5,0.0,32.9,28.7


In [35]:
team_dict = {'LAR': 'Los Angeles Rams', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Rams
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


In [36]:
offense.dtypes

Player                    object
Team                      object
Position                  object
Age                        int64
Games                      int64
GamesStarted               int64
CompletedPasses            int64
PassesAttempted            int64
PassingYds                 int64
PassingTDs                 int64
Interceptions              int64
RushingAttempts            int64
RushingYds                 int64
RushingYdspAtt           float64
RushingTDs                 int64
Targeted                   int64
Receptions                 int64
ReceivingYds               int64
YdspReception            float64
ReceivingTDs               int64
Fumbles                    int64
LostFumbles                int64
TtlTDs                     int64
TwoPTConversions           int64
TwoPTConversionPasses      int64
FDFantasyPts             float64
PositionRank              object
OverallRank               object
TTL                      float64
Week_1                   float64
Week_2    

## Defense Stats

In [37]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Miami Dolphins,7,130,467,5.6,2632,178,876,4.9,9,270,6.1,165,11.6,1756,8,22,15,68.2%,13,7,6,19
1,Baltimore Ravens,7,132,447,5.1,2260,159,702,4.4,5,264,5.4,170,10.2,1558,10,16,13,81.3%,12,3,9,24
2,Indianapolis Colts,7,136,418,4.9,2054,164,559,3.4,4,236,5.9,144,11.2,1495,10,18,12,66.7%,12,11,1,18
3,Pittsburgh Steelers,7,142,429,5.1,2175,169,678,4.0,5,230,5.8,131,12.9,1497,13,20,13,65%,13,10,3,30
4,Arizona Cardinals,7,146,485,5.5,2649,195,918,4.7,4,271,6.0,171,11.0,1731,12,27,12,44.4%,10,6,4,19


In [38]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [39]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [40]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Miami Dolphins,7,130,467,5.6,2632,178,876,4.9,9,270,6.1,165,11.6,1756,8,22,15,68.2%,13,7,6,19,Miami Dolphins,MIA,DST,63.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0
1,Baltimore Ravens,7,132,447,5.1,2260,159,702,4.4,5,264,5.4,170,10.2,1558,10,16,13,81.3%,12,3,9,24,Baltimore Ravens,BAL,DST,74.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0
2,Indianapolis Colts,7,136,418,4.9,2054,164,559,3.4,4,236,5.9,144,11.2,1495,10,18,12,66.7%,12,11,1,18,Indianapolis Colts,IND,DST,76.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0
3,Pittsburgh Steelers,7,142,429,5.1,2175,169,678,4.0,5,230,5.8,131,12.9,1497,13,20,13,65%,13,10,3,30,Pittsburgh Steelers,PIT,DST,74.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0
4,Arizona Cardinals,7,146,485,5.5,2649,195,918,4.7,4,271,6.0,171,11.0,1731,12,27,12,44.4%,10,6,4,19,Arizona Cardinals,ARI,DST,47.0,4.0,9.0,4.0,1.0,7.0,15.0,7.0,


In [41]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [42]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Miami Dolphins,7,130,467,5.6,2632,178,876,4.9,9,270,6.1,165,11.6,1756,8,22,15,68.2%,13,7,6,19,63.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0
1,Baltimore Ravens,7,132,447,5.1,2260,159,702,4.4,5,264,5.4,170,10.2,1558,10,16,13,81.3%,12,3,9,24,74.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0
2,Indianapolis Colts,7,136,418,4.9,2054,164,559,3.4,4,236,5.9,144,11.2,1495,10,18,12,66.7%,12,11,1,18,76.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0
3,Pittsburgh Steelers,7,142,429,5.1,2175,169,678,4.0,5,230,5.8,131,12.9,1497,13,20,13,65%,13,10,3,30,74.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0
4,Arizona Cardinals,7,146,485,5.5,2649,195,918,4.7,4,271,6.0,171,11.0,1731,12,27,12,44.4%,10,6,4,19,47.0,4.0,9.0,4.0,1.0,7.0,15.0,7.0,


In [43]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
Week_6                       4
Week_7                       4
Week_8                       4
dtype: i

In [44]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Miami Dolphins,7,130,467,5.6,2632,178,876,4.9,9,270,6.1,165,11.6,1756,8,22,15,68.2%,13,7,6,19,63.0,2.0,2.0,12.0,3.0,12.0,15.0,,17.0
1,Baltimore Ravens,7,132,447,5.1,2260,159,702,4.4,5,264,5.4,170,10.2,1558,10,16,13,81.3%,12,3,9,24,74.0,15.0,15.0,1.0,6.0,26.0,7.0,,4.0
2,Indianapolis Colts,7,136,418,4.9,2054,164,559,3.4,4,236,5.9,144,11.2,1495,10,18,12,66.7%,12,11,1,18,76.0,4.0,15.0,26.0,7.0,5.0,4.0,,15.0
3,Pittsburgh Steelers,7,142,429,5.1,2175,169,678,4.0,5,230,5.8,131,12.9,1497,13,20,13,65%,13,10,3,30,74.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0
4,Arizona Cardinals,7,146,485,5.5,2649,195,918,4.7,4,271,6.0,171,11.0,1731,12,27,12,44.4%,10,6,4,19,47.0,4.0,9.0,4.0,1.0,7.0,15.0,7.0,
5,Kansas City Chiefs,8,152,513,5.4,2752,238,1142,4.8,7,258,5.9,155,11.0,1610,9,20,14,70%,14,9,5,17,66.0,7.0,5.0,7.0,20.0,-1.0,3.0,18.0,7.0
6,Los Angeles Rams,8,152,493,4.7,2335,192,758,3.9,6,276,5.2,179,9.7,1577,9,23,14,60.9%,10,6,4,25,58.0,4.0,7.0,4.0,11.0,12.0,0.0,15.0,5.0
7,Tampa Bay Buccaneers,8,165,429,4.8,2039,154,462,3.0,5,250,5.7,167,10.6,1577,9,19,11,57.9%,12,9,3,25,74.0,0.0,14.0,16.0,6.0,6.0,19.0,6.0,7.0
8,Washington Football Team,7,165,424,5.1,2164,205,863,4.2,8,197,5.9,125,11.6,1301,10,21,11,52.4%,9,8,1,22,47.0,15.0,4.0,1.0,4.0,2.0,4.0,17.0,
9,Chicago Bears,8,166,533,5.2,2788,223,960,4.3,6,293,5.9,179,10.8,1828,8,30,12,40%,9,5,4,17,48.0,3.0,12.0,4.0,2.0,6.0,11.0,9.0,1.0


In [45]:
defense_df.fillna('0.0', inplace=True)

In [46]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion', 'TTL', 'Week_1', 
              'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8']

In [47]:
remove_comma(defense_df, def_int)

In [48]:
update_int_dtype(defense_df, def_int)

In [49]:
adjust_float(defense_df, def_floats)

In [50]:
update_float_dtype(defense_df, def_floats)

In [51]:
update_percent(defense_df, 'RZ_Percent')

In [52]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [53]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Miami Dolphins,7,130,467,5.6,2632,178,876,4.9,9,270,6.1,165,11.6,1756,8,22,15,68.2,13,7,6,19,63.0,2.0,2.0,12.0,3.0,12.0,15.0,0.0,17.0
1,Baltimore Ravens,7,132,447,5.1,2260,159,702,4.4,5,264,5.4,170,10.2,1558,10,16,13,81.3,12,3,9,24,74.0,15.0,15.0,1.0,6.0,26.0,7.0,0.0,4.0
2,Indianapolis Colts,7,136,418,4.9,2054,164,559,3.4,4,236,5.9,144,11.2,1495,10,18,12,66.7,12,11,1,18,76.0,4.0,15.0,26.0,7.0,5.0,4.0,0.0,15.0
3,Pittsburgh Steelers,7,142,429,5.1,2175,169,678,4.0,5,230,5.8,131,12.9,1497,13,20,13,65.0,13,10,3,30,74.0,8.0,13.0,7.0,10.0,8.0,18.0,2.0,18.0
4,Arizona Cardinals,7,146,485,5.5,2649,195,918,4.7,4,271,6.0,171,11.0,1731,12,27,12,44.4,10,6,4,19,47.0,4.0,9.0,4.0,1.0,7.0,15.0,7.0,0.0


In [54]:
defense_df.dtypes

Team                          object
GP                             int64
Ttl_Pts_Allowed                int64
Ttl_Offense_Plays_Allowed      int64
Yds_p_Play                   float64
Ttl_Yds                        int64
Rushing_Att                    int64
Rushing_Yds                    int64
Rushing_Yds_p_Att            float64
Rushing_TDs                    int64
Passing_Att                    int64
Passing_Yds_p_Att            float64
Completions                    int64
Yds_p_Completion             float64
Passing_Yds                    int64
Passing_TDs                    int64
RZ_Att                         int64
RZ_TD                          int64
RZ_Percent                   float64
Ttl_Turnovers                  int64
Interceptions                  int64
Fumbles                        int64
Sacks                          int64
TTL                          float64
Week_1                       float64
Week_2                       float64
Week_3                       float64
W

## Kicking Stats

In [55]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Randy Bullock,K,CIN,8,20,90.0,55,0-0,3-3,7-8,6-7,2-2,20,100.0,74
1,Younghoe Koo,K,ATL,7,20,95.0,54,0-0,5-5,5-5,7-8,2-2,17,82.4,71
2,Ryan Succop,K,TB,8,17,88.2,50,0-0,3-3,7-7,4-5,1-2,27,96.3,71
3,Joey Slye,K,CAR,8,21,85.7,48,0-0,8-8,6-6,4-5,0-2,17,88.2,69
4,Wil Lutz,K,NO,7,16,93.8,53,0-0,3-4,7-7,4-4,1-1,23,100.0,68


In [56]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Randy Bullock,K,CIN,8,20,90.0,55,0-0,3-3,7-8,6-7,2-2,20,100.0,74,84.0,10.0,13.0,12.0,17.0,3.0,12.0,10.0,7.0
1,Younghoe Koo,K,ATL,7,20,95.0,54,0-0,5-5,5-5,7-8,2-2,17,82.4,71,82.0,9.0,17.0,8.0,,13.0,20.0,2.0,13.0
2,Ryan Succop,K,TB,8,17,88.2,50,0-0,3-3,7-7,4-5,1-2,27,96.3,71,77.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0
3,Joey Slye,K,CAR,8,21,85.7,48,0-0,8-8,6-6,4-5,0-2,17,88.2,69,73.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0
4,Wil Lutz,K,NO,7,16,93.8,53,0-0,3-4,7-7,4-4,1-1,23,100.0,68,74.0,10.0,6.0,13.0,5.0,15.0,,11.0,14.0
5,Daniel Carlson,K,LV,7,18,88.9,54,0-0,7-7,4-4,2-4,3-3,20,95.0,67,75.0,12.0,12.0,8.0,13.0,11.0,,9.0,10.0
6,Rodrigo Blankenship,K,IND,7,18,88.9,44,0-0,4-4,7-8,5-6,0-0,20,90.0,66,71.0,8.0,15.0,12.0,14.0,11.0,8.0,,3.0
7,Justin Tucker,K,BAL,7,15,93.3,55,0-0,2-2,3-3,7-7,2-3,23,100.0,65,76.0,9.0,16.0,9.0,8.0,10.0,16.0,,8.0
8,Harrison Butker,K,KC,8,14,92.9,58,1-1,3-3,6-6,1-2,2-2,29,82.8,63,68.0,10.0,13.0,4.0,8.0,6.0,8.0,14.0,5.0
9,Jason Sanders,K,MIA,7,15,100.0,52,0-0,5-5,2-2,6-6,2-2,17,100.0,62,72.0,4.0,11.0,7.0,18.0,22.0,6.0,,4.0


In [57]:
dash = kicking_df['LNG'][38]
dash

'—'

In [58]:
kicking_df.fillna('0.0', inplace = True)

In [59]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5', 'Week_6', 'Week_7', 'Week_8']

In [60]:
replace_dash(kicking_df, kick_int, dash)

In [61]:
replace_dash(kicking_df, kick_float, dash)

In [62]:
update_int_dtype(kicking_df, kick_int)

In [63]:
adjust_float(kicking_df, kick_float)

In [64]:
update_float_dtype(kicking_df, kick_float)

In [65]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5,Week_6,Week_7,Week_8
0,Randy Bullock,K,CIN,8,20,90.0,55,0-0,3-3,7-8,6-7,2-2,20,100.0,74,84.0,10.0,13.0,12.0,17.0,3.0,12.0,10.0,7.0
1,Younghoe Koo,K,ATL,7,20,95.0,54,0-0,5-5,5-5,7-8,2-2,17,82.4,71,82.0,9.0,17.0,8.0,0.0,13.0,20.0,2.0,13.0
2,Ryan Succop,K,TB,8,17,88.2,50,0-0,3-3,7-7,4-5,1-2,27,96.3,71,77.0,5.0,7.0,9.0,8.0,14.0,10.0,9.0,15.0
3,Joey Slye,K,CAR,8,21,85.7,48,0-0,8-8,6-6,4-5,0-2,17,88.2,69,73.0,12.0,5.0,15.0,7.0,11.0,11.0,7.0,5.0
4,Wil Lutz,K,NO,7,16,93.8,53,0-0,3-4,7-7,4-4,1-1,23,100.0,68,74.0,10.0,6.0,13.0,5.0,15.0,0.0,11.0,14.0


In [66]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
Week_6      float64
Week_7      float64
Week_8      float64
dtype: object

## Pickle Cleaned DataFrames

In [67]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')