# Week 6 -- Data Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import re
import warnings
warnings.simplefilter('ignore')

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

## Functions

In [3]:
def check_names(df1, df2):
    """
    This funciton compares the list of names from one dataframe to another and updates where the names do not match 
    so they will merge correctly.
    
    Parameters:
        df1: first dataframe to compare to; this should be the dataframe on the left that you will join with when 
        they merge.
        df2: second dataframe, should be the dataframe on the right for the later merge.
        
    Returns:
        Updated name list for df2 that should match df1.
    """
    
    df_players = list(df1['Player'])    
    for i in range(len(df2['Player'])):
        name = df2['Player'][i]
        split_name = name.split(' ')
        if (split_name[0] + ' ' + split_name[1]) in df_players:
            df2['Player'][i] = split_name[0] + ' ' + split_name[1]
        else:
            df2['Player'][i] = name

In [4]:
def name_update(dataframe, column):
    """
    This function updates a column in a dataframe by stripping any excess spaces surrounding the observation.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column: the column you want to update.
    
    Returns:
        Updated dataframe column.    
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip())

In [5]:
def update_int_dtype(dataframe, column_list):
    """
    This function updates the data type for a list of columns to integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column data types to integer.   
    """
    for column in column_list:
        dataframe[column] = dataframe[column].map(lambda x: int(x))

In [6]:
def update_float_dtype(dataframe, column_list):
    """
    This function takes a dataframe object and converts it to a float.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
    
    Returns:
        Updated dataframe column data types to a float.  
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = '0.0'
            else:
                dataframe[column][i] = item 
            updated_item = str(dataframe[column][i])
            ones, tenths = updated_item.split('.')
            ones = int(ones)
            tenths = int(tenths) * .1
            dataframe[column][i] = ones + tenths
        dataframe[column] = dataframe[column].map(lambda x: float(x))

In [7]:
def fill_blanks(dataframe, column_list):
    """
    This function fills blank values with 0. This should be used for a column that will be an integer.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.    
    
    Returns:
        Updated dataframe columns with filled values.      
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == '':
                dataframe[column][i] = 0
            else:
                dataframe[column][i] = item

In [8]:
def remove_comma(dataframe, column_list):
    """
    This function removes the comma from a value in a column.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns less any commas that might appear.    
    """  
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item.split(',')) > 1:
                one, two = item.split(',')
                dataframe[column][i] = one + two

In [9]:
def adjust_float(dataframe, column_list):
    """
    This function adjusts values in a column to be workable with the 'update_float_dtype' function.
    
    Parameters:
        dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe columns with objects with a decimal point value behind it.    
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if len(item) < 3:
                dataframe[column][i] = item + '.0'
            else:
                dataframe[column][i] = item

In [10]:
def update_percent(dataframe, column):
    """
    This function updates a column with a percent sign so it can be manipulated into a float with the 
    'update_float_dtype' function.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed.     
    """
    dataframe[column] = dataframe[column].map(lambda x: x.strip('%'))
    for i in range(len(dataframe[column])):
        item = dataframe[column][i]
        if len(item) < 3:
            dataframe[column][i] = item + '.0'
        else:
            dataframe[column][i] = item

In [13]:
def replace_dash(dataframe, column_list, dash):
    """
    This function replaces a - in a dataframe so datatypes can then be updated.
    
    Parameters:
       dataframe: the dataframe you want to update.
        column_list: list of columns to iterate through.
        dash: set to a value in a specific column in the dataframe.
        
    Returns:
        Updated dataframe column with no percent sign and added decimal point where needed. 
    """
    for column in column_list:
        for i in range(len(dataframe[column])):
            item = dataframe[column][i]
            if item == dash:
                dataframe[column][i] = '0'
            else:
                dataframe[column][i] = item

## Read in Collected Data

In [12]:
df = pd.read_pickle('player_stats')
fantasy_pts = pd.read_pickle('fantasy_weeks')
defense = pd.read_pickle('defense_data')
kicking = pd.read_pickle('kicking_data')

## Merge Last Week Fantasy Pts with this Week

In [14]:
no_games = 6

In [251]:
lastwk = pd.read_pickle('fantweeks_1_' + str(no_games - 1))
lastwk.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4
0,Dak Prescott,DAL,QB,125.2,17.6,39.8,29.5,38.3
1,Russell Wilson,SEA,QB,124.9,31.8,34.4,36.8,21.9
2,Josh Allen,BUF,QB,120.3,28.2,34.5,32.2,25.4
3,Kyler Murray,ARI,QB,108.2,27.3,33.1,24.7,23.1
4,Patrick Mahomes II,KC,QB,108.1,20.4,27.5,40.0,20.2


In [252]:
fantasy_pts = pd.merge(fantasy_pts, lastwk, 'left', on='Player')
fantasy_pts.head()

Unnamed: 0,Player,Team_x,Position_x,TTL_x,Week_5,Team_y,Position_y,TTL_y,Week_1,Week_2,Week_3,Week_4
0,Russell Wilson,SEA,QB,150.4,25.5,SEA,QB,124.9,31.8,34.4,36.8,21.9
1,Patrick Mahomes II,KC,QB,138.8,30.7,KC,QB,108.1,20.4,27.5,40.0,20.2
2,Josh Allen,BUF,QB,138.6,18.3,BUF,QB,120.3,28.2,34.5,32.2,25.4
3,Dak Prescott,DAL,QB,138.6,13.4,DAL,QB,125.2,17.6,39.8,29.5,38.3
4,Kyler Murray,ARI,QB,135.5,27.3,ARI,QB,108.2,27.3,33.1,24.7,23.1


In [253]:
fantasy_pts.drop(columns=['Team_y', 'Position_y', 'TTL_y'], inplace=True)
fantasy_pts.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position', 'TTL_x': 'TTL'}, inplace = True)
fantasy_pts.columns

Index(['Player', 'Team', 'Position', 'TTL', 'Week_5', 'Week_1', 'Week_2',
       'Week_3', 'Week_4'],
      dtype='object')

In [254]:
fantasy_pts = fantasy_pts[['Player', 'Team', 'Position', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5']]
fantasy_pts.head()

Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Russell Wilson,SEA,QB,150.4,31.8,34.4,36.8,21.9,25.5
1,Patrick Mahomes II,KC,QB,138.8,20.4,27.5,40.0,20.2,30.7
2,Josh Allen,BUF,QB,138.6,28.2,34.5,32.2,25.4,18.3
3,Dak Prescott,DAL,QB,138.6,17.6,39.8,29.5,38.3,13.4
4,Kyler Murray,ARI,QB,135.5,27.3,33.1,24.7,23.1,27.3


In [255]:
print(fantasy_pts.shape)

(596, 9)


### Pickle Combined DataFrame for Next Week

In [256]:
fantasy_pts.to_pickle('fantweeks_1_' + str(no_games))

## Merge Offensive Players & Weekly Fantasy Pts

In [257]:
print(df.shape)
df.head()

(501, 28)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank
0,Alvin Kamara,NOR,RB,25,5,4,0,0,0,0,0,61,281,4.61,4,45,38,395,10.39,3,0,0,7,,,128.6,1,1
1,Russell Wilson,SEA,QB,32,5,5,123,169,1502,19,3,23,153,6.65,0,0,0,0,,0,1,0,0,,1.0,150.4,1,2
2,Dalvin Cook,MIN,RB,25,5,5,0,0,0,0,0,92,489,5.32,7,16,12,64,5.33,0,1,1,7,3.0,,107.3,2,3
3,Patrick Mahomes,KAN,QB,25,5,5,123,193,1474,13,1,24,129,5.38,2,0,0,0,,0,0,0,2,,2.0,138.9,2,4
4,Josh Allen,BUF,QB,24,5,5,131,189,1589,14,3,29,101,3.48,3,0,0,0,,0,3,3,3,,,138.7,3,5


In [258]:
print(fantasy_pts.shape)
fantasy_pts.head()

(596, 9)


Unnamed: 0,Player,Team,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Russell Wilson,SEA,QB,150.4,31.8,34.4,36.8,21.9,25.5
1,Patrick Mahomes II,KC,QB,138.8,20.4,27.5,40.0,20.2,30.7
2,Josh Allen,BUF,QB,138.6,28.2,34.5,32.2,25.4,18.3
3,Dak Prescott,DAL,QB,138.6,17.6,39.8,29.5,38.3,13.4
4,Kyler Murray,ARI,QB,135.5,27.3,33.1,24.7,23.1,27.3


In [259]:
check_names(df, fantasy_pts)

In [260]:
name_update(df, 'Player')
name_update(fantasy_pts, 'Player')

In [261]:
offense = pd.merge(df, fantasy_pts, 'left', on='Player')

In [262]:
offense.drop(columns = ['Team_y', 'Position_y'], inplace = True)
offense.rename(columns = {'Team_x': 'Team', 'Position_x': 'Position'}, inplace = True)

In [263]:
print(offense.shape)
offense.head(10)

(501, 34)


Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Alvin Kamara,NOR,RB,25,5,4,0,0,0,0,0,61,281,4.61,4,45,38,395,10.39,3,0,0,7,,,128.6,1,1,109.6,18.7,29.4,31.7,17.9,11.9
1,Russell Wilson,SEA,QB,32,5,5,123,169,1502,19,3,23,153,6.65,0,0,0,0,,0,1,0,0,,1.0,150.4,1,2,150.4,31.8,34.4,36.8,21.9,25.5
2,Dalvin Cook,MIN,RB,25,5,5,0,0,0,0,0,92,489,5.32,7,16,12,64,5.33,0,1,1,7,3.0,,107.3,2,3,101.3,20.8,15.1,23.9,26.6,14.9
3,Patrick Mahomes,KAN,QB,25,5,5,123,193,1474,13,1,24,129,5.38,2,0,0,0,,0,0,0,2,,2.0,138.9,2,4,138.8,20.4,27.5,40.0,20.2,30.7
4,Josh Allen,BUF,QB,24,5,5,131,189,1589,14,3,29,101,3.48,3,0,0,0,,0,3,3,3,,,138.7,3,5,138.6,28.2,34.5,32.2,25.4,18.3
5,Dak Prescott,DAL,QB,27,5,5,151,222,1856,9,4,18,93,5.17,3,1,1,11,11.0,1,3,3,4,1.0,1.0,139.1,4,6,138.6,17.6,39.8,29.5,38.3,13.4
6,D.K. Metcalf,SEA,WR,23,5,5,0,0,0,0,0,0,0,,0,39,22,496,22.55,5,1,1,5,,,88.6,1,7,77.6,15.5,15.2,15.0,10.6,21.3
7,Ezekiel Elliott,DAL,RB,25,5,5,0,0,0,0,0,89,364,4.09,5,32,24,173,7.21,1,3,2,6,1.0,,99.7,3,8,87.7,24.7,16.2,11.8,12.5,22.5
8,Aaron Jones,GNB,RB,26,4,4,0,0,0,0,0,65,374,5.75,4,23,15,135,9.0,2,1,0,6,,,94.4,4,9,86.9,13.6,41.6,14.6,17.1,
9,Adam Thielen,MIN,WR,30,5,5,0,0,0,0,0,3,15,5.0,0,44,29,364,12.55,6,0,0,6,1.0,,90.4,2,10,75.9,25.0,3.1,9.3,18.2,20.3


In [264]:
offense.isnull().sum()

Player                     0
Team                       0
Position                   0
Age                        0
Games                      0
GamesStarted               0
CompletedPasses            0
PassesAttempted            0
PassingYds                 0
PassingTDs                 0
Interceptions              0
RushingAttempts            0
RushingYds                 0
RushingYdspAtt             0
RushingTDs                 0
Targeted                   0
Receptions                 0
ReceivingYds               0
YdspReception              0
ReceivingTDs               0
Fumbles                    0
LostFumbles                0
TtlTDs                     0
TwoPTConversions           0
TwoPTConversionPasses      0
FDFantasyPts               0
PositionRank               0
OverallRank                0
TTL                        9
Week_1                    96
Week_2                    97
Week_3                    99
Week_4                   126
Week_5                   155
dtype: int64

In [265]:
offense.fillna('0.0', inplace = True)

In [266]:
off_integers = ['Age', 'Games', 'GamesStarted', 'CompletedPasses', 'PassesAttempted', 'PassingYds', 'PassingTDs', 
            'Interceptions', 'RushingAttempts', 'RushingYds', 'RushingTDs', 'Targeted', 'Receptions', 
            'ReceivingYds', 'ReceivingTDs', 'Fumbles', 'LostFumbles', 'TtlTDs']

off_floats = ['RushingYdspAtt', 'YdspReception', 'FDFantasyPts', 'TTL','Week_1', 'Week_2', 'Week_3', 'Week_4', 
              'Week_5']

In [267]:
update_int_dtype(offense, off_integers)

In [268]:
two_pts = ['TwoPTConversions', 'TwoPTConversionPasses']
fill_blanks(offense, two_pts)
update_int_dtype(offense, two_pts)

In [269]:
update_float_dtype(offense, off_floats)

In [270]:
offense.head()

Unnamed: 0,Player,Team,Position,Age,Games,GamesStarted,CompletedPasses,PassesAttempted,PassingYds,PassingTDs,Interceptions,RushingAttempts,RushingYds,RushingYdspAtt,RushingTDs,Targeted,Receptions,ReceivingYds,YdspReception,ReceivingTDs,Fumbles,LostFumbles,TtlTDs,TwoPTConversions,TwoPTConversionPasses,FDFantasyPts,PositionRank,OverallRank,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Alvin Kamara,NOR,RB,25,5,4,0,0,0,0,0,61,281,10.1,4,45,38,395,13.9,3,0,0,7,0,0,128.6,1,1,109.6,18.7,29.4,31.7,17.9,11.9
1,Russell Wilson,SEA,QB,32,5,5,123,169,1502,19,3,23,153,12.5,0,0,0,0,0.0,0,1,0,0,0,1,150.4,1,2,150.4,31.8,34.4,36.8,21.9,25.5
2,Dalvin Cook,MIN,RB,25,5,5,0,0,0,0,0,92,489,8.2,7,16,12,64,8.3,0,1,1,7,3,0,107.3,2,3,101.3,20.8,15.1,23.9,26.6,14.9
3,Patrick Mahomes,KAN,QB,25,5,5,123,193,1474,13,1,24,129,8.8,2,0,0,0,0.0,0,0,0,2,0,2,138.9,2,4,138.8,20.4,27.5,40.0,20.2,30.7
4,Josh Allen,BUF,QB,24,5,5,131,189,1589,14,3,29,101,7.8,3,0,0,0,0.0,0,3,3,3,0,0,138.7,3,5,138.6,28.2,34.5,32.2,25.4,18.3


In [271]:
team_dict = {'LAR': 'Los Angeles Raiders', 
             'SEA': 'Seattle Seahawks', 
             'BAL': 'Baltimore Ravens', 
             'KAN': 'Kansas City Chiefs', 
             'JAX': 'Jacksonville Jaguars', 
             'DET': 'Detroit Lions', 
             'PIT': 'Pittsburgh Steelers', 
             'MIN': 'Minnesota Vikings', 
             'CHI': 'Chicago Bears', 
             'ATL': 'Atlanta Falcons', 
             'BUF': 'Buffalo Bills', 
             'GNB': 'Green Bay Packers', 
             'MIA': 'Miami Dolphins', 
             'CLE': 'Cleveland Browns', 
             'CAR': 'Carolina Panthers', 
             'DAL': 'Dallas Cowboys', 
             'ARI': 'Arizona Cardinals', 
             'HOU': 'Houston Texans', 
             'NYG': 'New York Giants', 
             'WAS': 'Washington Football Team', 
             'IND': 'Indianapolis Colts', 
             'LVR': 'Las Vegas Raiders', 
             'DEN': 'Denver Broncos', 
             'TEN': 'Tennessee Titans', 
             'NWE': 'New England Patriots', 
             'CIN': 'Cincinnati Bengals', 
             'NYJ': 'New York Jets', 
             'LAC': 'Los Angeles Chargers', 
             'PHI': 'Philadelphia Eagles', 
             'TAM': 'Tampa Bay Buccaneers', 
             'NOR': 'New Orleans Saints', 
             'SFO': 'San Francisco 49ers'}

team_names = pd.DataFrame()
team_names['Team'] = list(team_dict.keys())
team_names['Long_Name'] = list(team_dict.values())
team_names.head()

Unnamed: 0,Team,Long_Name
0,LAR,Los Angeles Raiders
1,SEA,Seattle Seahawks
2,BAL,Baltimore Ravens
3,KAN,Kansas City Chiefs
4,JAX,Jacksonville Jaguars


In [272]:
offense.dtypes

Player                    object
Team                      object
Position                  object
Age                        int64
Games                      int64
GamesStarted               int64
CompletedPasses            int64
PassesAttempted            int64
PassingYds                 int64
PassingTDs                 int64
Interceptions              int64
RushingAttempts            int64
RushingYds                 int64
RushingYdspAtt           float64
RushingTDs                 int64
Targeted                   int64
Receptions                 int64
ReceivingYds               int64
YdspReception            float64
ReceivingTDs               int64
Fumbles                    int64
LostFumbles                int64
TtlTDs                     int64
TwoPTConversions           int64
TwoPTConversionPasses      int64
FDFantasyPts             float64
PositionRank              object
OverallRank               object
TTL                      float64
Week_1                   float64
Week_2    

## Defense Stats

In [273]:
defense.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks
0,Baltimore Ravens,5,76,333,5.0,1675,125,460,3.7,3,192,5.8,128,10.5,1215,6,10,7,70%,10,3,7,16
1,Pittsburgh Steelers,4,87,240,5.0,1206,77,256,3.3,3,143,5.8,85,12.9,950,8,12,7,58.3%,7,6,1,20
2,Indianapolis Colts,5,88,285,4.7,1330,119,432,3.6,1,155,5.4,95,10.2,898,7,12,7,58.3%,9,9,0,11
3,Tennessee Titans,4,90,264,6.2,1637,108,593,5.5,3,151,6.7,97,11.2,1044,9,12,10,83.3%,9,6,3,5
4,Los Angeles Rams,5,90,314,4.8,1521,113,532,4.7,5,181,4.9,116,9.7,989,5,17,10,58.8%,6,4,2,20


In [274]:
def_fantasy = fantasy_pts[fantasy_pts['Position'] == 'DST']

In [275]:
defense_df = pd.merge(defense, def_fantasy, 'left', left_on = 'Team', right_on = 'Player')

In [276]:
defense_df.head()

Unnamed: 0,Team_x,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,Player,Team_y,Position,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Baltimore Ravens,5,76,333,5.0,1675,125,460,3.7,3,192,5.8,128,10.5,1215,6,10,7,70%,10,3,7,16,Baltimore Ravens,BAL,DST,63.0,15.0,15.0,1.0,6.0,26.0
1,Pittsburgh Steelers,4,87,240,5.0,1206,77,256,3.3,3,143,5.8,85,12.9,950,8,12,7,58.3%,7,6,1,20,Pittsburgh Steelers,PIT,DST,36.0,8.0,13.0,7.0,10.0,8.0
2,Indianapolis Colts,5,88,285,4.7,1330,119,432,3.6,1,155,5.4,95,10.2,898,7,12,7,58.3%,9,9,0,11,Indianapolis Colts,IND,DST,57.0,4.0,15.0,26.0,7.0,5.0
3,Tennessee Titans,4,90,264,6.2,1637,108,593,5.5,3,151,6.7,97,11.2,1044,9,12,10,83.3%,9,6,3,5,Tennessee Titans,TEN,DST,23.0,3.0,5.0,7.0,10.0,8.0
4,Los Angeles Rams,5,90,314,4.8,1521,113,532,4.7,5,181,4.9,116,9.7,989,5,17,10,58.8%,6,4,2,20,Los Angeles Rams,LAR,DST,38.0,4.0,7.0,4.0,11.0,12.0


In [277]:
defense_df.drop(columns=['Player', 'Team_y', 'Position'], inplace=True)
defense_df.rename(columns={'Team_x': 'Team'}, inplace=True)

In [278]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Baltimore Ravens,5,76,333,5.0,1675,125,460,3.7,3,192,5.8,128,10.5,1215,6,10,7,70%,10,3,7,16,63.0,15.0,15.0,1.0,6.0,26.0
1,Pittsburgh Steelers,4,87,240,5.0,1206,77,256,3.3,3,143,5.8,85,12.9,950,8,12,7,58.3%,7,6,1,20,36.0,8.0,13.0,7.0,10.0,8.0
2,Indianapolis Colts,5,88,285,4.7,1330,119,432,3.6,1,155,5.4,95,10.2,898,7,12,7,58.3%,9,9,0,11,57.0,4.0,15.0,26.0,7.0,5.0
3,Tennessee Titans,4,90,264,6.2,1637,108,593,5.5,3,151,6.7,97,11.2,1044,9,12,10,83.3%,9,6,3,5,23.0,3.0,5.0,7.0,10.0,8.0
4,Los Angeles Rams,5,90,314,4.8,1521,113,532,4.7,5,181,4.9,116,9.7,989,5,17,10,58.8%,6,4,2,20,38.0,4.0,7.0,4.0,11.0,12.0


In [279]:
defense_df.isnull().sum()

Team                         0
GP                           0
Ttl_Pts_Allowed              0
Ttl_Offense_Plays_Allowed    0
Yds_p_Play                   0
Ttl_Yds                      0
Rushing_Att                  0
Rushing_Yds                  0
Rushing_Yds_p_Att            0
Rushing_TDs                  0
Passing_Att                  0
Passing_Yds_p_Att            0
Completions                  0
Yds_p_Completion             0
Passing_Yds                  0
Passing_TDs                  0
RZ_Att                       0
RZ_TD                        0
RZ_Percent                   0
Ttl_Turnovers                0
Interceptions                0
Fumbles                      0
Sacks                        0
TTL                          0
Week_1                       0
Week_2                       0
Week_3                       0
Week_4                       0
Week_5                       4
dtype: int64

In [280]:
defense_df

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Baltimore Ravens,5,76,333,5.0,1675,125,460,3.7,3,192,5.8,128,10.5,1215,6,10,7,70%,10,3,7,16,63.0,15.0,15.0,1.0,6.0,26.0
1,Pittsburgh Steelers,4,87,240,5.0,1206,77,256,3.3,3,143,5.8,85,12.9,950,8,12,7,58.3%,7,6,1,20,36.0,8.0,13.0,7.0,10.0,8.0
2,Indianapolis Colts,5,88,285,4.7,1330,119,432,3.6,1,155,5.4,95,10.2,898,7,12,7,58.3%,9,9,0,11,57.0,4.0,15.0,26.0,7.0,5.0
3,Tennessee Titans,4,90,264,6.2,1637,108,593,5.5,3,151,6.7,97,11.2,1044,9,12,10,83.3%,9,6,3,5,23.0,3.0,5.0,7.0,10.0,8.0
4,Los Angeles Rams,5,90,314,4.8,1521,113,532,4.7,5,181,4.9,116,9.7,989,5,17,10,58.8%,6,4,2,20,38.0,4.0,7.0,4.0,11.0,12.0
5,New England Patriots,4,92,229,6.1,1396,104,461,4.4,1,119,7.5,84,11.6,935,9,12,7,58.3%,8,4,4,6,36.0,11.0,6.0,15.0,4.0,
6,Denver Broncos,4,98,281,5.2,1461,105,436,4.2,2,166,5.8,108,10.2,1025,7,13,6,46.2%,2,1,1,10,14.0,2.0,5.0,1.0,6.0,
7,Chicago Bears,5,100,331,5.2,1720,130,566,4.4,4,190,5.7,109,11.2,1154,4,19,7,36.8%,5,3,2,11,27.0,3.0,12.0,4.0,2.0,6.0
8,Green Bay Packers,4,101,233,6.1,1413,88,423,4.8,5,133,6.8,96,11.2,990,7,13,9,69.2%,3,2,1,12,25.0,5.0,12.0,3.0,5.0,
9,Arizona Cardinals,5,102,319,5.4,1733,138,621,4.5,3,167,6.1,104,11.8,1112,8,21,9,42.9%,3,1,2,14,25.0,4.0,9.0,4.0,1.0,7.0


In [281]:
defense_df.fillna('0.0', inplace=True)

In [282]:
def_int = ['GP', 'Ttl_Pts_Allowed', 'Ttl_Offense_Plays_Allowed', 'Ttl_Yds', 'Rushing_Att', 'Rushing_Yds', 
           'Rushing_TDs', 'Passing_Att', 'Completions', 'Passing_Yds', 'Passing_TDs', 'RZ_Att', 'RZ_TD', 
           'Ttl_Turnovers', 'Interceptions', 'Fumbles', 'Sacks']

def_floats = ['Yds_p_Play', 'Rushing_Yds_p_Att', 'Passing_Yds_p_Att', 'Yds_p_Completion', 'TTL', 'Week_1', 
              'Week_2', 'Week_3', 'Week_4', 'Week_5']

In [283]:
remove_comma(defense_df, def_int)

In [284]:
update_int_dtype(defense_df, def_int)

In [285]:
adjust_float(defense_df, def_floats)

In [286]:
update_float_dtype(defense_df, def_floats)

In [287]:
update_percent(defense_df, 'RZ_Percent')

In [288]:
rz = ['RZ_Percent']
update_float_dtype(defense_df, rz)

In [289]:
defense_df.head()

Unnamed: 0,Team,GP,Ttl_Pts_Allowed,Ttl_Offense_Plays_Allowed,Yds_p_Play,Ttl_Yds,Rushing_Att,Rushing_Yds,Rushing_Yds_p_Att,Rushing_TDs,Passing_Att,Passing_Yds_p_Att,Completions,Yds_p_Completion,Passing_Yds,Passing_TDs,RZ_Att,RZ_TD,RZ_Percent,Ttl_Turnovers,Interceptions,Fumbles,Sacks,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Baltimore Ravens,5,76,333,5.0,1675,125,460,3.7,3,192,5.8,128,10.5,1215,6,10,7,70.0,10,3,7,16,63.0,15.0,15.0,1.0,6.0,26.0
1,Pittsburgh Steelers,4,87,240,5.0,1206,77,256,3.3,3,143,5.8,85,12.9,950,8,12,7,58.3,7,6,1,20,36.0,8.0,13.0,7.0,10.0,8.0
2,Indianapolis Colts,5,88,285,4.7,1330,119,432,3.6,1,155,5.4,95,10.2,898,7,12,7,58.3,9,9,0,11,57.0,4.0,15.0,26.0,7.0,5.0
3,Tennessee Titans,4,90,264,6.2,1637,108,593,5.5,3,151,6.7,97,11.2,1044,9,12,10,83.3,9,6,3,5,23.0,3.0,5.0,7.0,10.0,8.0
4,Los Angeles Rams,5,90,314,4.8,1521,113,532,4.7,5,181,4.9,116,9.7,989,5,17,10,58.8,6,4,2,20,38.0,4.0,7.0,4.0,11.0,12.0


In [290]:
defense_df.dtypes

Team                          object
GP                             int64
Ttl_Pts_Allowed                int64
Ttl_Offense_Plays_Allowed      int64
Yds_p_Play                   float64
Ttl_Yds                        int64
Rushing_Att                    int64
Rushing_Yds                    int64
Rushing_Yds_p_Att            float64
Rushing_TDs                    int64
Passing_Att                    int64
Passing_Yds_p_Att            float64
Completions                    int64
Yds_p_Completion             float64
Passing_Yds                    int64
Passing_TDs                    int64
RZ_Att                         int64
RZ_TD                          int64
RZ_Percent                   float64
Ttl_Turnovers                  int64
Interceptions                  int64
Fumbles                        int64
Sacks                          int64
TTL                          float64
Week_1                       float64
Week_2                       float64
Week_3                       float64
W

## Kicking Stats

In [291]:
kicking.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS
0,Rodrigo Blankenship,K,IND,4,14,85.7,44,0-0,3-3,5-6,4-5,0-0,9,100.0,45
1,Randy Bullock,K,CIN,4,13,92.3,50,0-0,2-2,4-5,5-5,1-1,9,100.0,45
2,Ryan Succop,K,TB,5,11,81.8,46,0-0,2-2,5-5,2-3,0-1,15,93.3,41
3,Mason Crosby,K,GB,4,8,100.0,52,0-0,0-0,3-3,4-4,1-1,17,94.1,40
4,Daniel Carlson,K,LV,4,10,90.0,54,0-0,5-5,1-1,0-1,3-3,12,100.0,39


In [292]:
kicking_df = pd.merge(kicking, fantasy_pts, left_on = 'PLAYER', right_on = 'Player')
kicking_df.drop(columns = ['Player', 'Team', 'Position'], inplace = True)
kicking_df

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Rodrigo Blankenship,K,IND,4,14,85.7,44,0-0,3-3,5-6,4-5,0-0,9,100.0,45,60.0,8.0,15.0,12.0,14.0,11.0
1,Randy Bullock,K,CIN,4,13,92.3,50,0-0,2-2,4-5,5-5,1-1,9,100.0,45,55.0,10.0,13.0,12.0,17.0,3.0
2,Ryan Succop,K,TB,5,11,81.8,46,0-0,2-2,5-5,2-3,0-1,15,93.3,41,43.0,5.0,7.0,9.0,8.0,14.0
3,Mason Crosby,K,GB,4,8,100.0,52,0-0,0-0,3-3,4-4,1-1,17,94.1,40,46.0,12.0,11.0,16.0,7.0,
4,Daniel Carlson,K,LV,4,10,90.0,54,0-0,5-5,1-1,0-1,3-3,12,100.0,39,56.0,12.0,12.0,8.0,13.0,11.0
5,Justin Tucker,K,BAL,4,8,100.0,47,0-0,2-2,2-2,4-4,0-0,14,100.0,38,52.0,9.0,16.0,9.0,8.0,10.0
6,Joey Slye,K,CAR,4,11,90.9,47,0-0,5-5,3-3,2-3,0-0,9,77.8,37,50.0,12.0,5.0,15.0,7.0,11.0
7,Wil Lutz,K,NO,4,6,100.0,45,0-0,2-2,3-3,1-1,0-0,15,100.0,33,49.0,10.0,6.0,13.0,5.0,15.0
8,Jason Sanders,K,MIA,4,9,100.0,52,0-0,2-2,1-1,5-5,1-1,6,100.0,33,62.0,4.0,11.0,7.0,18.0,22.0
9,Stephen Gostkowski,K,TEN,3,12,75.0,55,0-0,1-1,3-3,1-4,4-4,7,71.4,32,47.0,4.0,12.0,25.0,,6.0


In [293]:
dash = kicking_df['LNG'][35]
dash

'—'

In [294]:
kicking_df['LNG'].value_counts()

52    4
50    4
47    3
44    3
54    3
—     3
56    2
41    2
46    2
55    2
43    2
49    1
35    1
39    1
45    1
23    1
58    1
Name: LNG, dtype: int64

In [295]:
kicking_df.fillna('0.0', inplace = True)

In [296]:
kick_int = ['GP', 'FGM_A', 'LNG', 'XPM_A', 'PTS']

kick_float = ['FG%', 'XP%', 'TTL', 'Week_1', 'Week_2', 'Week_3', 'Week_4', 'Week_5']

In [297]:
replace_dash(kicking_df, kick_int, dash)

In [298]:
replace_dash(kicking_df, kick_float, dash)

In [299]:
update_int_dtype(kicking_df, kick_int)

In [300]:
adjust_float(kicking_df, kick_float)

In [301]:
update_float_dtype(kicking_df, kick_float)

In [302]:
kicking_df.head()

Unnamed: 0,PLAYER,POSITION,TEAM,GP,FGM_A,FG%,LNG,_1_19,_20_29,_30_39,_40_49,OVER50,XPM_A,XP%,PTS,TTL,Week_1,Week_2,Week_3,Week_4,Week_5
0,Rodrigo Blankenship,K,IND,4,14,85.7,44,0-0,3-3,5-6,4-5,0-0,9,100.0,45,60.0,8.0,15.0,12.0,14.0,11.0
1,Randy Bullock,K,CIN,4,13,92.3,50,0-0,2-2,4-5,5-5,1-1,9,100.0,45,55.0,10.0,13.0,12.0,17.0,3.0
2,Ryan Succop,K,TB,5,11,81.8,46,0-0,2-2,5-5,2-3,0-1,15,93.3,41,43.0,5.0,7.0,9.0,8.0,14.0
3,Mason Crosby,K,GB,4,8,100.0,52,0-0,0-0,3-3,4-4,1-1,17,94.1,40,46.0,12.0,11.0,16.0,7.0,0.0
4,Daniel Carlson,K,LV,4,10,90.0,54,0-0,5-5,1-1,0-1,3-3,12,100.0,39,56.0,12.0,12.0,8.0,13.0,11.0


In [303]:
kicking_df.dtypes

PLAYER       object
POSITION     object
TEAM         object
GP            int64
FGM_A         int64
FG%         float64
LNG           int64
_1_19        object
_20_29       object
_30_39       object
_40_49       object
OVER50       object
XPM_A         int64
XP%         float64
PTS           int64
TTL         float64
Week_1      float64
Week_2      float64
Week_3      float64
Week_4      float64
Week_5      float64
dtype: object

## Pickle Cleaned DataFrames

In [304]:
offense.to_pickle('players')
team_names.to_pickle('long_names')
defense_df.to_pickle('defense')
kicking_df.to_pickle('kicking')