# Packages

In [40]:
import pandas as pd
import numpy as np

# Load Data Function

In [41]:
def load_data():
    """Read the raw stats pickle and return whatever object it contains.

    The file is looked up relative to the current working directory at
    ``../data/ssl_stats_raw.pkl``.
    """
    # NOTE: unpickling executes code embedded in the file; only load
    # pickles that come from a trusted source.
    source = '../data/' + 'ssl_stats_raw.pkl'
    return pd.read_pickle(source)

# Data Cleaning Functions

## Drop NA Rows and Cols

In [42]:
def drop_row_col(df):
    """Trim the padding rows and the first column, and promote row 8 to headers.

    The row at position 8 supplies the new column names (its first cell is
    skipped because the first column is removed). Rows labelled 0-8 and
    167-170 are dropped, then the first column is dropped. The input frame
    is not modified; a new frame is returned.
    """
    # NOTE(review): the header row is picked by position (iloc) while the
    # rows are dropped by label -- this assumes a default 0..n-1 index so
    # that the two agree. TODO confirm against the raw file.
    header_row = df.iloc[8]

    # leading rows 0..8 and trailing rows 167..170
    unwanted_rows = list(range(0, 9)) + list(range(167, 171))
    trimmed = df.drop(labels=unwanted_rows, axis=0)

    # discard the first column
    trimmed = trimmed.drop(labels=trimmed.columns[0], axis=1)

    # apply the saved header values, minus the one for the dropped column
    trimmed.columns = header_row.tolist()[1:]
    return trimmed

## Change Names

In [43]:
def name_changer(df):
    """Combine the ``First`` and ``Last`` columns into a single ``Name`` column.

    Missing values in either column are treated as empty strings, so a row
    with only one of the two parts ends up with a leading or trailing space
    in ``Name``. The ``Last`` column is removed, and the existing index is
    moved into a regular column by ``reset_index()`` (pandas names that
    column ``index`` when the index itself is unnamed).

    The input frame is left untouched; a new frame is returned.
    """
    # combine first and last name, treating missing parts as ''
    full_name = df['First'].fillna('') + ' ' + df['Last'].fillna('')

    # assign() works on a copy, so the caller's First/Last stay as they were
    combined = df.assign(First=full_name).drop(columns='Last')

    combined = combined.reset_index()

    # rename name column
    return combined.rename(columns={'First': 'Name'})

## Drop Cols 

In [44]:
def col_dropper(df):
    """Return a copy of ``df`` without the ``index`` and ``Number`` columns."""
    unwanted = ['index', 'Number']
    return df.drop(columns=unwanted)

## Title Case

In [45]:
def title_case(df):
    """Return a copy of ``df`` with every ``Name`` value converted to title case.

    Each value is passed through ``str.title()``, so every entry in ``Name``
    must be a string. The input frame is not modified.
    """
    titled = [name.title() for name in df['Name']]

    # assign() returns a new frame instead of writing into the caller's one
    return df.assign(Name=titled)

## Strip Whitespace

In [46]:
def stripper(df):
    """Return a copy of ``df`` with surrounding whitespace removed from ``Name``.

    Each value is passed through ``str.strip()``, so every entry in ``Name``
    must be a string. The input frame is not modified.
    """
    # strip() trims both ends in one pass (same result as lstrip + rstrip)
    cleaned = [name.strip() for name in df['Name']]

    # assign() returns a new frame instead of writing into the caller's one
    return df.assign(Name=cleaned)

## Convert Dtypes

In [47]:
def dtype_converter(df):
    """Return ``df`` with object columns re-inferred via ``infer_objects()``."""
    return df.infer_objects()

In [48]:
def sorter(df):
    """Return ``df`` ordered by ``Team`` first and ``Name`` second."""
    sort_keys = ['Team', 'Name']
    return df.sort_values(by=sort_keys)

# Final Pipeline

drop_row_col
name_changer
col_dropper
title_case
stripper
dtype_converter
sorter

In [49]:
# Load the raw table, then apply each cleaning step in order;
# every step receives the previous step's result.
df_final = load_data()
df_final = drop_row_col(df_final)
df_final = name_changer(df_final)
df_final = col_dropper(df_final)
df_final = title_case(df_final)
df_final = stripper(df_final)
df_final = dtype_converter(df_final)
df_final = sorter(df_final)

# Export Data

In [50]:
# Write the cleaned frame to a pickle file under ../data/.
export_folder = '../data/'
export_file = 'ssl_stats_clean.pkl'
export_path = f'{export_folder}{export_file}'

df_final.to_pickle(export_path)