In [39]:
# Test out cleaning with fantasy_1970.csv

# Import the necessary libraries
import pandas as pd
import numpy as np
import os

# Work from the 'csv files' subfolder of the current working directory.
# If that folder lives somewhere else, replace the argument of os.chdir()
# below with the full path to it.
# The substring test makes re-running this cell a no-op once the working
# directory path already contains 'csv files'.
_cwd = os.getcwd()
if 'csv files' not in _cwd:
    os.chdir(f'{_cwd}/csv files')

# Maps the abbreviated headers of the raw fantasy CSV files to descriptive
# snake_case column names.
# NOTE(review): the '.1' / '.2' / '.3' suffixes look like the names pandas
# assigns to repeated headers when reading a CSV -- confirm against the raw
# files.
dict_rename = {
    # General / fantasy scoring
    'G': 'games',
    'GS': 'games_started',
    'Tm': 'team',
    'FantPos': 'position',
    'FantPt': 'fantasy_points',
    'PPR': 'ppr_points',
    'DKPt': 'draft_kings_points',
    'FDPt': 'fan_duel_points',
    'VBD': 'value_based_draft',
    'PosRank': 'position_rank',
    'OvRank': 'overall_rank',
    # Passing
    'Cmp': 'passing_completions',
    'Att': 'passing_attempts',
    'Yds': 'passing_yards',
    'TD': 'passing_touchdowns',
    'Int': 'interceptions',
    # Rushing
    'Att.1': 'rushing_attempts',
    'Yds.1': 'rushing_yards',
    'Y/A': 'yards_per_attempt',
    'TD.1': 'rushing_touchdowns',
    # Receiving
    'Tgt': 'targets',
    'Rec': 'receptions',
    'Yds.2': 'receiving_yards',
    'Y/R': 'yards_per_reception',
    'TD.2': 'receiving_touchdowns',
    # Fumbles, totals and two-point conversions
    'Fmb': 'fumbles',
    'FL': 'fumbles_lost',
    'TD.3': 'total_touchdowns',
    '2PM': 'two_point_conversions_made',
    '2PP': 'two_point_conversion_attempts',
    # Player info
    'Player': 'player',
    'Age': 'age',
}

# Target dtype for every (already renamed) column that clean_data() converts.
# Columns not listed here keep whatever dtype they have after fillna(0).
_COLUMN_DTYPES = {
    'games': int,
    'games_started': int,
    'passing_completions': int,
    'passing_attempts': int,
    'passing_yards': int,
    'passing_touchdowns': int,
    'interceptions': int,
    'rushing_attempts': int,
    'rushing_yards': int,
    'rushing_touchdowns': int,
    'receptions': int,
    'receiving_yards': int,
    'receiving_touchdowns': int,
    'fumbles': int,
    'fumbles_lost': int,
    'total_touchdowns': int,
    'yards_per_attempt': float,
    'yards_per_reception': float,
    'team': str,
    'position': str,
    'player': str,
}


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a raw fantasy stats table.

    Steps, in order:

    1. Drop rows in which every value is NaN.
    2. Rename columns according to ``dict_rename``.
    3. Replace every remaining NaN with 0.
    4. Cast the columns listed in ``_COLUMN_DTYPES`` to their target dtype.
    5. Strip every character that is neither a word character nor
       whitespace from the ``player`` column.

    Columns named in ``_COLUMN_DTYPES`` that are absent from ``df`` are
    skipped instead of raising ``KeyError``, so files that lack some of the
    columns can still be cleaned.

    Args:
        df: Raw table using the original (abbreviated) column headers.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    # Drop rows with all NaN values
    df = df.dropna(axis=0, how='all')
    # Rename columns
    df = df.rename(columns=dict_rename)
    # Fill NaN values with 0.
    # NOTE: this also applies to text columns, so a missing team, position
    # or player ends up as the string '0' after the str conversion below.
    df = df.fillna(0)
    # Convert only the columns that are actually present: DataFrame.astype
    # raises KeyError when its mapping names a column the frame lacks.
    present_dtypes = {
        column: dtype
        for column, dtype in _COLUMN_DTYPES.items()
        if column in df.columns
    }
    if present_dtypes:
        df = df.astype(present_dtypes)
    # Remove non-alphanumeric characters from player names
    if 'player' in df.columns:
        df['player'] = df['player'].str.replace(r'[^\w\s]', '', regex=True)
    return df

# For every file in the current directory named 'fantasy_*.csv', clean its
# contents and save the result alongside it as 'cleaned_<original name>'.
for file in os.listdir():
    # Only raw fantasy CSV files are processed; other entries that merely
    # share the 'fantasy_' prefix are skipped rather than parsed as CSV.
    # Output names start with 'cleaned_', so a re-run never picks them up.
    if not (file.startswith('fantasy_') and file.lower().endswith('.csv')):
        continue
    df = pd.read_csv(file)
    cleaned_df = clean_data(df)
    # index=False keeps the DataFrame index out of the written file.
    cleaned_df.to_csv(f'cleaned_{file}', index=False)


Cleaning to do: remove empty rows and remove non-alphanumeric characters from the player column.