In [60]:
import csv
import os
import pandas as pd
import mysql.connector

# Connection settings are read from the environment so credentials do not have
# to be stored in the notebook; each default reproduces the original local setup.
mydb = mysql.connector.connect(
    host=os.environ.get("NRL_DB_HOST", "localhost"),
    user=os.environ.get("NRL_DB_USER", "root"),
    passwd=os.environ.get("NRL_DB_PASSWORD", ""),
    database=os.environ.get("NRL_DB_NAME", "NRL_data"),
)
# Shared cursor used by insert_stats_into_db for every INSERT.
mycursor = mydb.cursor()

In [68]:
def clean_and_insert_csv_into_db(file):
    """Read one match-stat CSV, normalise its columns and insert every row.

    Parameters
    ----------
    file : str or path-like
        CSV to load. It must contain an ``index`` column (used as the row
        index) and an ``Unnamed: 0`` column (dropped), plus the columns
        cleaned below.

    Returns
    -------
    None
        The cleaned frame is printed and each row is handed to
        ``insert_stats_into_db``.
    """
    position_id_mapping = {
        'Fullback': 1,
        'Winger': 2,
        'Centre': 3,
        'Five-Eighth': 4,
        'Halfback': 5,
        'Prop': 6,
        'Hooker': 7,
        '2nd Row': 8,
        'Lock': 9,
        'Interchange': 10,
    }

    stat_df = pd.read_csv(file, index_col='index').drop(columns='Unnamed: 0')
    stat_df['player_id'] = stat_df['player_id'].astype('int')
    # Positions that are not keys of the mapping come out as NaN.
    stat_df['position_id'] = stat_df['position'].map(position_id_mapping)

    # Text values have their 's' characters removed before the float cast. When
    # the column was parsed as numbers the .str accessor raises, and the plain
    # cast is used instead.
    ptb_column = 'average_play_the_ball_seconds'
    try:
        stat_df[ptb_column] = stat_df[ptb_column].str.replace('s', '').astype('float')
    except Exception:  # not a bare except: Ctrl-C / SystemExit must still propagate
        stat_df[ptb_column] = stat_df[ptb_column].astype('float')

    for column in ['conversion_percentage', 'tackle_percentage']:
        try:
            # "93.3%" -> 0.933
            stat_df[column] = stat_df[column].str.replace('%', '').astype('float') / 100
            stat_df[column] = stat_df[column].round(3)
        except Exception:
            # Best-effort fallback: a column that cannot be parsed as
            # percentage text is stored as 0 for every row.
            stat_df[column] = 0

    for column in ['minutes_played', 'stint_one', 'stint_two']:
        stat_df[column] = stat_df[column].apply(convert_time_columns_to_numeric)
    print(stat_df)
    for row in stat_df.iterrows():
        insert_stats_into_db(row)

#Convert a [minutes : seconds] value to minutes (numeric input is passed through)
def convert_time_columns_to_numeric(x):
    """Convert a ``"minutes:seconds"`` string to minutes, rounded to 2 places.

    Parameters
    ----------
    x : str or number
        Text such as ``"12:30"`` or ``"80"`` (no seconds part), or a value
        that is already numeric.

    Returns
    -------
    number
        Numeric input (int, float, NaN, numpy scalar) is returned unchanged.
        Text is returned as a float number of minutes.

    Raises
    ------
    ValueError
        If the minutes or seconds part of the text is not an integer.
    """
    # Local import keeps this definition self-contained within its cell.
    import numbers

    # Columns parsed as numbers (or holding NaN for missing values) have
    # nothing to split, so they are passed through as they are.
    if isinstance(x, numbers.Real):
        return x

    parts = x.split(':')
    minutes = int(parts[0])
    # A value without ':' is whole minutes only.
    seconds = int(parts[1]) if len(parts) > 1 else 0
    return round((minutes * 60 + seconds) / 60, 2)
        
    
def insert_stats_into_db(row):
    """Insert one player's match statistics into the PlayerMatchStats table.

    Parameters
    ----------
    row : tuple
        An ``(index, values)`` pair as produced by ``DataFrame.iterrows()``;
        ``values`` is read by column name.

    Returns
    -------
    None
        Uses the module-level ``mycursor`` and ``mydb``. Each row is committed
        on its own. A failed insert is rolled back and reported on stdout
        instead of being raised, so the caller can continue with other rows.
    """
    insert_query = '''INSERT INTO PlayerMatchStats (match_id, player_id, team_id, position_id, minutes_played,
        points, tries, conversions, conversion_attempts, penalty_goals,
        conversion_percentage, field_goals, total_runs, total_run_metres, kick_return_metres,
        post_contact_metres, line_breaks, line_break_assists, try_assists, line_engaged_runs,
        tackle_breaks, hit_ups, play_the_ball, average_play_ball_seconds, dummy_half_runs,
        dummy_half_run_metres, steals, offloads, dummy_passes, passes,
        receipts, pass_to_run_ratio, tackle_percentage, tackles_made, tackles_missed, ineffective_tackles,
        intercepts, kicks_defused, kicks, kicking_metres, forced_drop_outs,
        bomb_kicks, grubbers, fourty_twenty, cross_field_kicks, kicked_dead,
        errors, handling_errors, one_on_ones_lost, penalties, on_report,
        sin_bins, send_offs, stint_one, stint_two)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        '''
    # Frame columns in the same order as the SQL column list above. The table
    # column is named average_play_ball_seconds, while the frame column it is
    # filled from is average_play_the_ball_seconds.
    stat_columns = (
        'match_id', 'player_id', 'team_id', 'position_id', 'minutes_played',
        'points', 'tries', 'conversions', 'conversion_attempts', 'penalty_goals',
        'conversion_percentage', 'field_goals', 'total_runs', 'total_run_metres', 'kick_return_metres',
        'post_contact_metres', 'line_breaks', 'line_break_assists', 'try_assists', 'line_engaged_runs',
        'tackle_breaks', 'hit_ups', 'play_the_ball', 'average_play_the_ball_seconds', 'dummy_half_runs',
        'dummy_half_run_metres', 'steals', 'offloads', 'dummy_passes', 'passes',
        'receipts', 'pass_to_run_ratio', 'tackle_percentage', 'tackles_made', 'tackles_missed', 'ineffective_tackles',
        'intercepts', 'kicks_defused', 'kicks', 'kicking_metres', 'forced_drop_outs',
        'bomb_kicks', 'grubbers', 'fourty_twenty', 'cross_field_kicks', 'kicked_dead',
        'errors', 'handling_errors', 'one_on_ones_lost', 'penalties', 'on_report',
        'sin_bins', 'send_offs', 'stint_one', 'stint_two',
    )
    r = row[1]
    insert_tuple = tuple(r[column] for column in stat_columns)
    print(row[0])
    print(insert_tuple)
    try:
        mycursor.execute(insert_query, insert_tuple)
        mydb.commit()
    except Exception as exc:  # best-effort per row, but keep Ctrl-C working
        # Discard the failed statement so the next row starts from a clean
        # transaction; a failing rollback is reported, not raised.
        try:
            mydb.rollback()
        except Exception as rollback_exc:
            print('rollback failed: ' + repr(rollback_exc))
        print('error: ' + str(row[0]) + ' match_id = ' + str(r['match_id']) + ' (' + repr(exc) + ')')
    else:
        print('success: ' + str(row[0]))

In [69]:
#Loop through all stat files in ./csv_files/<year>/<month>/
csv_root = './csv_files'

# os.walk is recursive, so only its first result (the directory itself) is
# taken at each level; a directory that does not exist simply yields nothing.
year_dirs = next(os.walk(csv_root), (csv_root, [], []))[1]
for year in year_dirs:
    # Only 4-character folder names are treated as years (this skips 'archive').
    if len(year) != 4:
        continue
    year_path = csv_root + '/' + year
    month_dirs = next(os.walk(year_path), (year_path, [], []))[1]
    for month in month_dirs:
        month_path = year_path + '/' + month
        month_files = next(os.walk(month_path), (month_path, [], []))[2]
        for file in month_files:
            path = month_path + '/' + file
            print(path)
            clean_and_insert_csv_into_db(path)

            #Move uploaded data into an archive folder
            # NOTE(review): the file is archived even when some rows failed to
            # insert, because insert_stats_into_db reports errors without
            # raising - confirm that this is intended.
            archive_year = csv_root + '/archive/' + year
            # makedirs also creates ./csv_files/archive itself when it is missing.
            os.makedirs(archive_year, exist_ok=True)
            os.rename(path, archive_year + '/' + file)
            print('success moving to ' + archive_year + '/' + file)

./csv_files/2013/9/2013_9_MatchID_1469.csv
                         player_id  team_id  match_id  number     position  \
index                                                                        
Anthony_Minichiello_15        1672       15      1469       1     Fullback   
Daniel_Tupou_15                431       15      1469       2       Winger   
Michael_Jennings_15           1673       15      1469       3       Centre   
Shaun_Kenny-dowall_15         1674       15      1469       4       Centre   
Roger_Tuivasa-sheck_15        1675       15      1469       5       Winger   
James_Maloney_15              1676       15      1469       6  Five-Eighth   
Mitchell_Pearce_15            1677       15      1469       7     Halfback   
Luke_O'donnell_15             1893       15      1469      18         Prop   
Jake_Friend_15                 413       15      1469       9       Hooker   
Sam_Moa_15                    1678       15      1469      10         Prop   
Aidan_Guerra_15      

                        player_id  team_id  match_id  number     position  \
index                                                                       
Michael_Gordon_4             1828        4      1468       1     Fullback   
Sosaia_Feki_4                  97        4      1468       2       Winger   
Ben_Pomeroy_4                1925        4      1468       3       Centre   
Jonathan_Wright_4            1829        4      1468       4       Centre   
Beau_Ryan_4                  1831        4      1468       5       Winger   
Todd_Carney_4                1832        4      1468       6  Five-Eighth   
Jeff_Robson_4                1833        4      1468       7     Halfback   
Andrew_Fifita_4               103        4      1468       8         Prop   
John_Morris_4                1838        4      1468       9       Hooker   
Sam_Tagataese_4              1906        4      1468      10         Prop   
Luke_Lewis_4                  582        4      1468      11      2nd Row   

success: Jayden_Hodges_10
Tariq_Sims_10
(1468, 1775, 10, 10, 18.0, 0, 0, 0, 0, 0, 0.0, 0, 5, 42, 0, 13, 0, 0, 0, 0, 1, 5, 0, 0.0, 0, 0, 0, 1, 0, 0, 6, 0.0, 1.0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18.0, 0)
success: Tariq_Sims_10
Scott_Bolton_10
(1468, 276, 10, 10, 41.0, 0, 0, 0, 0, 0, 0.0, 0, 13, 118, 18, 47, 0, 0, 0, 0, 0, 12, 0, 0.0, 0, 0, 0, 1, 0, 0, 13, 0.0, 0.933, 28, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41.0, 0)
success: Scott_Bolton_10
Jason_Taumalolo_10
(1468, 266, 10, 10, 42.0, 4, 1, 0, 0, 0, 0.0, 0, 12, 142, 0, 52, 1, 0, 0, 0, 11, 12, 0, 0.0, 0, 0, 0, 0, 0, 2, 13, 0.17, 0.8, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42.0, 0)
success: Jason_Taumalolo_10
success moving to ./csv_files/archive/2013/2013_9_MatchID_1468.csv
./csv_files/2013/10/2013_10_MatchID_1475.csv
                            player_id  team_id  match_id  number     position  \
index                                                                    

success: Brent_Kite_6
Anthony_Watmough_6
(1475, 1716, 6, 8, 76.0, 0, 0, 0, 0, 0, 0.0, 0, 14, 112, 0, 30, 0, 0, 0, 0, 6, 12, 0, 0.0, 1, 6, 0, 3, 2, 2, 16, 0.14, 0.902, 37, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76.0, 0)
success: Anthony_Watmough_6
Justin_Horo_6
(1475, 1717, 6, 8, 77.0, 0, 0, 0, 0, 0, 0.0, 0, 8, 83, 0, 37, 0, 0, 0, 0, 2, 8, 0, 0.0, 0, 0, 0, 0, 0, 1, 9, 0.13, 0.806, 25, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 77.0, 0)
success: Justin_Horo_6
Glenn_Stewart_6
(1475, 1963, 6, 9, 80.0, 0, 0, 0, 0, 0, 0.0, 0, 10, 65, 0, 12, 0, 0, 0, 0, 0, 5, 0, 0.0, 0, 0, 0, 1, 0, 15, 21, 1.5, 0.833, 30, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80.0, 0)
success: Glenn_Stewart_6
David_Gower_6
(1475, 1721, 6, 10, 3.0, 0, 0, 0, 0, 0, 0.0, 0, 1, 15, 0, 6, 0, 0, 0, 0, 0, 1, 0, 0.0, 0, 0, 0, 0, 0, 0, 1, 0.0, 1.0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.0, 0)
success: David_Gower_6
Jamie_Buhrer_6
(1475, 1718, 6, 10, 41.0, 0, 0, 0

In [46]:
# Release the cursor first, then the connection it was created from, so the
# database session is not left open after the upload.
mycursor.close()
mydb.close()

True