In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn import preprocessing
import math

In [2]:
def make_prediction(position, league, year, model):
    """Rank players of one position/league by predicted class probability.

    A Gaussian naive Bayes classifier is fitted on the three seasons that
    precede ``year`` (features R, HR, RBI, AVG, OPS; label column "SS") and
    then applied to the projected stat columns produced by ``model``.

    Args:
        position: Position code used to build the training file name,
            e.g. "C", "1B", "OF".
        league: League code used to build the training file name,
            e.g. "AL" or "NL".
        year: Target season as a string, e.g. "2019". "2020" reads
            '2020Predictions.csv'; any other value reads
            '<year>PredictionsAndActual.csv'.
        model: Which projected columns to classify on: "linear",
            "linearSVR" or "SVR".

    Returns:
        A list of player names ordered by descending probability taken from
        column 1 of ``predict_proba``. Only players whose name appears in the
        previous season's rows of the training file are included.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    test_features_by_model = {
        "linear": ["runsPredicted", "HRsPredicted", "RBIsPredicted", "AVGPredicted", "opsPredicted"],
        "linearSVR": ["linearSVRRuns", "linearSvrHR", "linearSvrRBI", "linearSvrAVG", "linearSvrOPS"],
        "SVR": ["svrRuns", "SvrHR", "svrRBI", "svrAVG", "svrOPS"],
    }
    # Fail fast with a clear message instead of an UnboundLocalError after
    # the files have already been read and the classifier fitted.
    if model not in test_features_by_model:
        raise ValueError(
            "unknown model " + repr(model) + "; expected one of "
            + ", ".join(sorted(test_features_by_model))
        )
    test_features = test_features_by_model[model]
    train_features = ["R", "HR", "RBI", "AVG", "OPS"]

    train_data = pd.read_csv(league.lower() + '-' + position.lower() + '+2014.csv')
    if year == "2020":
        test_data = pd.read_csv('2020Predictions.csv')
    else:
        test_data = pd.read_csv(year + 'PredictionsAndActual.csv')

    target_season = int(year)
    years = [target_season - 1, target_season - 2, target_season - 3]

    # Rows of the season right before the target; used below to restrict the
    # ranking to players present in this position/league file last season.
    prev_year_data = train_data[train_data["Season"] == target_season - 1]
    train_data = train_data[train_data["Season"].isin(years)]

    gnb = GaussianNB()
    gnb.fit(train_data[train_features].values, train_data["SS"])

    # Plain arrays on both sides: the train and test column names differ, and
    # fitting on an array but predicting on a DataFrame triggers a
    # feature-name warning in recent scikit-learn releases.
    pred = gnb.predict_proba(test_data[test_features].values)

    eligible_names = set(prev_year_data["Name"].values)

    # The first column of the prediction file is matched against "Name".
    # Column 1 of predict_proba is the second class in gnb.classes_
    # (presumably the SS == 1 flag - confirm against the training data).
    players = {}
    for name, class_probabilities in zip(test_data.iloc[:, 0], pred):
        if name in eligible_names:
            players[name] = class_probabilities[1]

    return sorted(players, key=players.get, reverse=True)

In [3]:
def recall(predictions, winners, year, value):
    """Count the winners of ``year`` that appear near the top of the rankings.

    Args:
        predictions: Mapping league -> position -> list of player names,
            best candidate first.
        winners: Mapping league -> position -> {'stats': {year: ...}} where
            the season entry holds 'names' (a list) for the 'ofs' position
            and 'name' (a single string) for every other position.
        year: Season key to evaluate, e.g. "2018".
        value: Cutoff k. A single-winner position counts as a hit when the
            winner is ranked within the top ``value`` names; an 'ofs' winner
            counts when ranked within the top ``3 * value`` names.

    Returns:
        The number of winners found within their cutoff (a count, not a
        ratio). Winners missing from the ranking are skipped.
    """
    counter = 0
    for league in winners:
        for position in winners[league]:
            season = winners[league][position]['stats'][year]
            if position == "ofs":
                names = season['names']
                cutoff = value * 3
            else:
                names = [season['name']]
                cutoff = value

            for name in names:
                try:
                    index_elem = predictions[league][position].index(name)
                except ValueError:
                    # Winner was never ranked for this position: no hit.
                    continue
                # The 1-based rank must be <= cutoff. The previous strict
                # `rank < cutoff` test could never succeed for value == 1.
                if index_elem + 1 <= cutoff:
                    counter = counter + 1
    return counter

In [4]:
def ranking(predictions, winners, year):
    """Sum reciprocal ranks of the winners of ``year`` in the predictions.

    For single-winner positions a winner at 1-based rank r contributes 1/r.
    For the 'ofs' position each of the listed names contributes
    1/ceil(r/3), so ranks 1-3 score 1, ranks 4-6 score 1/2, and so on.
    Winners that do not appear in the ranking contribute nothing.

    Args:
        predictions: Mapping league -> position -> ordered list of names.
        winners: Mapping league -> position -> {'stats': {year: ...}} with
            'names' (list) for 'ofs' and 'name' (string) otherwise.
        year: Season key to evaluate.

    Returns:
        The accumulated score (0 when no winner was found).
    """
    total = 0
    for league, by_position in winners.items():
        for position, entry in by_position.items():
            season = entry['stats'][year]
            is_outfield = position == 'ofs'
            honorees = season['names'] if is_outfield else [season['name']]
            for honoree in honorees:
                try:
                    place = predictions[league][position].index(honoree) + 1
                except ValueError:
                    # Not ranked at all: contributes nothing.
                    continue
                if is_outfield:
                    # Three outfield slots share each "rank" bucket.
                    total = total + 1 / (math.ceil(place / 3))
                else:
                    total = total + 1 / place
    return total

In [5]:
def print_winners(winners, model, year):
    """Print the predicted rankings for every position and league.

    Calls ``make_prediction`` for each position/league pair (DH only for the
    AL), prints a summary per position and, for every year other than
    '2020', the hit counts returned by ``recall`` for cutoffs 1 to 5 followed
    by the ``ranking`` score.

    Args:
        winners: Nested mapping league -> position -> {'stats': {year: ...}}
            passed through to ``recall`` and ``ranking``.
        model: Model name forwarded to ``make_prediction``.
        year: Target season as a string, e.g. "2018".

    Returns:
        None. All results are written to stdout.
    """
    # One row per position:
    # (heading and make_prediction position code, key in `predictions`,
    #  leagues, names on the summary line, size of the printed shortlist or
    #  None when no shortlist is printed).
    layout = [
        ("C", "c", ("AL", "NL"), 5, None),
        ("1B", "1b", ("AL", "NL"), 1, 5),
        ("2B", "2b", ("AL", "NL"), 1, 5),
        ("3B", "3b", ("AL", "NL"), 1, 5),
        ("SS", "ss", ("AL", "NL"), 1, 5),
        ("OF", "ofs", ("AL", "NL"), 3, 15),
        ("DH", "dh", ("AL",), 1, 5),
    ]

    # Compute every ranking before printing so a failure leaves no
    # half-printed report.
    predictions = {'al': {}, 'nl': {}}
    for heading, key, leagues, _, _ in layout:
        for league in leagues:
            predictions[league.lower()][key] = make_prediction(heading, league, year, model)

    print(year + " " + model + '-----------------------------------------')
    for heading, key, leagues, summary_size, shortlist_size in layout:
        print(heading)
        for league in leagues:
            ranked = predictions[league.lower()][key]
            # Slicing + join prints the same text as indexing each element
            # but does not raise IndexError when fewer players were ranked.
            print(league + ': ' + ', '.join(ranked[:summary_size]))
            if shortlist_size is not None:
                print(ranked[0:shortlist_size])

    # No actual winners are available for 2020, so nothing to evaluate.
    if year != '2020':
        print('Recall')
        recall_counts = [recall(predictions, winners, year, cutoff) for cutoff in range(1, 6)]
        for count in recall_counts:
            print(count)
        print('Ranking')
        rank = ranking(predictions, winners, year)
        print(rank)

In [6]:
# Seasons covered by the winners table, in the order the per-position lists
# below are written.
_WINNER_SEASONS = ('2014', '2015', '2016', '2017', '2018', '2019')

# league -> position -> one entry per season in _WINNER_SEASONS.
# 'ofs' entries hold three names per season; all others hold a single name.
_WINNER_NAMES = {
    'al': {
        '1b': ['Jose Abreu', 'Miguel Cabrera', 'Miguel Cabrera',
               'Eric Hosmer', 'Jose Abreu', 'Carlos Santana'],
        '2b': ['Jose Altuve', 'Jose Altuve', 'Jose Altuve',
               'Jose Altuve', 'Jose Altuve', 'DJ LeMahieu'],
        '3b': ['Adrian Beltre', 'Josh Donaldson', 'Josh Donaldson',
               'Jose Ramirez', 'Jose Ramirez', 'Alex Bregman'],
        'ss': ['Alexei Ramirez', 'Xander Bogaerts', 'Xander Bogaerts',
               'Francisco Lindor', 'Francisco Lindor', 'Xander Bogaerts'],
        'ofs': [
            ['Jose Bautista', 'Mike Trout', 'Michael Brantley'],
            ['Nelson Cruz', 'Mike Trout', 'J.D. Martinez'],
            ['Mookie Betts', 'Mike Trout', 'Mark Trumbo'],
            ['Aaron Judge', 'George Springer', 'Justin Upton'],
            ['Mookie Betts', 'Mike Trout', 'J.D. Martinez'],
            ['Mookie Betts', 'Mike Trout', 'George Springer'],
        ],
        'c': ['Yan Gomes', 'Brian McCann', 'Salvador Perez',
              'Gary Sanchez', 'Salvador Perez', 'Mitch Garver'],
        'dh': ['Victor Martinez', 'Kendrys Morales', 'David Ortiz',
               'Nelson Cruz', 'J.D. Martinez', 'Nelson Cruz'],
    },
    'nl': {
        '1b': ['Adrian Gonzalez', 'Paul Goldschmidt', 'Anthony Rizzo',
               'Paul Goldschmidt', 'Paul Goldschmidt', 'Freddie Freeman'],
        '2b': ['Neil Walker', 'Dee Gordon', 'Daniel Murphy',
               'Daniel Murphy', 'Javier Baez', 'Ozzie Albies'],
        '3b': ['Anthony Rendon', 'Nolan Arenado', 'Nolan Arenado',
               'Nolan Arenado', 'Nolan Arenado', 'Anthony Rendon'],
        'ss': ['Ian Desmond', 'Brandon Crawford', 'Corey Seager',
               'Corey Seager', 'Trevor Story', 'Trevor Story'],
        'ofs': [
            ['Andrew McCutchen', 'Giancarlo Stanton', 'Justin Upton'],
            ['Andrew McCutchen', 'Bryce Harper', 'Carlos Gonzalez'],
            ['Christian Yelich', 'Yoenis Cespedes', 'Charlie Blackmon'],
            ['Marcell Ozuna', 'Giancarlo Stanton', 'Charlie Blackmon'],
            ['Christian Yelich', 'Nick Markakis', 'David Peralta'],
            ['Cody Bellinger', 'Christian Yelich', 'Ronald Acuna Jr.'],
        ],
        'c': ['Buster Posey', 'Buster Posey', 'Wilson Ramos',
              'Buster Posey', 'J.T. Realmuto', 'J.T. Realmuto'],
    },
}

# Expand the compact table into the nested structure the evaluation code
# reads: winners[league][position] = {'file': ..., 'stats': {season: ...}}.
winners = {
    league: {
        position: {
            'file': league + '-' + ('of' if position == 'ofs' else position) + '+2014.csv',
            'stats': {
                season: ({'names': list(pick)} if position == 'ofs' else {'name': pick})
                for season, pick in zip(_WINNER_SEASONS, picks)
            },
        }
        for position, picks in by_position.items()
    }
    for league, by_position in _WINNER_NAMES.items()
}

In [7]:
# Report every evaluated season with each regression model, in the same
# order as before: all three models for a season, then the next season.
for season in ("2017", "2018", "2019", "2020"):
    for regression_model in ("linear", "linearSVR", "SVR"):
        print_winners(winners, regression_model, season)


2017 linear-----------------------------------------
C
AL: Evan Gattis, Russell Martin, Salvador Perez, Brian McCann, Jonathan Lucroy
NL: Willson Contreras, Buster Posey, Jonathan Lucroy, Devin Mesoraco, Yasmani Grandal
1B
AL: Miguel Cabrera
['Miguel Cabrera', 'Edwin Encarnacion', 'Jose Abreu', 'Chris Davis', 'Carlos Santana']
NL: Anthony Rizzo
['Anthony Rizzo', 'Paul Goldschmidt', 'Joey Votto', 'Freddie Freeman', 'Chris Carter']
2B
AL: Jose Altuve
['Jose Altuve', 'Robinson Cano', 'Ian Kinsler', 'Jason Kipnis', 'Dustin Pedroia']
NL: Matt Carpenter
['Matt Carpenter', 'Daniel Murphy', 'Jean Segura', 'Ben Zobrist', 'DJ LeMahieu']
3B
AL: Josh Donaldson
['Josh Donaldson', 'Manny Machado', 'Adrian Beltre', 'Kyle Seager', 'Evan Longoria']
NL: Nolan Arenado
['Nolan Arenado', 'Kris Bryant', 'Matt Carpenter', 'Justin Turner', 'Jonathan Villar']
SS
AL: Xander Bogaerts
['Xander Bogaerts', 'Carlos Correa', 'Manny Machado', 'Francisco Lindor', 'Troy Tulowitzki']
NL: Corey Seager
['Corey Seager', 'Tr

2018 SVR-----------------------------------------
C
AL: Gary Sanchez, Salvador Perez, Mike Zunino, Evan Gattis, Brian McCann
NL: Willson Contreras, Buster Posey, Yasmani Grandal, Austin Barnes, Kurt Suzuki
1B
AL: Jose Abreu
['Jose Abreu', 'Eric Hosmer', 'Carlos Santana', 'Joey Gallo', 'Trey Mancini']
NL: Anthony Rizzo
['Anthony Rizzo', 'Paul Goldschmidt', 'Joey Votto', 'Freddie Freeman', 'Cody Bellinger']
2B
AL: Jose Altuve
['Jose Altuve', 'Robinson Cano', 'Brian Dozier', 'Jonathan Schoop', 'Jose Ramirez']
NL: Daniel Murphy
['Daniel Murphy', 'DJ LeMahieu', 'Brandon Phillips', 'Cesar Hernandez', 'Asdrubal Cabrera']
3B
AL: Josh Donaldson
['Josh Donaldson', 'Jose Ramirez', 'Adrian Beltre', 'Joey Gallo', 'Manny Machado']
NL: Nolan Arenado
['Nolan Arenado', 'Kris Bryant', 'Jake Lamb', 'Anthony Rendon', 'Justin Turner']
SS
AL: Francisco Lindor
['Francisco Lindor', 'Carlos Correa', 'Didi Gregorius', 'Elvis Andrus', 'Xander Bogaerts']
NL: Corey Seager
['Corey Seager', 'Trevor Story', 'Paul DeJ

2020 linearSVR-----------------------------------------
C
AL: Gary Sanchez, Mitch Garver, Robinson Chirinos, Tom Murphy, James McCann
NL: Yasmani Grandal, J.T. Realmuto, Willson Contreras, Wilson Ramos, Carson Kelly
1B
AL: Trey Mancini
['Trey Mancini', 'Carlos Santana', 'Jose Abreu', 'Matt Olson', 'Yuli Gurriel']
NL: Freddie Freeman
['Freddie Freeman', 'Paul Goldschmidt', 'Anthony Rizzo', 'Josh Bell', 'Christian Walker']
2B
AL: Whit Merrifield
['Whit Merrifield', 'DJ LeMahieu', 'Jose Altuve', 'Gleyber Torres', 'Jonathan Villar']
NL: Ketel Marte
['Ketel Marte', 'Ozzie Albies', 'Max Muncy', 'Mike Moustakas', 'Cesar Hernandez']
3B
AL: Alex Bregman
['Alex Bregman', 'Matt Chapman', 'Miguel Andujar', 'Jose Ramirez', 'Rafael Devers']
NL: Nolan Arenado
['Nolan Arenado', 'Eugenio Suarez', 'Anthony Rendon', 'Manny Machado', 'Eduardo Escobar']
SS
AL: Alex Bregman
['Alex Bregman', 'Xander Bogaerts', 'Francisco Lindor', 'Gleyber Torres', 'Marcus Semien']
NL: Trevor Story
['Trevor Story', 'Javier Ba