In [30]:
## import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from math import sqrt
from pybaseball import statcast
from bs4 import BeautifulSoup
import requests
import lxml
import datetime
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By


In [47]:
# functions



# get today's date formatted as yyyy-mm-dd
def get_today_date():
    """Return today's date as a string formatted ``yyyy-mm-dd``.

    Returns:
        str: ``datetime.date.today()`` rendered with the ``'%Y-%m-%d'`` format.
    """
    return datetime.date.today().strftime('%Y-%m-%d')

def get_starters():
    """Scrape the link texts found in today's FanGraphs live-scoreboard lineups.

    Opens a Chrome WebDriver session, loads the live scoreboard page for the
    date returned by ``get_today_date()``, and walks every element with class
    ``lineup``: for the first ``tbody`` of each, every ``tr`` -> ``td`` -> ``a``
    is visited and the anchor's text is collected.

    Returns:
        list[str]: anchor texts in document order (empty if no lineup
        elements are present on the page).
    """
    driver = webdriver.Chrome()
    try:
        # url for the website, keyed by today's date
        url = 'https://www.fangraphs.com/livescoreboard.aspx?date=' + get_today_date()
        # get the website rendered with selenium
        driver.get(url)
        # NOTE(review): an implicit wait (driver.implicitly_wait(5)) was left
        # disabled in the original; enable it if the lineups render late.
        starting = []
        for table in driver.find_elements(by=By.CLASS_NAME, value='lineup'):
            bodies = table.find_elements(by=By.TAG_NAME, value='tbody')
            if not bodies:
                # A lineup table without a body has no rows to read; skip it
                # instead of failing with IndexError on bodies[0].
                continue
            for table_row in bodies[0].find_elements(by=By.TAG_NAME, value='tr'):
                for cell in table_row.find_elements(by=By.TAG_NAME, value='td'):
                    for link in cell.find_elements(by=By.TAG_NAME, value='a'):
                        starting.append(link.text)
        return starting
    finally:
        # Always shut the browser down so repeated calls do not leak
        # Chrome/chromedriver processes, even when scraping raises.
        driver.quit()

            
def calculate_batting_fantasy_points(row):
  """Score one row from the batter's perspective.

  Args:
    row: mapping-like object providing 'events', 'post_bat_score' and
      'bat_score'.

  Returns:
    Event points (3 for 'single' or 'walk', 6 for 'double', 9 for 'triple',
    12 for 'home_run', 0 for anything else) plus 3 times
    ``post_bat_score - bat_score``.
  """
  event = row['events']
  if event == 'home_run':
    event_points = 12
  elif event == 'triple':
    event_points = 9
  elif event == 'double':
    event_points = 6
  elif event in ('single', 'walk'):
    event_points = 3
  else:
    event_points = 0

  score_change = row['post_bat_score'] - row['bat_score']
  return event_points + 3 * score_change

def calculate_pitching_fantasy_points(row):
  """Score one row from the pitcher's perspective.

  Args:
    row: mapping-like object providing 'events', 'post_bat_score' and
      'bat_score'.

  Returns:
    2.75 for a 'strikeout', 0.75 for a 'field_out', -0.6 for any other
    event, minus 2 times ``post_bat_score - bat_score``.
  """
  event = row['events']
  # The literal must match the event label exactly; a misspelled label would
  # silently send every strikeout to the penalty branch below.
  if event == 'strikeout':
    points = 2.75
  elif event == 'field_out':
    points = 0.75
  else:
    points = -.6

  points = points - 2 * (row['post_bat_score'] - row['bat_score'])
  return points

def total_ball_movement(row):
  """Return the Euclidean magnitude of the row's ('pfx_x', 'pfx_z') pair.

  Args:
    row: mapping-like object providing numeric 'pfx_x' and 'pfx_z'.

  Returns:
    float: ``sqrt(pfx_x**2 + pfx_z**2)``.
  """
  x_squared = row['pfx_x'] ** 2
  z_squared = row['pfx_z'] ** 2
  return sqrt(x_squared + z_squared)

def batter_adjusted_plate_x(row):
  """Return 'plate_x', sign-flipped unless the row's 'stand' is 'R'.

  Args:
    row: mapping-like object providing 'stand' and numeric 'plate_x'.

  Returns:
    ``plate_x`` unchanged when ``stand == 'R'``; ``-plate_x`` for every other
    'stand' value (presumably left-handed batters — confirm against the
    data schema).
  """
  if row['stand'] != 'R':
    return -row['plate_x']
  return row['plate_x']

    



In [50]:
# Scrape today's starters, write them to CSV, then display the frame.
# NOTE(review): the output path is an absolute, machine-specific location.
starting = get_starters()
starting_df = pd.DataFrame(data=starting)
starting_df.to_csv(
    'C:\\Users\\jorda\\Downloads\\starting_lineups.csv',
    index=False,
)
starting_df

Unnamed: 0,0
0,Austin Gomber
1,Ranger Suarez
2,Charlie Blackmon
3,Ezequiel Tovar
4,Ryan McMahon
5,Elias Diaz
6,Nolan Jones
7,Elehuris Montero
8,Brenton Doyle
9,Michael Toglia


In [None]:
# Build the base pitch-level frame

# Fetch Statcast rows from 2024-03-28 through today's date
df = statcast(start_dt='2024-03-28', end_dt=get_today_date())

# Keep only rows where both 'events' and 'pitch_type' are present
df = df.dropna(subset=['events', 'pitch_type'])

# Row-wise derived columns; each helper receives one row (axis=1)
df['pitcher_fp'] = df.apply(calculate_pitching_fantasy_points, axis=1)
df['batter_fp'] = df.apply(calculate_batting_fantasy_points, axis=1)
df['total_ball_movement'] = df.apply(total_ball_movement, axis=1)
df['batter_adjusted_plate_x'] = df.apply(batter_adjusted_plate_x, axis=1)



In [49]:
# Average plate coordinates per (pitcher, player_name), with columns renamed
# to carry a 'mean_pitcher_' prefix, then saved to CSV and previewed.
# NOTE(review): the output path is an absolute, machine-specific location.
mean_pitch_coordinates = (
    df.groupby(['pitcher', 'player_name'])[['plate_x', 'plate_z', 'batter_adjusted_plate_x']]
    .mean()
    .rename(columns={
        'plate_x': 'mean_pitcher_plate_x',
        'plate_z': 'mean_pitcher_plate_z',
        'batter_adjusted_plate_x': 'mean_pitcher_batter_adjusted_plate_x',
    })
)
mean_pitch_coordinates.to_csv("C:\\Users\\jorda\\Downloads\\pitch_coordinates.csv")
mean_pitch_coordinates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_pitcher_plate_x,mean_pitcher_plate_z,mean_pitcher_batter_adjusted_plate_x
pitcher,player_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
445276,"Jansen, Kenley",-0.318148,2.781111,0.158148
445926,"Chavez, Jesse",0.271515,2.451515,0.464848
450203,"Morton, Charlie",0.213099,2.18493,0.117324
455119,"Martin, Chris",-0.12931,2.58931,-0.234138
458677,"Wilson, Justin",-0.165,2.4675,0.135
