In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime

## Full game money line

In [None]:
# Days inside the scrape window that are excluded (the reason is not recorded here).
omit_dates = [
    "2024-03-29", "2024-03-30", "2024-04-13", "2024-05-08",
    "2024-07-15", "2024-07-17", "2024-07-18", "2024-07-27", "2024-07-16",
    "2024-08-26", "2024-10-04", "2024-10-21", "2024-10-22", "2024-10-23",
    "2024-10-24", "2024-10-27",
]

# Every calendar day from 2024-03-28 through 2024-10-30 (inclusive) that is
# not listed in `omit_dates`, kept as pandas Timestamps in chronological order.
dates = [
    day
    for day in pd.date_range("2024-03-28", "2024-10-30")
    if day.strftime("%Y-%m-%d") not in omit_dates
]

In [None]:
# Rows collected by the moneyline scraping loop below (one list per game).
all_data = []

In [None]:
# Scrape the odds page for every date in `dates` and build one row per game
# from the odds cells tagged for Caesars.
# The accumulator is reset here so that re-running this cell never appends
# duplicate rows to a list left over from a previous run.
all_data = []

for date in dates:
    date_str = date.strftime("%Y-%m-%d")
    url = f"https://www.sportsbookreview.com/betting-odds/mlb-baseball/?date={date_str}"

    # A timeout keeps one stalled connection from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    section = soup.find("section", id="section-mlb")
    if section is None:
        # Fail with the offending date instead of an AttributeError on None.
        raise ValueError(f"No 'section-mlb' section found for {date_str} ({url})")

    # Start time of each game: the first "fs-9" span inside each time container.
    time_divs = section.find_all("div", class_="d-flex text-align-center justify-content-around flex-column ps-1")
    match_times = []
    for time_div in time_divs:
        time_span = time_div.find("span", class_="fs-9")
        if time_span is not None:
            match_times.append(time_span.text.strip())

    # Team names are taken from every <b> tag in the section, in page order.
    team_names = [team.text.strip() for team in section.find_all("b")]

    # Keep only span texts that look like signed odds ("+..." or "-...").
    caesars_divs = section.find_all("a", {"data-aatracker": "Odds Table - Odds Cell CTA - caesars"})
    caesars_odds = []
    for cell in caesars_divs:
        for span in cell.find_all("span", class_="fs-9"):
            text = span.text.strip()
            if text.startswith(("+", "-")):
                caesars_odds.append(text)

    # NOTE(review): rows are paired purely by position. This assumes every game
    # contributes exactly two team names and two odds values; a game with
    # missing odds raises IndexError or shifts the values of later games.
    data = [[date_str, match_times[i], team_names[2 * i], team_names[2 * i + 1],
             caesars_odds[2 * i], caesars_odds[2 * i + 1]] for i in range(len(match_times))]
    all_data.extend(data)

df = pd.DataFrame(all_data, columns=["Date", "Match Time", "Team 1", "Team 2", "Team 1 Odds", "Team 2 Odds"])
df.head()

Unnamed: 0,Date,Match Time,Team 1,Team 2,Team 1 Odds,Team 2 Odds
0,2024-03-28,3:05 PM EDT,LAA,BAL,158,-189
1,2024-03-28,4:10 PM EDT,MIN,KC,-125,105
2,2024-03-28,4:10 PM EDT,WAS,CIN,130,-154
3,2024-03-28,4:10 PM EDT,SF,SD,-105,-115
4,2024-03-28,4:10 PM EDT,NYY,HOU,130,-154


In [None]:
# Save the scraped moneyline table; index=False leaves the DataFrame's row index out of the file.
df.to_csv("full_game_moneyline.csv", index=False)

## Full game total

In [None]:
# Days inside the scrape window that are excluded (the reason is not recorded here).
omit_dates = [
    "2024-03-29", "2024-03-30", "2024-04-13", "2024-05-08",
    "2024-07-15", "2024-07-17", "2024-07-18", "2024-07-27", "2024-07-16",
    "2024-08-26", "2024-10-04", "2024-10-21", "2024-10-22", "2024-10-23",
    "2024-10-24", "2024-10-27",
]

# Walk the window 2024-03-28 .. 2024-10-30 day by day and keep every
# Timestamp whose ISO date string is not in `omit_dates`.
dates = []
for day in pd.date_range("2024-03-28", "2024-10-30"):
    if day.strftime("%Y-%m-%d") in omit_dates:
        continue
    dates.append(day)

In [None]:
# Rows collected by the full-game totals scraping loop below (one list per game).
all_data = []

In [None]:
# Scrape the full-game totals page for every date in `dates` and build one row
# per game (odds and line values) from the cells tagged for Caesars.
# The accumulator is reset here so that re-running this cell never appends
# duplicate rows to a list left over from a previous run.
all_data = []

for date in dates:
    date_str = date.strftime("%Y-%m-%d")
    url = f"https://www.sportsbookreview.com/betting-odds/mlb-baseball/totals/full-game/?date={date_str}"

    # A timeout keeps one stalled connection from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    section = soup.find("section", id="section-mlb")
    if section is None:
        # Fail with the offending date instead of an AttributeError on None.
        raise ValueError(f"No 'section-mlb' section found for {date_str} ({url})")

    # Start time of each game: the first "fs-9" span inside each time container.
    time_divs = section.find_all("div", class_="d-flex text-align-center justify-content-around flex-column ps-1")
    match_times = []
    for time_div in time_divs:
        time_span = time_div.find("span", class_="fs-9")
        if time_span is not None:
            match_times.append(time_span.text.strip())

    # Team names are taken from every <b> tag in the section, in page order.
    team_names = [team.text.strip() for team in section.find_all("b")]

    caesars_divs = section.find_all("a", {"data-aatracker": "Odds Table - Odds Cell CTA - caesars"})
    caesars_odds = []
    caesars_line = []
    for cell in caesars_divs:
        # Odds: any span whose text looks like a signed price ("+..." or "-...").
        for span in cell.find_all("span"):
            text = span.text.strip()
            if text.startswith(("+", "-")):
                caesars_odds.append(text)

        # Line values are read from spans selected by these two class strings
        # and appended pairwise, in order.
        # NOTE(review): presumably one class string per side of the total — confirm against the page markup.
        spans1 = cell.find_all("span", class_="me-1 me-lg-2")
        spans2 = cell.find_all("span", class_="me-1 me-lg-2 fs-9")
        for span1, span2 in zip(spans1, spans2):
            caesars_line.append(span1.text.strip())
            caesars_line.append(span2.text.strip())

    # NOTE(review): rows are paired purely by position. This assumes every game
    # contributes exactly two team names, two odds values and two line values;
    # a game with missing data raises IndexError or shifts later games.
    data = [[date_str, match_times[i], team_names[2 * i], team_names[2 * i + 1],
             caesars_odds[2 * i], caesars_odds[2 * i + 1], caesars_line[2 * i], caesars_line[2 * i + 1]] for i in range(len(match_times))]
    all_data.extend(data)

df = pd.DataFrame(all_data, columns=["Date", "Match Time", "Team 1", "Team 2", "Team 1 Odds", "Team 2 Odds", "Team 1 line", "Team 2 line"])
df.head()

Unnamed: 0,Date,Match Time,Team 1,Team 2,Team 1 Odds,Team 2 Odds,Team 1 line,Team 2 line
0,2024-03-28,3:05 PM EDT,LAA,BAL,-110,-110,7.5,7.5
1,2024-03-28,4:10 PM EDT,MIN,KC,105,-125,8.0,8.0
2,2024-03-28,4:10 PM EDT,WAS,CIN,-115,-105,9.0,9.0
3,2024-03-28,4:10 PM EDT,SF,SD,-110,-110,7.5,7.5
4,2024-03-28,4:10 PM EDT,NYY,HOU,-120,100,8.5,8.5


In [None]:
# Save the scraped full-game totals table; index=False leaves the DataFrame's row index out of the file.
df.to_csv("full_game_total.csv", index=False)

## First 5 innings total

In [None]:
# Days inside the scrape window that are excluded (the reason is not recorded here).
omit_dates = [
    "2024-04-02", "2024-04-13", "2024-04-16", "2024-04-24", "2024-05-08",
    "2024-05-18", "2024-05-19", "2024-05-28", "2024-06-16",
    "2024-07-15", "2024-07-17", "2024-07-18", "2024-07-27", "2024-07-16",
    "2024-08-01", "2024-08-02", "2024-08-03", "2024-08-04",
    "2024-08-05", "2024-08-06", "2024-08-07", "2024-08-09",
    "2024-08-26", "2024-10-04", "2024-10-21", "2024-10-22", "2024-10-23",
    "2024-10-24", "2024-10-27",
]

# Every calendar day from 2024-03-30 through 2024-10-30 (inclusive) whose ISO
# date string is not in `omit_dates`, kept as pandas Timestamps.
dates = list(
    filter(
        lambda day: day.strftime("%Y-%m-%d") not in omit_dates,
        pd.date_range("2024-03-30", "2024-10-30"),
    )
)

In [None]:
# Rows collected by the 1st-half totals scraping loop below (one list per game).
all_data = []

In [None]:
# Scrape the 1st-half totals page for every date in `dates` and build one row
# per game (odds and line values) from the cells tagged for Caesars.
# The accumulator is reset here so that re-running this cell never appends
# duplicate rows to a list left over from a previous run.
all_data = []

for date in dates:
    date_str = date.strftime("%Y-%m-%d")
    # Use the formatted string in the query: interpolating the Timestamp itself
    # renders as "YYYY-MM-DD 00:00:00", which is not the date format the other
    # scraping cells send.
    url = f"https://www.sportsbookreview.com/betting-odds/mlb-baseball/totals/1st-half/?date={date_str}"

    # A timeout keeps one stalled connection from hanging the whole run, and
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    section = soup.find("section", id="section-mlb")
    if section is None:
        # Fail with the offending date instead of an AttributeError on None.
        raise ValueError(f"No 'section-mlb' section found for {date_str} ({url})")

    # Start time of each game: the first "fs-9" span inside each time container.
    time_divs = section.find_all("div", class_="d-flex text-align-center justify-content-around flex-column ps-1")
    match_times = []
    for time_div in time_divs:
        time_span = time_div.find("span", class_="fs-9")
        if time_span is not None:
            match_times.append(time_span.text.strip())

    # Team names are taken from every <b> tag in the section, in page order.
    team_names = [team.text.strip() for team in section.find_all("b")]

    caesars_divs = section.find_all("a", {"data-aatracker": "Odds Table - Odds Cell CTA - caesars"})
    caesars_odds = []
    caesars_line = []
    for cell in caesars_divs:
        # Odds: any span whose text looks like a signed price ("+..." or "-...").
        for span in cell.find_all("span"):
            text = span.text.strip()
            if text.startswith(("+", "-")):
                caesars_odds.append(text)

        # Line values are read from spans selected by these two class strings
        # and appended pairwise, in order.
        # NOTE(review): presumably one class string per side of the total — confirm against the page markup.
        spans1 = cell.find_all("span", class_="me-1 me-lg-2")
        spans2 = cell.find_all("span", class_="me-1 me-lg-2 fs-9")
        for span1, span2 in zip(spans1, spans2):
            caesars_line.append(span1.text.strip())
            caesars_line.append(span2.text.strip())

    # NOTE(review): rows are paired purely by position. This assumes every game
    # contributes exactly two team names, two odds values and two line values;
    # a game with missing data raises IndexError or shifts later games.
    data = [[date_str, match_times[i], team_names[2 * i], team_names[2 * i + 1],
             caesars_odds[2 * i], caesars_odds[2 * i + 1], caesars_line[2 * i], caesars_line[2 * i + 1]] for i in range(len(match_times))]
    all_data.extend(data)

df = pd.DataFrame(all_data, columns=["Date", "Match Time", "Team 1", "Team 2", "Team 1 Odds", "Team 2 Odds", "Team 1 line", "Team 2 line"])
df.head()

Unnamed: 0,Date,Match Time,Team 1,Team 2,Team 1 Odds,Team 2 Odds,Team 1 line,Team 2 line
0,2024-03-30,1:40 PM EDT,MIL,NYM,-145,115,7.5,7.5
1,2024-03-30,2:10 PM EDT,DET,CHW,-125,-105,8.5,8.5
2,2024-03-30,4:05 PM EDT,LAA,BAL,-115,-115,4.5,4.5
3,2024-03-30,4:05 PM EDT,ATL,PHI,-130,100,4.0,4.0
4,2024-03-30,4:07 PM EDT,CLE,ATH,110,-141,4.0,4.0


In [None]:
# Save the scraped 1st-half totals table; index=False leaves the DataFrame's row index out of the file.
df.to_csv("1st_5_total.csv", index=False)