In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Chicago Bears, Chicago Cubs, Chicago White Sox, Chicago Blackhawks, Chicago Fire FC

# URL of the webpage containing the NFL 2018 season data
football = 'https://www.pro-football-reference.com/years/2018/games.htm'
response_football = requests.get(football)
if response_football.status_code == 200:
    soup = BeautifulSoup(response_football.content, 'html.parser')
    table = soup.find('table', {'id': 'games'})
    headers = [th.get_text() for th in table.find('thead').find_all('th')]
    rows = []
    for row in table.find('tbody').find_all('tr'):
        # Only <td> cells are collected; a row without any <td> yields an
        # empty list, which becomes an all-missing row and is dropped below.
        rows.append([td.get_text() for td in row.find_all('td')])
    # The rows hold <td> cells only, so the first header ('Week') is skipped
    # to keep the column count aligned with the row width.
    nfldf = (
        pd.DataFrame(rows, columns=headers[1:])
        .dropna(how='all')
        .reset_index(drop=True)
    )

    # Filter rows where Chicago Bears played (as either winner or loser)
    nfldf = nfldf[
        nfldf['Winner/tie'].str.contains('Chicago Bears', na=False)
        | nfldf['Loser/tie'].str.contains('Chicago Bears', na=False)
    ]
else:
    # Report the status of this request; `nfldf` stays undefined on this path.
    print(f'Failed to retrieve football data: {response_football.status_code}')

# URL of the CHC 2018 schedule-and-scores page (MLB 2018 season data)
baseball = 'https://www.baseball-reference.com/teams/CHC/2018-schedule-scores.shtml'
response_baseball = requests.get(baseball)
if response_baseball.status_code == 200:
    soup = BeautifulSoup(response_baseball.content, 'html.parser')
    table = soup.find('table', {'id': 'team_schedule'})
    headers = [th.get_text() for th in table.find('thead').find_all('th')]
    rows = []
    for row in table.find('tbody').find_all('tr'):
        # Both <th> and <td> cells are kept so each row lines up with the
        # complete header list used as column names.
        rows.append([cell.get_text() for cell in row.find_all(['th', 'td'])])
    mlbdf = pd.DataFrame(rows, columns=headers)
    # Non-numeric "Gm#" values are coerced to NaN so those rows can be dropped
    # (presumably header rows repeated inside the table body — confirm).
    mlbdf["Gm#"] = pd.to_numeric(mlbdf["Gm#"], errors="coerce")
    mlbdf = mlbdf.dropna(subset=["Gm#"]).reset_index(drop=True)
else:
    # Report the status of this request; `mlbdf` stays undefined on this path.
    print(f'Failed to retrieve baseball data: {response_baseball.status_code}')

In [4]:
# Load the Bulls schedule CSV and keep only the rows whose 'Unnamed: 5'
# column equals '@' (presumably the away-game marker — confirm against the CSV).
chicago_bulls_raw = (
    pd.read_csv(r'Chicago_Bulls.csv', index_col=None)
    .loc[lambda schedule: schedule['Unnamed: 5'] == '@']
)

def extract_date_basketball(row):
    """Build the start timestamp for one schedule row.

    Joins ``row['Date']`` and ``row['Start (ET)']`` with a comma and parses
    the result using the format ``'%a %b %d %Y,%I:%M%p'``.

    Parameters
    ----------
    row : pandas.Series or mapping
        Must provide string values under ``'Date'`` (e.g. ``'Thu Oct 18 2018'``)
        and ``'Start (ET)'`` (e.g. ``'8:00p'`` or ``'11:30a'``).

    Returns
    -------
    pandas.Series
        One-element Series holding the parsed timestamp under ``'date'``.

    Raises
    ------
    ValueError
        If the combined string does not match the expected format.
    """
    day = row['Date']
    # Expand the short meridiem marker so that "%p" can match it.
    start = row['Start (ET)'].replace("p", "PM")
    # A trailing "a" (morning start) used to be left as-is and failed to
    # parse; expand it as well. Strings already ending in "am" are untouched.
    if start.endswith("a"):
        start = start[:-1] + "AM"
    date = pd.to_datetime(day + ',' + start,
                          format='%a %b %d %Y,%I:%M%p')
    return pd.Series(data={'date': date})
# Parse one timestamp per filtered schedule row, then preview the first one.
chicago_bulls = pd.DataFrame(
    chicago_bulls_raw.apply(extract_date_basketball, axis=1)
)
chicago_bulls.head(1)

Unnamed: 0,date
0,2018-10-18 20:00:00
