# Compiling different sources of sports data

In [24]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json

## Chicago Bears

In [7]:
# Scrape the NFL 2018 season games table from Pro Football Reference and keep
# only the games in which the Chicago Bears appear as winner or loser.
football = 'https://www.pro-football-reference.com/years/2018/games.htm'
response_football = requests.get(football)
if response_football.status_code == 200:
    soup = BeautifulSoup(response_football.content, 'html.parser')
    table = soup.find('table', {'id': 'games'})
    headers = [th.get_text() for th in table.find('thead').find_all('th')]
    # Only <td> cells are collected per row, so the leading <th> cell of each
    # body row is not part of the row data.
    rows = [
        [td.get_text() for td in row.find_all('td')]
        for row in table.find('tbody').find_all('tr')
    ]
    # Skip the first header ('Week') so the column names line up with the
    # <td>-only rows built above.
    nfldf = pd.DataFrame(rows, columns=headers[1:])
    # Rows that yielded no <td> cells come out entirely empty; drop them and
    # renumber before filtering so the displayed index refers to game rows.
    nfldf = nfldf.dropna(how='all').reset_index(drop=True)

    # Filter rows where Chicago Bears played (either side of the result).
    bears_won = nfldf['Winner/tie'].str.contains('Chicago Bears', na=False)
    bears_lost = nfldf['Loser/tie'].str.contains('Chicago Bears', na=False)
    nfldf = nfldf[bears_won | bears_lost]
else:
    # The message must read the status from `response_football`; the name
    # `response` is not defined in this cell and would raise NameError.
    print(f'Failed to retrieve football data: {response_football.status_code}')
    # Define an empty frame so the display below neither fails nor shows a
    # stale `nfldf` left over from an earlier run.
    nfldf = pd.DataFrame()

nfldf.head()

Unnamed: 0,Day,Date,Time,Winner/tie,Unnamed: 5,Loser/tie,Unnamed: 7,Pts,Pts.1,YdsW,TOW,YdsL,TOL
13,Sun,2018-09-09,8:20PM,Green Bay Packers,,Chicago Bears,boxscore,24,23,370,2,294,1
31,Mon,2018-09-17,8:15PM,Chicago Bears,,Seattle Seahawks,boxscore,24,17,271,2,276,2
44,Sun,2018-09-23,4:25PM,Chicago Bears,@,Arizona Cardinals,boxscore,16,14,316,2,221,4
51,Sun,2018-09-30,1:00PM,Chicago Bears,,Tampa Bay Buccaneers,boxscore,48,10,483,0,311,3
80,Sun,2018-10-14,1:00PM,Miami Dolphins,,Chicago Bears,boxscore,31,28,541,3,467,3


## Chicago Cubs

In [8]:
# Scrape the Chicago Cubs 2018 schedule/results table from Baseball Reference
# and keep only the rows that carry a numeric game number.
baseball = 'https://www.baseball-reference.com/teams/CHC/2018-schedule-scores.shtml'
response_baseball = requests.get(baseball)
if response_baseball.status_code == 200:
    soup = BeautifulSoup(response_baseball.content, 'html.parser')
    table = soup.find('table', {'id': 'team_schedule'})
    headers = [th.get_text() for th in table.find('thead').find_all('th')]
    # Both <th> and <td> cells are collected here, so every header has a
    # matching cell and no header needs to be skipped.
    rows = [
        [cell.get_text() for cell in row.find_all(['th', 'td'])]
        for row in table.find('tbody').find_all('tr')
    ]
    mlbdf = pd.DataFrame(rows, columns=headers)
    # Non-numeric "Gm#" values are coerced to NaN so that those rows can be
    # dropped in the next step.
    mlbdf["Gm#"] = pd.to_numeric(mlbdf["Gm#"], errors="coerce")
    mlbdf = mlbdf.dropna(subset=["Gm#"]).reset_index(drop=True)
else:
    # The message must read the status from `response_baseball`; the name
    # `response` is not defined in this cell and would raise NameError.
    print(f'Failed to retrieve baseball data: {response_baseball.status_code}')
    # Define an empty frame so the display below neither fails nor shows a
    # stale `mlbdf` left over from an earlier run.
    mlbdf = pd.DataFrame()

mlbdf.head()

Unnamed: 0,Gm#,Date,Unnamed: 3,Tm,Unnamed: 5,Opp,W/L,R,RA,Inn,...,GB,Win,Loss,Save,Time,D/N,Attendance,cLI,Streak,Orig. Scheduled
0,1.0,"Thursday, Mar 29",boxscore,CHC,@,MIA,W,8,4,,...,Tied,Cishek,Ureña,,3:18,D,32151,0.98,+,
1,2.0,"Friday, Mar 30",boxscore,CHC,@,MIA,L-wo,1,2,17.0,...,1.0,Despaigne,Butler,,5:18,N,12034,0.98,-,
2,3.0,"Saturday, Mar 31",boxscore,CHC,@,MIA,W,10,6,10.0,...,1.0,Strop,Ziegler,,3:57,N,13422,0.95,+,
3,4.0,"Sunday, Apr 1",boxscore,CHC,@,MIA,L,0,6,,...,1.5,Peters,Quintana,,2:36,D,10428,1.0,-,
4,5.0,"Monday, Apr 2",boxscore,CHC,@,CIN,L,0,1,,...,2.5,Mahle,Chatwood,Iglesias,2:33,D,18963,1.03,--,


## Chicago White Sox

## Chicago Blackhawks

In [36]:
# URL of the API containing the NHL 2017/2018 season data for the Chicago Blackhawks
hockey = "https://api-web.nhle.com/v1/club-schedule-season/CHI/20172018"

# Send a GET request to the API
response_hockey = requests.get(hockey)
if response_hockey.status_code == 200:
    # The schedule entries are read from the "games" key of the JSON payload.
    data = response_hockey.json()
    nhldf = pd.DataFrame(data["games"])
else:
    # Same failure handling as the other scraping cells: report the status
    # instead of failing later on a missing "games" key or an undecodable body.
    print(f'Failed to retrieve hockey data: {response_hockey.status_code}')
    nhldf = pd.DataFrame()

# TODO: the UTC offset still has to be applied to all start times
# (-5 for Chicago).
# NOTE(review): the displayed rows carry a per-game `venueUTCOffset` that is
# not always -05:00 - confirm which offset should be used.
nhldf

Unnamed: 0,id,season,gameType,gameDate,venue,neutralSite,startTimeUTC,easternUTCOffset,venueUTCOffset,venueTimezone,gameState,gameScheduleState,tvBroadcasts,awayTeam,homeTeam,periodDescriptor,gameOutcome,gameCenterLink
0,2017010015,20172018,1,2017-09-19,{'default': 'Nationwide Arena'},False,2017-09-19T23:00:00Z,-04:00,-04:00,US/Eastern,FINAL,OK,"[{'id': 107, 'market': 'N', 'countryCode': 'US...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'id': 29, 'placeName': {'default': 'Columbus'...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/chi-vs-cbj/2017/09/19/2017010015
1,2017010034,20172018,1,2017-09-21,{'default': 'United Center'},False,2017-09-22T00:30:00Z,-04:00,-05:00,America/Chicago,FINAL,OK,"[{'id': 312, 'market': 'N', 'countryCode': 'US...","{'id': 17, 'placeName': {'default': 'Detroit'}...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/det-vs-chi/2017/09/21/2017010034
2,2017010051,20172018,1,2017-09-23,{'default': 'United Center'},False,2017-09-24T00:30:00Z,-04:00,-05:00,America/Chicago,FINAL,OK,"[{'id': 312, 'market': 'N', 'countryCode': 'US...","{'id': 29, 'placeName': {'default': 'Columbus'...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/cbj-vs-chi/2017/09/23/2017010051
3,2017010067,20172018,1,2017-09-25,{'default': 'TD Garden'},False,2017-09-25T23:00:00Z,-04:00,-04:00,US/Eastern,FINAL,OK,"[{'id': 128, 'market': 'H', 'countryCode': 'US...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'id': 6, 'placeName': {'default': 'Boston'}, ...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/chi-vs-bos/2017/09/25/2017010067
4,2017010087,20172018,1,2017-09-28,{'default': 'Little Caesars Arena'},False,2017-09-28T23:30:00Z,-04:00,-04:00,America/Detroit,FINAL,OK,"[{'id': 107, 'market': 'N', 'countryCode': 'US...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'id': 17, 'placeName': {'default': 'Detroit'}...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/chi-vs-det/2017/09/28/2017010087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,2017021196,20172018,2,2018-03-29,{'default': 'United Center'},False,2018-03-30T00:30:00Z,-04:00,-05:00,America/Chicago,OFF,OK,"[{'id': 292, 'market': 'A', 'countryCode': 'CA...","{'id': 52, 'placeName': {'default': 'Winnipeg'...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/wpg-vs-chi/2018/03/29/2017021196
84,2017021203,20172018,2,2018-03-30,{'default': 'Pepsi Center'},False,2018-03-31T01:00:00Z,-04:00,-06:00,America/Denver,OFF,OK,"[{'id': 47, 'market': 'H', 'countryCode': 'US'...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'id': 21, 'placeName': {'default': 'Colorado'...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/chi-vs-col/2018/03/30/2017021203
85,2017021239,20172018,2,2018-04-04,{'default': 'Scottrade Center'},False,2018-04-05T00:00:00Z,-04:00,-05:00,US/Central,OFF,OK,"[{'id': 241, 'market': 'N', 'countryCode': 'US...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'id': 19, 'placeName': {'default': 'St. Louis...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/chi-vs-stl/2018/04/04/2017021239
86,2017021255,20172018,2,2018-04-06,{'default': 'United Center'},False,2018-04-07T00:30:00Z,-04:00,-05:00,America/Chicago,OFF,OK,"[{'id': 13, 'market': 'A', 'countryCode': 'US'...","{'id': 19, 'placeName': {'default': 'St. Louis...","{'id': 16, 'placeName': {'default': 'Chicago'}...","{'periodType': 'REG', 'maxRegulationPeriods': 3}",{'lastPeriodType': 'REG'},/gamecenter/stl-vs-chi/2018/04/06/2017021255


## Chicago Fire FC

## Chicago Bulls

In [9]:
# Load the Bulls schedule CSV and keep only the rows whose 'Unnamed: 5' column
# equals '@' (presumably the away-game marker - confirm against the CSV).
chicago_bulls_raw = (
    pd.read_csv(r'Chicago_Bulls.csv', index_col=None)
    .loc[lambda schedule: schedule['Unnamed: 5'] == '@']
)

def extract_date_basketball(row):
    """Combine a schedule row's date and start time into one timestamp.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['Date']`` (parsed with ``'%a %b %d %Y'``) and
        ``row['Start (ET)']`` (a 12-hour time such as ``'8:00p'``).

    Returns
    -------
    pandas.Series
        A one-element Series whose ``'date'`` entry is the parsed timestamp.

    Raises
    ------
    ValueError
        If the combined text does not match ``'%a %b %d %Y,%I:%M%p'``.
    """
    # Expand a trailing one-letter meridiem marker so that '%p' can parse it.
    # Both 'p' and 'a' are handled; previously only 'p' was rewritten, so a
    # morning start such as '11:30a' could not be parsed.
    meridiem = {'p': 'PM', 'a': 'AM'}
    start = row['Start (ET)']
    if start[-1:] in meridiem:
        start = start[:-1] + meridiem[start[-1]]
    date = pd.to_datetime(row['Date'] + ',' + start,
                          format='%a %b %d %Y,%I:%M%p')
    return pd.Series(data={'date': date})
# Parse every remaining schedule row into a timestamp; the result is a frame
# with a single 'date' column that keeps the original row index.
chicago_bulls = pd.DataFrame(chicago_bulls_raw.apply(extract_date_basketball, axis=1))
chicago_bulls.head()

Unnamed: 0,date
0,2018-10-18 20:00:00
2,2018-10-22 20:30:00
4,2018-10-26 19:00:00
5,2018-10-27 19:30:00
10,2018-11-05 19:30:00
