In [1]:
import datetime
from bs4 import BeautifulSoup
import requests
import collections
import json
import pandas as pd

In [2]:
# Date stamps in YYYYMMDD form (the same format the fixtures URL is built
# with further down): the current local date and the date 30 days from now.
today = format(datetime.datetime.now(), '%Y%m%d')
after30day = format(datetime.datetime.now() + datetime.timedelta(days=30), '%Y%m%d')

In [3]:
# Display the YYYYMMDD stamp for the current date computed in the cell above.
today

'20181115'

In [4]:
def convert_time(date_str):
    """Convert a UTC timestamp string into a local-time ``YYYY-MM-DD HH:MM`` string.

    Every ``T`` in ``date_str`` is replaced by a space and every ``Z`` is
    removed; the remainder must match ``%Y-%m-%d %H:%M`` (for example
    ``2018-11-14T13:00Z``).  The parsed value is treated as UTC and shifted
    to the timezone of the machine running the code.

    Args:
        date_str: timestamp text such as ``'2018-11-14T13:00Z'``.

    Returns:
        The same instant formatted as ``'%Y-%m-%d %H:%M'`` in local time.

    Raises:
        ValueError: if the cleaned string does not match ``%Y-%m-%d %H:%M``.
    """
    cleaned = date_str.replace('T', ' ').replace('Z', '')
    naive_utc = datetime.datetime.strptime(cleaned, "%Y-%m-%d %H:%M")
    # The parsed datetime is naive; label it as UTC before converting.
    aware_utc = naive_utc.replace(tzinfo=datetime.timezone.utc)
    # astimezone(tz=None) converts to the system's local timezone.
    local_dt = aware_utc.astimezone(tz=None)
    return local_dt.strftime('%Y-%m-%d %H:%M')

In [5]:
def extract_info(soup):
    """Group the fixtures found on one parsed page by league.

    Every element whose class is ``table-caption`` is treated as a league
    heading.  The node two steps after it in document order is searched for
    table rows, and each row that carries a kick-off time (a ``td`` with a
    ``data-date`` attribute) and at least two team names (``abbr`` tags with a
    ``title`` attribute) becomes one fixture.

    Teams and kick-off time are read from the same row, so a row without a
    ``data-date`` cell or with fewer than two titled ``abbr`` tags is skipped
    instead of shifting the pairing of every later fixture or raising
    ``IndexError``.

    Args:
        soup: parsed page, e.g. ``BeautifulSoup(html, "html.parser")``.

    Returns:
        ``collections.defaultdict(list)`` mapping the caption text of each
        league to a list of ``[home, away, local_kickoff]`` lists, where
        ``local_kickoff`` is the string returned by ``convert_time``.
    """
    league_match = collections.defaultdict(list)
    # Query the captions once; the document is not re-scanned per league.
    for league in soup.find_all(attrs={'class': 'table-caption'}):
        # Two steps forward in document order from the caption: first the
        # caption's own text node, then the node that follows it.
        fixtures = league.next_element.next_element
        for row in fixtures.find_all('tr'):
            teams = [
                abbr['title']
                for abbr in row.find_all('abbr')
                if 'title' in abbr.attrs
            ]
            kickoff = next(
                (td.attrs['data-date']
                 for td in row.find_all('td')
                 if 'data-date' in td.attrs),
                None,
            )
            # Header rows and rows without a scheduled time are not fixtures.
            if kickoff is None or len(teams) < 2:
                continue
            league_match[league.text].append(
                [teams[0], teams[1], convert_time(kickoff)]
            )
    return league_match

In [6]:
# Download the fixtures page for each of the next 29 days (day offsets 1..29)
# and keep one league -> fixtures mapping per day.
run_started = datetime.datetime.now()  # read once so all offsets share one reference date
schedule = []
for daydiff in range(1,30):
    day = (run_started + datetime.timedelta(days=daydiff)).strftime('%Y%m%d')
    # Without a timeout a single stalled connection would hang the cell forever.
    r = requests.get("http://www.espn.com/soccer/fixtures/_/date/" + day, timeout=30)
    # Stop on an HTTP error rather than parsing an error page into an empty result.
    r.raise_for_status()
    data = r.text
    soup = BeautifulSoup(data, "html.parser")
    res = extract_info(soup)
    schedule.append(res)

In [7]:
# Inspect the scraped result: a list with one league -> fixtures mapping per fetched day.
schedule

[defaultdict(list,
             {'International Friendly': [['Switzerland',
                'Qatar',
                '2018-11-14 13:00']],
              'Mexican Ascenso MX': [['U. de G.',
                'FC Juarez',
                '2018-11-14 20:00'],
               ['Dorados de Sinaloa',
                'Mineros de Zacatecas',
                '2018-11-14 22:00']],
              'Brazilian Serie A': [['Vasco da Gama',
                'Atlético-PR',
                '2018-11-14 16:30'],
               ['Bahia', 'Ceará', '2018-11-14 18:00'],
               ['Paraná', 'Atlético-MG', '2018-11-14 18:00'],
               ['Cruzeiro', 'Corinthians', '2018-11-14 18:45'],
               ['Palmeiras', 'Fluminense', '2018-11-14 18:45'],
               ['Sport', 'Vitória', '2018-11-14 18:45']],
              'English EFL Trophy': [['Notts County',
                'Doncaster Rovers',
                '2018-11-14 14:45'],
               ['Southend United', 'Southampton U21', '2018-11-14 14:45']],
 

In [8]:
# Preview the JSON serialisation of `schedule`; the string is only displayed, not stored.
json.dumps(schedule)

'[{"International Friendly": [["Switzerland", "Qatar", "2018-11-14 13:00"]], "Mexican Ascenso MX": [["U. de G.", "FC Juarez", "2018-11-14 20:00"], ["Dorados de Sinaloa", "Mineros de Zacatecas", "2018-11-14 22:00"]], "Brazilian Serie A": [["Vasco da Gama", "Atl\\u00e9tico-PR", "2018-11-14 16:30"], ["Bahia", "Cear\\u00e1", "2018-11-14 18:00"], ["Paran\\u00e1", "Atl\\u00e9tico-MG", "2018-11-14 18:00"], ["Cruzeiro", "Corinthians", "2018-11-14 18:45"], ["Palmeiras", "Fluminense", "2018-11-14 18:45"], ["Sport", "Vit\\u00f3ria", "2018-11-14 18:45"]], "English EFL Trophy": [["Notts County", "Doncaster Rovers", "2018-11-14 14:45"], ["Southend United", "Southampton U21", "2018-11-14 14:45"]], "Colombian Primera A": [["Independiente Medell\\u00edn", "Bucaramanga", "2018-11-14 20:00"], ["Independiente Santa Fe", "Deportes Tolima", "2018-11-14 20:00"]], "Colombian Primera B": [["C\\u00facuta Deportivo", "Llaneros", "2018-11-14 18:00"], ["Real Cartagena", "Cortulu\\u00e1", "2018-11-14 18:00"]], "Ind

In [9]:
# Persist the scraped schedule as JSON in schedule.json.
with open('schedule.json', 'w') as json_file:
    json_file.write(json.dumps(schedule))

In [5]:
def extract_info2(soup):
    """Return the fixtures on one parsed page as a flat list of rows.

    This is the flattened form of ``extract_info``: the parsing is delegated
    to that function so the two cannot drift apart, and each fixture is
    emitted with its league name in front.

    Rows are grouped by league caption text, in order of each caption's first
    appearance on the page.

    Args:
        soup: parsed page, e.g. ``BeautifulSoup(html, "html.parser")``.

    Returns:
        A list of ``[league, home, away, local_kickoff]`` lists.
    """
    # Reuse extract_info instead of keeping a second copy of the parsing loop.
    return [
        [league_name, home, away, kickoff]
        for league_name, matches in extract_info(soup).items()
        for home, away, kickoff in matches
    ]

In [6]:
# Download the fixtures page for each of the next 29 days (day offsets 1..29)
# and collect every fixture into one flat list of rows.
run_started = datetime.datetime.now()  # read once so all offsets share one reference date
schedule2 = []
for daydiff in range(1,30):
    day = (run_started + datetime.timedelta(days=daydiff)).strftime('%Y%m%d')
    # Without a timeout a single stalled connection would hang the cell forever.
    r = requests.get("http://www.espn.com/soccer/fixtures/_/date/" + day, timeout=30)
    # Stop on an HTTP error rather than parsing an error page into an empty result.
    r.raise_for_status()
    data = r.text
    soup = BeautifulSoup(data, "html.parser")
    res = extract_info2(soup)
    schedule2.extend(res)

In [7]:
# Peek at the first flattened row: [league, home team, away team, local kick-off time].
schedule2[0]

['UEFA Nations League', 'Cyprus', 'Bulgaria', '2018-11-16 14:45']

In [8]:
# Turn the list of [league, home, away, kick-off] rows into a table.
# Note that the name `schedule2` is rebound from a list to a DataFrame here.
schedule2 = pd.DataFrame(
    data=schedule2,
    columns=['premier', 'home', 'away', 'time'],
)

In [9]:
# Export the fixtures table to schedule.csv.
schedule2.to_csv(path_or_buf='schedule.csv')