# Elo-Crawler

### First, download the Elo list for one day in each year, starting from 2005

In [48]:
import requests
import pandas as pd
import os
# Directory holding the per-date Elo snapshot CSVs; note the trailing slash
# (the clubs path below has none).
PATH_ELO_DATES = 'data/elo/elo-date/'
# Root directory for the per-club CSV downloads; a sub-directory named after
# the selected country is created below it.
PATH_ELO_CLUBS = 'data/elo/elo-clubs'

In [17]:
# One snapshot date (1 January) per year, from 2005 through 2023 inclusive.
elo_dates = [f'{snapshot_year}-01-01' for snapshot_year in range(2005, 2024)]

In [28]:
# Download one Elo snapshot per entry of `elo_dates` and store it as
# <PATH_ELO_DATES><date>.csv.
# open() does not create missing parent directories, so make sure the target
# directory exists before the first file is written.
os.makedirs(PATH_ELO_DATES, exist_ok=True)

for date in elo_dates:
    base = f'http://api.clubelo.com/{date}'
    try:
        # Without a timeout a stalled connection would block the loop forever.
        resp = requests.get(base, timeout=30)
    except requests.RequestException as err:
        # Report and carry on so that one network error does not abort the
        # remaining downloads.
        print(f'Request failed for {base}: {err}')
        continue
    if resp.status_code != 200:
        print(f'Request failed for {base}')
        continue
    # Write the raw response bytes unchanged.
    with open(f'{PATH_ELO_DATES}{date}.csv', 'wb') as output_file:
        output_file.write(resp.content)

### Collect all club names from the selected country

In [50]:
# Value matched against the 'Country' column of the downloaded Elo lists;
# it also names the per-country output directory for the club files.
country = 'GER' # <= select your country here

In [42]:
# Load every downloaded snapshot and collect the distinct club names of the
# selected country, in order of first appearance.
# os.listdir() returns entries in arbitrary order; sorting the date-named
# files makes the load order, and therefore the club order, reproducible.
dataframes = []
for filename in sorted(os.listdir(PATH_ELO_DATES)):
    if filename.endswith('.csv'):
        dataframes.append(pd.read_csv(os.path.join(PATH_ELO_DATES, filename)))

# A dict acts as an insertion-ordered set, so duplicates are dropped without
# re-scanning a list for every club.
unique_clubs = {}
for tmp_df in dataframes:
    country_rows = tmp_df[tmp_df['Country'] == country]
    unique_clubs.update(dict.fromkeys(country_rows['Club']))
club_names = list(unique_clubs)

In [45]:
# Report how many distinct clubs were collected, followed by the names.
print(f'Number of clubs {len(club_names)}', club_names, sep='\n')

Number of clubs 60
['Bayern', 'Werder', 'Stuttgart', 'Leverkusen', 'Schalke', 'Hertha', 'Hamburg', 'Dortmund', 'Hannover', 'Wolfsburg', 'Bochum', 'Nuernberg', 'Lautern', 'Gladbach', 'Bielefeld', 'Mainz', 'Rostock', 'Aachen', 'Koeln', 'Duisburg', 'Frankfurt', 'Fuerth', 'Freiburg', 'Muenchen 60', 'Cottbus', 'Aue', 'Burghausen', 'Ahlen', 'Trier', 'Saarbruecken', 'Oberhausen', 'Karlsruhe', 'Unterhaching', 'Erfurt', 'Dresden', 'Essen', 'Paderborn', 'Braunschweig', 'Siegen', 'Offenbach', 'Augsburg', 'Koblenz', 'Jena', 'Wehen', 'Hoffenheim', 'St Pauli', 'Osnabrueck', 'Ingolstadt', 'FSV Frankfurt', 'Duesseldorf', 'Union Berlin', 'Aalen', 'Sandhausen', 'Regensburg', 'Darmstadt', 'RB Leipzig', 'Heidenheim', 'Wuerzburg', 'Holstein', 'Magdeburg']


### Download the Elo score list for all clubs from the selected country

In [54]:
# Output directory for the club CSV files of the selected country,
# i.e. <PATH_ELO_CLUBS>/<country>.
dir_path = f'{PATH_ELO_CLUBS}/{country}'

In [58]:
# Download the Elo CSV of every club in `club_names` into `dir_path`,
# one file per club named <club>.csv.
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(dir_path, exist_ok=True)

for club in club_names:
    base = f'http://api.clubelo.com/{club}'
    try:
        # Without a timeout a stalled connection would block the loop forever.
        resp = requests.get(base, timeout=30)
    except requests.RequestException as err:
        # Report and carry on so that one network error does not abort the
        # remaining downloads.
        print(f'Request failed for {base}: {err}')
        continue
    if resp.status_code != 200:
        print(f'Request failed for {base}')
        continue
    # Write the raw response bytes unchanged.
    with open(f'{dir_path}/{club}.csv', 'wb') as output_file:
        output_file.write(resp.content)
    

### Trim the Elo lists so they start at a selected year

In [None]:
# First calendar year to keep: rows whose 'From' date falls in an earlier
# year are dropped from the club files.
year = 2005 # <= select year here

In [60]:
# Rewrite every club CSV in `dir_path` in place, keeping only the rows whose
# 'From' date lies in `year` or later.
for filename in os.listdir(dir_path):
    if not filename.endswith('.csv'):
        continue
    csv_path = os.path.join(dir_path, filename)
    df = pd.read_csv(csv_path)
    # Parse the 'From' column as datetimes so its year can be compared.
    df['From'] = pd.to_datetime(df['From'])
    df = df[df['From'].dt.year >= year]
    # index=False: by default pandas writes the row index as an extra unnamed
    # column, and every re-run of this cell would add one more.
    df.to_csv(csv_path, index=False)
