## Web scraping LFC

Poniższy kod pobiera dane z witryny www.transfermarkt.pl dotyczące wyników Liverpoolu na koniec sezonu w latach 63/64 - 19/20*. 

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Download the page that lists the club's league results season by season
url = 'https://www.transfermarkt.pl/fc-liverpool/platzierungen/verein/31'
# A browser-like User-Agent is sent with the request
# (presumably the site rejects the default client one -- TODO confirm)
headers = {'User-Agent':
          'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}
# Without an explicit timeout the request could block forever on an unresponsive server
response = requests.get(url, headers=headers, timeout=30)
# Stop on an HTTP error instead of silently parsing an error page
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [114]:
# Select the elements of interest (the table with per-season data):
# everything whose CSS class is 'zentriert' or 'rechts'
table_content = soup.find_all(class_=['zentriert', 'rechts'])

data = []
seasons = {}

# Flatten the selected elements into 'data'. Elements before index 10 are
# ignored; after that the elements are handled by their index modulo 11.
for position, cell in enumerate(table_content[10:], start=10):
    slot = position % 11

    # Every element whose index is a multiple of 11 is dropped
    if slot == 0:
        continue

    text = cell.text

    if slot == 5:
        # Goals come as 'scored:conceded' and are stored as two separate values
        parts = text.split(':')
        data.append(parts[0])
        data.append(parts[1])
    elif slot == 7:
        # Points may come as 'X:Y'; only the part before the colon is kept
        # (without a colon, split() returns the whole text unchanged)
        data.append(text.split(':')[0])
    else:
        data.append(text)

# Lookup used to rewrite season labels, e.g. '63/64' -> '1963-1964'.
# A two-digit label does not say which century it belongs to, so the mapping
# covers the 100 consecutive seasons starting with 1963/64: every possible
# 'YY/YY' label then appears exactly once (63..99 -> 19xx, 00..62 -> 20xx),
# and seasons added to the page after 19/20 still resolve instead of
# raising KeyError on lookup.
seasons = {
    '{:02d}/{:02d}'.format(start % 100, (start + 1) % 100): '{}-{}'.format(start, start + 1)
    for start in range(1963, 1963 + 100)
}

In [123]:
# DataFrame columns, in the same order as the values of one season inside 'data'
columns = ['season', 'league', 'W', 'D', 'L', 'goals_scored', 'goals_conceded', 'goals_diff', 'points', 'place', 'coach']

# DataFrame data: one (initially empty) list of values per column
data_dict = {column: [] for column in columns}

# Distribute the flat list of scraped values over the columns: consecutive
# values belong to consecutive columns, wrapping around after the last one
for j, value in enumerate(data):
    column = columns[j % len(columns)]

    if column == 'season':
        # Rewrite the label from 'YY/YY' to 'YYYY-YYYY'
        value = seasons[value]

    elif column == 'league':
        # The scraped league cell is replaced with a fixed name: seasons
        # starting in 1992 or later are 'Premier League', earlier ones
        # 'First Division' (so '91/92' is the last 'First Division' season).
        # The start year is read from the season just stored for this row,
        # so the result depends neither on the order of the rows nor on a
        # repeated search for the '91/92' row.
        start_year = int(data_dict['season'][-1][:4])
        value = 'Premier League' if start_year >= 1992 else 'First Division'

    data_dict[column].append(value)



In [125]:
# Build the DataFrame from the per-column lists and write it to a CSV file
# (the row index is not written)
df = pd.DataFrame(data=data_dict)
df.to_csv(path_or_buf='LFC_seasons.csv', index=False)