## Get Data

Downloads the betting odds for today's and tomorrow's football matches and saves them to one CSV file per day.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import os

def is_csv_empty(file_path):
    """Report whether the CSV at ``file_path`` should be treated as empty.

    Returns True when the file parses to an empty DataFrame, and also when
    reading it fails for any reason (pandas' EmptyDataError included).
    Returns False only when the file was read and the resulting frame is
    not empty.
    """
    try:
        frame_is_empty = pd.read_csv(file_path).empty
    except Exception:
        # Every read failure is reported the same way as an empty file.
        return True
    return True if frame_is_empty else False
    
# League pages to scrape, built from a shared base path plus one slug each.
_BASE_URL = 'https://www.efortuna.pl/zaklady-bukmacherskie/pilka-nozna'
urls = [
    f'{_BASE_URL}/{slug}'
    for slug in (
        '1-niemcy',
        '1-anglia',
        '1-belgia',
        '1-francja',
        '1-hiszpania',
        '1-holandia',
        '1-portugalia',
        'liga-mistrzow',
        'liga-europy',
    )
]

# Calendar days of interest; tomorrow is also kept as a "DD.MM." string.
today = datetime.date.today()
tomorrow = today + datetime.timedelta(days=1)
tomorrow_date_format = tomorrow.strftime('%d.%m.')

# Scraped rows, collected separately per day.
data_list_today, data_list_tomorrow = [], []

# The scraped dates carry no year. Parsing them against a fixed leap year keeps
# "29.02." valid; strptime's default year (1900) would reject that date.
_PARSE_YEAR = 2000
today_date_format = today.strftime('%d.%m.')

# Visit every league page and collect one dict per match row that is dated
# today or tomorrow. Network failures skip the league; malformed rows skip
# only that row, so one bad row no longer discards the rest of the page.
for url in urls:
    league = url.split('/')[-1]
    try:
        # A timeout keeps one unresponsive page from hanging the whole cell.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'Skipping league {league}: {exc}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table rows
    rows = soup.find_all('tr', class_='')

    for row in rows:
        match_element = row.find('td', class_='col-title')
        if match_element is None:
            continue

        match_text = match_element.find('span', class_='market-name')
        date_element = row.find('span', class_='event-datetime')
        if match_text is None or date_element is None:
            # Without both the team names and the date the row cannot be
            # stored; skipping also avoids reusing names from an earlier row.
            continue

        teams = match_text.text.strip().replace('\n', '').split('-')
        odds_values = [
            odds_span.text.strip()
            for odds_span in row.find_all('span', class_='odds-value')
        ]
        if len(teams) < 2 or len(odds_values) < 6:
            # Row does not expose two teams and all six odds columns.
            continue

        date = date_element.text.strip().replace('\n', '')
        try:
            parsed_date = datetime.datetime.strptime(
                f'{date} {_PARSE_YEAR}', '%d.%m. %H:%M %Y'
            )
        except ValueError:
            print(f'Skipping row with unparseable date {date!r} ({league})')
            continue

        data_dict = {
            'Liga': league,
            'Zespół_1': teams[0],
            'Zespół_2': teams[1],
            '1': odds_values[0],
            '0': odds_values[1],
            '2': odds_values[2],
            '10': odds_values[3],
            '02': odds_values[4],
            '12': odds_values[5],
            'Data': date,
            # Result columns are left empty here.
            'Wygrany': None,
            'Bramki_1': None,
            'Bramki_2': None
        }

        match_day = parsed_date.strftime('%d.%m.')
        if match_day == tomorrow_date_format:
            data_list_tomorrow.append(data_dict)
        elif match_day == today_date_format:
            data_list_today.append(data_dict)

# Create and save the data to CSV files.
# A day's file is written only when there is no non-empty CSV for that day,
# either in the working directory or in the archive. Days with no scraped
# matches are skipped so that no data-less CSV is left behind.
if data_list_today and is_csv_empty(f'{today}.csv') and is_csv_empty(f'archive_csv/{today}.csv'):
    df_today = pd.DataFrame(data_list_today)
    df_today.to_csv(f'{today}.csv', index=False)

if data_list_tomorrow and is_csv_empty(f'{tomorrow}.csv') and is_csv_empty(f'archive_csv/{tomorrow}.csv'):
    df_tomorrow = pd.DataFrame(data_list_tomorrow)
    df_tomorrow.to_csv(f'{tomorrow}.csv', index=False)


## Combine Data

Combines all CSV files in the working directory into one merged file, then moves the source CSVs to `archive_csv/`.

In [1]:
import pandas as pd
import os
import datetime

ARCHIVE_DIR = 'archive_csv'
today = datetime.date.today()

# Every CSV in the working directory takes part in the merge. Sorting makes
# the row order of the merged file independent of directory listing order.
files = sorted(name for name in os.listdir() if name.endswith('.csv'))

# Read each file once and concatenate a single time at the end.
frames = []
for f in files:
    try:
        frames.append(pd.read_csv(f))
    except pd.errors.EmptyDataError:
        # A file with nothing to parse adds no rows; it is still archived below.
        pass

# Move the source files to the archive, creating the directory if needed.
if files:
    os.makedirs(ARCHIVE_DIR, exist_ok=True)
    for file in files:
        os.replace(file, f'{ARCHIVE_DIR}/{file}')

# The merged file is written after the move so it stays in the working directory.
if frames:
    combined_data = pd.concat(frames, ignore_index=True)
    combined_data.to_csv(f'{today}-merged.csv', index=False)
else:
    print('No CSV data to combine.')

## Fill Scores

Fills in the missing match results (winner and goals) in a CSV file.

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime

# k counts the rows found without a result; today stamps the output file name.
k = 0
today = datetime.date.today()

def get_score(match):
    '''
    Look up a match score by scraping a Google search results page.

    Parameters
    ----------
    match : str
        Search query, expected in the form: CLUB1 vs CLUB2 DATE

    Returns
    -------
    str
        Text of the page element with class ``imso_mh__ma-sc-cont``, or the
        string 'NaN' when the request fails or no such element is present.
    '''
    url = "https://www.google.co.in/search"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0'
    }

    print('Function get_score used.')
    try:
        # Passing the query through `params` lets requests percent-encode it,
        # so characters such as '&' or '#' in a club name stay in the query.
        # The timeout stops a stalled request from blocking the whole run.
        page = requests.get(url, params={'q': match}, headers=headers, timeout=30)
    except requests.RequestException:
        print(f'Question: {match} raise ERROR')
        return 'NaN'

    soup = BeautifulSoup(page.content, 'html.parser')
    score_box = soup.find(class_='imso_mh__ma-sc-cont')
    if score_box is None:
        # The results page has no score element for this query.
        print(f'Question: {match} raise ERROR')
        return 'NaN'

    return score_box.get_text()

# Year appended to each "DD.MM." date when building the search query.
# NOTE(review): the input stores dates without a year, and the saved output of
# this cell shows December fixtures being queried with 2024. Confirm the year
# matches the matches in the input file.
QUERY_YEAR = 2024


def _parse_score(score_text):
    """Turn a 'home-away' score string into a pair of ints, or None if it is not one."""
    parts = score_text.split('-')
    if len(parts) < 2:
        # Also covers the 'NaN' sentinel returned by get_score.
        return None
    try:
        return int(parts[0]), int(parts[1])
    except ValueError:
        return None


data = pd.read_csv('2024-01-26-filled.csv')

for index, row in data.iterrows():
    if not pd.isna(row["Wygrany"]):
        continue

    # k counts every row that had no result when the loop reached it.
    k += 1
    match_day = row['Data'].split(' ')[0]
    question = f'Match result {row["Zespół_1"]} vs {row["Zespół_2"]} {match_day}{QUERY_YEAR}'

    score = _parse_score(get_score(question))
    if score is None:
        # No usable score: leave the row untouched so a later run can retry it.
        continue

    # Compare goals as integers; comparing the raw strings would order
    # "10" before "9".
    goals_1, goals_2 = score
    if goals_1 > goals_2:
        winner = 1
    elif goals_1 == goals_2:
        winner = 0
    else:
        winner = 2

    data.at[index, 'Wygrany'] = winner
    data.at[index, 'Bramki_1'] = goals_1
    data.at[index, 'Bramki_2'] = goals_2

data.to_csv(f'{today}-filled.csv', index=False)
print("bez wyniku: ", k)

Function get_score used.
Question: Match result Bayern M. vs Un. Berlin 02.12.2024 raise ERROR
Function get_score used.
Question: Match result Granada vs Bilbao 10.12.2024 raise ERROR
Function get_score used.
Question: Match result Inter M. vs Real Sociedad 12.12.2024 raise ERROR
Function get_score used.
Question: Match result FC Porto vs S.Donieck 13.12.2024 raise ERROR
Function get_score used.
Question: Match result Slavia P. vs Servette 14.12.2024 raise ERROR
Function get_score used.
Question: Match result Sporting L. vs Sturm Graz 14.12.2024 raise ERROR
Function get_score used.
Question: Match result Betis vs Gl.Rangers 14.12.2024 raise ERROR
Function get_score used.
Question: Match result M gladbach vs Werder B. 15.12.2024 raise ERROR
Function get_score used.
Question: Match result Bilbao vs Atl. Madryt 16.12.2024 raise ERROR
Function get_score used.
Question: Match result UD L.Palmas vs Cadiz 17.12.2024 raise ERROR
Function get_score used.
Question: Match result Werder B. vs Leip