# Pegando próximas corridas em função do tempo definido

In [1]:
import cloudscraper
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from dateutil import tz
import re
import undetected_chromedriver as uc
import time
from selenium.webdriver.remote.webdriver import By
import psycopg2


url_base = "https://www.oddschecker.com/greyhounds"
pattern = r"/greyhounds/[a-zA-Z-]+/\d{2}:\d{2}/winner"
top_2_finish = 'top-2-finish'
top_3_finish = 'top-3-finish'

data = []

scraper = cloudscraper.create_scraper()
# print(scraper.get(url_base).content)

request = scraper.get(url_base)
request = request.text
request

soup = BeautifulSoup(request, "html.parser")
# print(soup.prettify())

def establish_connection():
    conn = psycopg2.connect(
        host="localhost",
        port="5432",
        database="galgos",
        user="postgres",
        password="postgres"
    )
    return conn

def race_list(soup):
    # Verificar se a tag 'html' contém o atributo 'ng-app="ocAngularApp"'
    if soup.html.has_attr('ng-app') and soup.html['ng-app'] == 'ocAngularApp':
        print("O atributo ng-app='ocAngularApp' existe.")

        races = soup.find_all('li', class_='group accordian-parent beta-body', attrs={'data-day': True})

        for race in races:
            races = race.find_all('a')
            for link in races:
                link = link.get('href')
                if re.match(pattern, link):
                    link = "https://www.oddschecker.com" + link
                    data.append(link)

    else:
        print("O atributo ng-app='ocAngularApp' não existe.\n")

        def find_race_meets_container(tag):
            return tag.name == 'div' and tag.has_attr('class') and 'race-meets-container' in tag['class']

        uk_container = soup.find(find_race_meets_container)

        races = uk_container.find_all('div', class_='race-details')

        for race in races:
            races = race.find_all('a')
            for link in races:
                    link = link.get('href')
                    if re.match(pattern, link):
                            link = "https://www.oddschecker.com" + link
                            data.append(link)

    df = pd.DataFrame(data, columns=['link'])
    df['lugar'] = df['link'].apply(lambda link: re.search(r'/greyhounds/([^/]+)/\d{2}:\d{2}/', link).group(1))
    df['quando'] = df['link'].apply(lambda link: re.search(r'/\d{2}:\d{2}/', link).group().strip('/'))
    df['quando'] = pd.to_datetime(df['quando'])
    df['mercado'] = df['link'].apply(lambda link: re.search(r'/winner', link).group().strip('/'))
    race_list = df.sort_values('quando')
    
    return race_list

def get_nextrace(df):
    #  o fuso horário de Londres
    time_zone_uk = tz.gettz('Europe/London')
    
    # hora atual no fuso horário de Londres
    current_time = datetime.now(time_zone_uk)
    current_time = current_time.replace(tzinfo=None)
    next_race = df[df['quando']> current_time].head(1)
    next_race = next_race.iloc[0]['link']
    print(next_race)
    
    return next_race

def get_upcoming_races(df, minutes):
    # Fuso horário de Londres
    time_zone_uk = tz.gettz('Europe/London')

    # # Hora atual no fuso horário de Londres
    current_time = datetime.now(tz=time_zone_uk)
    current_time = current_time.replace(tzinfo=None)

    # # Calcula o tempo limite (x minutos a partir da hora atual)
    time_limit = current_time + timedelta(minutes=minutes)

    # Filtra as corridas cujo horário esteja dentro do intervalo
    upcoming_races = df[(df['quando'] > current_time) & (df['quando'] <= time_limit)]
    upcoming_races_links = upcoming_races['link'].tolist()

    print(upcoming_races_links)

    return upcoming_races_links

def get_data_races(race):    
    # Extraindo dados da URL
    url_parts = race.split("/")

    onde = url_parts[4]
    quando = url_parts[5]
    mercado = url_parts[6]
    date = datetime.now().date()
    quando = f"{date} {quando}"
    

    try:
        request = scraper.get(race)
        request = request.text


        soup = BeautifulSoup(request, "html.parser")

        if soup.html.has_attr('ng-app') and soup.html['ng-app'] == 'ocAngularApp':
            print('pega por lista')
            # SITUAÇÃO 2
            # Aqui não consigo pegar as odds em decimal
            driver = uc.Chrome()
            driver.get(race)

            # click on popup
            time.sleep(5)
            driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div/button[1]').click()

            # Obter o HTML da página
            html_content = driver.page_source

            # driver.quit()

            # Criar o objeto BeautifulSoup para analisar o HTML
            soup = BeautifulSoup(html_content, "html.parser")

            # # consulte o html
            with open('race_list_list.html', 'w') as file:
                file.write(soup.prettify())
            
            # Encontrar o tbody desejado pelo id
            tbody = soup.find("tbody", id="t1")

            data = []

            # Iterar sobre todos os elementos tr dentro do tbody
            for tr in tbody.find_all("tr", class_="diff-row evTabRow bc"):
                # Realizar alguma ação com cada elemento tr, por exemplo, extrair o texto
                trap = tr.find('td', class_="trap-cell").text
                galgo = tr.find('td', class_="sel nm basket-active").text
                odds = tr.find('td', class_="bc")
                odd_frac = odds['data-o']
                odd_dec = odds['data-odig']
                data_fodds = odds['data-fodds']

                data.append([trap,galgo, odd_dec, onde, quando, mercado])
                
                conn = establish_connection()
                cur = conn.cursor()
                query = "INSERT INTO odds (odd, nome_pista, quando, trap, nome_galgo, mercado) VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (nome_pista, quando, trap, nome_galgo, mercado) DO UPDATE SET odd = excluded.odd"
                data_sql = (odd_dec, onde, quando, trap, galgo, mercado)
                cur.execute(query, data_sql)
                conn.commit()
                cur.close()
                conn.close()
                
            driver.quit()
        else:
            print('pega por div')
            # SITUAÇÃO 1

            dog_list = soup.find_all('tr', class_='diff-row evTabRow bc')

            data = []

            for dog in dog_list:
                trap = dog.find('span', class_='trap').text
                galgo = dog.find('a', class_="popup selTxt").text
                odds = dog.find('td', class_="bc")
                odd_frac = odds['data-o']
                odd_dec = odds['data-odig']
                data_fodds = odds['data-fodds']

                data.append([trap, galgo, odd_dec, odd_frac, data_fodds, onde, quando, mercado])
                
                conn = establish_connection()
                cur = conn.cursor()
                # query = "INSERT INTO odds (odd, nome_pista, quando, trap, nome_galgo, mercado) VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (nome_pista, quando, trap, nome_galgo, mercado) DO UPDATE SET odd = excluded.odd"
                query = """
                            INSERT INTO odds_teste (odd, nome_pista, quando, trap, nome_galgo, mercado)
                            VALUES (%s, %s, %s, %s, %s, %s)
                            ON CONFLICT (nome_pista, quando, trap, nome_galgo, mercado)
                            DO UPDATE SET odd = EXCLUDED.odd;
                        """
                data_sql = (odd_dec, onde, quando, trap, galgo, mercado)
                cur.execute(query, data_sql)
                conn.commit()
                cur.close()
                conn.close()


            # print(data)
        try:
                
            df = pd.DataFrame(data, columns=['trap', 'galgo', 'odd_dec', 'odd_frac', 'data_fodds', 'onde', 'quando', 'mercado'])
        except:
            df = pd.DataFrame(data, columns=['trap', 'galgo', 'odd_dec', 'onde', 'quando', 'mercado'])
            
    except scraper.simpleException as e:
        print(e)
        
    print(df)
    return df

race_list = race_list(soup)
print('Total de corridas hoje:',race_list.shape[0])

O atributo ng-app='ocAngularApp' não existe.

Total de corridas hoje: 148


  df['quando'] = pd.to_datetime(df['quando'])


In [2]:
while True:
    minutes = 60
    next_races = get_upcoming_races(race_list, minutes)

    print(f'Existem {len(next_races)} corridas nos próximos {minutes} minutos')

    count = 0
    for race in next_races:
        print(race)
        top_2_finish_nxr = race.replace('winner', top_2_finish)
        top_3_finish_nxr = race.replace('winner', top_3_finish)
        print(top_2_finish_nxr)
        print(top_3_finish_nxr)

        get_data_races(race)
        top_2_finish_nxr = get_data_races(top_2_finish_nxr)
        top_3_finish_nxr = get_data_races(top_3_finish_nxr)
        count += 1
        print("Número de iterações:", count)
        
    time.sleep(60)

['https://www.oddschecker.com/greyhounds/doncaster/20:17/winner', 'https://www.oddschecker.com/greyhounds/nottingham/20:21/winner', 'https://www.oddschecker.com/greyhounds/oxford/20:23/winner', 'https://www.oddschecker.com/greyhounds/youghal/20:26/winner', 'https://www.oddschecker.com/greyhounds/yarmouth/20:27/winner', 'https://www.oddschecker.com/greyhounds/nottingham/20:36/winner', 'https://www.oddschecker.com/greyhounds/doncaster/20:38/winner', 'https://www.oddschecker.com/greyhounds/oxford/20:43/winner', 'https://www.oddschecker.com/greyhounds/yarmouth/20:46/winner', 'https://www.oddschecker.com/greyhounds/nottingham/20:52/winner', 'https://www.oddschecker.com/greyhounds/doncaster/20:54/winner', 'https://www.oddschecker.com/greyhounds/yarmouth/21:01/winner', 'https://www.oddschecker.com/greyhounds/oxford/21:02/winner', 'https://www.oddschecker.com/greyhounds/nottingham/21:11/winner', 'https://www.oddschecker.com/greyhounds/doncaster/21:12/winner']
Existem 15 corridas nos próximos 6

TypeError: catching classes that do not inherit from BaseException is not allowed

In [None]:
import threading

while True:
    # Cria threads para extrair dados das páginas
    threads = []
    for url in next_races:
        thread = threading.Thread(target=get_data_races, args=(url,))
        thread.start()
        threads.append(thread)
    
    # Aguarda todas as threads terminarem
    for thread in threads:
        thread.join()

In [None]:
next_race = get_data_races('https://www.oddschecker.com/greyhounds/kinsley/17:14/winner')

In [None]:
minutes=60

try:
    next_races = get_upcoming_races(race_list,minutes)
    xxx = get_data_races(next_race)
    
    # top_2_finish_nxr = next_race.replace('winner', top_2_finish)
    # top_3_finish_nxr = next_race.replace('winner', top_3_finish)

    # top_2_finish_nxr = get_data_races(top_2_finish_nxr)
    # top_3_finish_nxr = get_data_races(top_3_finish_nxr)
except:
    print('Erro ao tentar capturar a próxima corrida')
    print('Provavelmente elas terminaram...')

In [None]:
xxx

In [None]:
top_2_finish_nxr

In [None]:
# # APENAS TESTE NO BRASIL
# CASO ESTEJA VENDO UM DIA ANTERIOR AO DAS CORRIDAS
# EM PRODUÇÃO ESSE DF DEVE SER CONSTRUIDO NA VIRADA DE UM DIA PARA OUTRO

# Obtendo a data atual
# current_date = datetime.now().date()

# Adicionando um dia à data atual para indicar que é amanhã
# target_date = current_date + timedelta(days=1)

# Convertendo a coluna 'Hora' para o formato datetime, especificando a data como amanhã
# df['quando'] = pd.to_datetime(df['quando'], format='%Y-%m-%d %H:%M') + pd.DateOffset(days=1)
# df

# Pega a próxima corrida no fuso horário londrino
