In [3]:
from selenium import webdriver
import requests
from selenium.webdriver.common.keys import Keys
import bs4, time
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

# Render DataFrames in full in cell outputs: no row or column truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [580]:
# Column layout of the frame returned by getGames (also used when no game is found).
_GAME_COLUMNS = ['date', 'time', 'team_home', 'score', 'team_back']


def _parseResultList(htmlPage, data_hoje):
    '''Extract one dict per listed game from a parsed results page.'''
    table_results = htmlPage.find('div', {'class': 'result-list-wrapper'})
    if table_results is None:
        # No result list on the page: report "no games" instead of crashing.
        return []

    games = []
    for event in table_results.find_all('div', {'class': 'result-title'}):
        content = event.text.split('\n')
        if len(content) < 7:
            # Entry does not have the expected line layout; skip it.
            continue
        games.append({
            'date': data_hoje,
            'time': content[1],
            'team_home': content[2],
            'score': content[4],
            'team_back': content[6],
        })
    return games


def getGames(market, data_hoje, games_now=False, expected_total=475):
    '''
        Fetch the virtual-football results listed for one day.

        market: 'futebol_clubes' or 'futebol_mundial'.
        data_hoje: day to query, as the string appended to the results URL
            (callers in this notebook pass 'YYYY-MM-DD').
        games_now: when true, do a single HTTP request and parse whatever
            games that page contains. Otherwise drive Chrome through Selenium
            and keep scrolling until the page reports `expected_total` results.
        expected_total: result count that ends the scrolling loop.

        Returns a DataFrame with columns date, time, team_home, score and
        team_back (empty if no game could be parsed), or the string
        'Problem with request' when the HTTP status is not 200.

        Raises ValueError for an unknown market.
    '''
    market_urls = {
        'futebol_clubes': 'https://www.betfair.com/sport/virtuals-results?sport=SOCCER&day=',
        'futebol_mundial': 'https://www.betfair.com/sport/virtuals-results?sport=SOCCER_WORLD_CUP&day=',
    }
    if market not in market_urls:
        raise ValueError('unknown market: {!r}'.format(market))

    link = market_urls[market] + data_hoje

    if games_now:
        # A timeout keeps a stalled connection from blocking the caller forever.
        req = requests.get(link, timeout=30)
        if req.status_code != 200:
            return 'Problem with request'
        htmlPage = bs4.BeautifulSoup(req.content, features='html.parser')
    else:
        driver = webdriver.Chrome(executable_path='./webdriver/linux/chromedriver')
        try:
            driver.get(link)
            time.sleep(1)

            # Both must exist before the loop reads them.
            total = None
            count = 0
            while total != str(expected_total):
                total = driver.find_element_by_class_name('number-of-results').text
                # Jump to the bottom and back to the top to trigger loading of more results.
                driver.find_element_by_tag_name('html').send_keys(Keys.END)
                time.sleep(1)
                driver.find_element_by_tag_name('html').send_keys(Keys.HOME)
                count += 1
                if count >= 20:
                    print('\ndia com menos de {}\n'.format(expected_total))
                    next_or_break = input('1 para parar de tentar pegar os dados ou outra tecla para continuar\n:')
                    if str(next_or_break) == '1':
                        break
                    # The user chose to continue: allow another round of attempts
                    # before asking again.
                    count = 0

            htmlPage = bs4.BeautifulSoup(driver.page_source, features='html.parser')
        finally:
            # Always shut the browser down, even when scraping fails midway.
            driver.quit()

    return pd.DataFrame(_parseResultList(htmlPage, data_hoje), columns=_GAME_COLUMNS)

In [457]:
def cleanDatetime(data, data_today=True):
    '''
        Merge the 'date' and 'time' columns into a single 'datetime' column.

        data: DataFrame with 'date' and 'time' columns. It is modified in
            place and also returned.
        data_today: when true, any row whose hour is later than the current
            wall-clock hour is moved back one day. Pass False for data that
            is not from today.

        Values that cannot be parsed become NaT. The result has 'date' and
        'time' removed, is sorted by 'datetime' and has a fresh 0..n-1 index.
    '''
    # Build the whole column at once: this does not depend on the index labels
    # and also creates the column when the frame is empty.
    combined = data['date'].astype(str) + ' ' + data['time'].astype(str)
    data['datetime'] = pd.to_datetime(combined, errors='coerce')

    if data_today:
        current_hour = datetime.now().hour
        # NaT rows have no hour, compare as False and are left untouched.
        later_than_now = data['datetime'].dt.hour > current_hour
        data.loc[later_than_now, 'datetime'] = (
            data.loc[later_than_now, 'datetime'] - timedelta(days=1)
        )

    data.drop(['date', 'time'], axis=1, inplace=True)
    data.sort_values(['datetime'], inplace=True)
    data.reset_index(drop=True, inplace=True)
    return data

In [455]:
def saveDF(data, name=None, data_hoje=None, out_dir='./dados/'):
    '''
        Write `data` to '<out_dir>/<name>_<data_hoje>_<number of rows>.csv'.

        data: DataFrame to save (written without the index).
        name: file-name prefix. Directory components and a trailing '.csv'
            are stripped, so a full path may be passed. Defaults to 'data'.
        data_hoje: date string used in the file name. When omitted, a
            notebook-level `data_hoje` variable is used if one exists,
            otherwise today's date as 'YYYY-MM-DD'.
        out_dir: destination directory, created if missing.

        Returns True once the file has been written.
    '''
    import os

    if data_hoje is None:
        data_hoje = globals().get('data_hoje') or datetime.today().strftime('%Y-%m-%d')

    prefix = os.path.basename(str(name)) if name is not None else 'data'
    if prefix.lower().endswith('.csv'):
        prefix = prefix[:-len('.csv')]

    name_save = prefix + '_' + str(data_hoje) + '_' + str(len(data)) + '.csv'
    os.makedirs(out_dir, exist_ok=True)
    data.to_csv(os.path.join(out_dir, name_save), index=False)
    return True

In [507]:
def pipeline(data):
    '''
        Turn cleaned game rows into the feature table used for training and
        prediction.

        data: DataFrame with 'score' (text such as '2 - 1'), 'team_home' and
            'team_back' columns, ordered oldest to newest. It is not modified.

        Returns a new DataFrame (index 0..n-1) in which the three columns
        above are replaced by:
          - 'float_score': the score encoded as a float ('2 - 1' -> 2.1);
          - '1' .. '19': float_score of the 1st .. 19th previous game, filled
            only for rows at position 21 and later (NaN before that);
          - 'predict': 1.0 if the NEXT game had more than 2 goals in total,
            0.0 otherwise, NaN on the last row (its next game is unknown).

        Raises ValueError if a score cannot be parsed.
    '''
    out = data.reset_index(drop=True)
    scores = out['score'].astype(str)

    # Feature encoding: home goals before the decimal point, away goals after.
    out['float_score'] = pd.Series(
        [float(score.replace(' - ', '.')) for score in scores],
        index=out.index, dtype=float,
    )

    # Count goals from the text itself: the float encoding drops trailing
    # zeros ('1 - 10' becomes 1.1), so it cannot be used to recover the total.
    total_goals = pd.Series(
        [sum(int(goals) for goals in score.split(' - ')) for score in scores],
        index=out.index, dtype=float,
    )

    # Lag features are only filled from position 21 onwards.
    has_history = out.index > 20
    for before in range(1, 20):
        out[str(before)] = out['float_score'].shift(before).where(has_history)

    # Each row is labelled with the outcome of the game that follows it.
    out['predict'] = (total_goals > 2).astype(float).shift(-1)

    return out.drop(['team_back', 'team_home', 'score'], axis=1)

In [1]:
def includeYesterday(data, save=True):
    '''
        Append yesterday's games to the previously saved feature table.

        data: unused; kept for backward compatibility (yesterday's games are
            always downloaded again).
        save: when true, the merged table is handed to saveDF.

        Returns the merged DataFrame (previous rows followed by the new ones).
    '''
    # Load the previously saved rows.
    anteriores_file = './dados/clubes_20_jogos_anteriores.csv'
    anteriores = pd.read_csv(anteriores_file)

    dia_ontem = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')

    # Download yesterday's games and normalise their timestamps.
    data = getGames('futebol_clubes', dia_ontem)
    data = cleanDatetime(data, data_today=False)

    # Build lag features and the 'predict' column over the full day first, so
    # the new rows keep the history that precedes them.
    data = pipeline(data)

    # Keep only games played after the last one already saved. Filtering by
    # timestamp also works when that game is not part of yesterday's list.
    if len(anteriores) > 0:
        ultimo_dia_salvo = pd.to_datetime(anteriores['datetime'].iloc[-1])
        data = data[data['datetime'] > ultimo_dia_salvo]

    anteriores = pd.concat([anteriores, data], sort=False, ignore_index=True)

    if save:
        # Save the merged table (not just the new rows) under the base name.
        saveDF(anteriores, 'clubes_20_jogos_anteriores')

    return anteriores

In [4]:
# Load 'futebol_mundial.csv' from the working directory into `data`.
# NOTE(review): the next cell rebinds `data` to the getGames result, so this
# load looks like a leftover — confirm before relying on it.
data = pd.read_csv('futebol_mundial.csv')

In [509]:
# Fetch today's club-football results with a single HTTP request (games_now=True).
data = getGames('futebol_clubes', datetime.today().strftime('%Y-%m-%d'), games_now=True)

saved day: 2020-04-29


In [510]:
# Combine 'date' and 'time' into a sorted 'datetime' column (data_today defaults to True).
data = cleanDatetime(data)

In [511]:
# Build float_score, the lag columns and the 'predict' column.
data = pipeline(data)

In [538]:
# Inspect the most recent rows of the feature table.
data.tail()

Unnamed: 0,datetime,float_score,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,predict
45,2020-04-29 14:20:00,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0,1.1,1.0
46,2020-04-29 14:23:00,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0,0.0
47,2020-04-29 14:26:00,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0
48,2020-04-29 14:29:00,1.0,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,0.0
49,2020-04-29 14:32:00,0.0,1.0,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,


In [539]:
def getLastGames():
    '''
        Download today's club-football games and return them as a feature
        table (getGames -> cleanDatetime -> pipeline).
    '''
    today = datetime.today().strftime('%Y-%m-%d')
    raw_games = getGames('futebol_clubes', today, games_now=True)
    return pipeline(cleanDatetime(raw_games))

# The last row's 'predict' value is null because the game that follows it has not been played yet; the predictive model tells us that future result: 1.0 for OVER and 0.0 for UNDER

In [None]:
#retrain the model on all of the data, without holding out a test set
#export the model with its fitted weights

#import it here in this program
#make the predictions

In [515]:
# %tensorflow_version 2.x
import tensorflow as tf
tf.__version__

'2.1.0'

In [523]:
from keras.models import model_from_json
import keras
# Read the serialized model architecture (JSON). The context manager closes
# the file even if reading fails.
with open('./neural_network/mymodel.json', 'r') as file:
    model_json = file.read()

In [525]:
# Rebuild the network from the JSON text held in `model_json` (weights are loaded separately).
loading_model = tf.keras.models.model_from_json(model_json)

In [527]:
# Load the saved weights from './neural_network/weights.h5' into the rebuilt model.
loading_model.load_weights('./neural_network/weights.h5')

In [531]:
# `model` is the name predictLastGame reads when it calls model.predict.
model = loading_model

In [532]:
# Inspect the most recent rows of the feature table again.
data.tail()

Unnamed: 0,datetime,float_score,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,predict
45,2020-04-29 14:20:00,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0,1.1,1.0
46,2020-04-29 14:23:00,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0,0.0
47,2020-04-29 14:26:00,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,1.1,0.0
48,2020-04-29 14:29:00,1.0,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,2.0,0.0
49,2020-04-29 14:32:00,0.0,1.0,0.0,0.3,2.2,2.1,0.0,0.3,0.1,1.3,1.1,4.0,1.1,0.2,2.0,2.0,2.1,2.0,1.0,1.0,


In [None]:
def predictLastGame():
    '''
        Predict OVER/UNDER for the game that follows the latest listed one.

        Refreshes the notebook-level `data` with getLastGames(), feeds the
        last row's features to the notebook-level `model` and records the
        outcome in the notebook-level `predicted` dict, keyed by kick-off time
        ('HH:MM'). OVER predictions are sent through sendMessage and printed;
        UNDER predictions are only printed.

        Returns True when a new prediction was made. Returns False when this
        kick-off time was already predicted, or when the latest row cannot be
        used (no rows, unparsed timestamp, or missing feature values).
    '''
    global data
    data = getLastGames()

    if len(data) == 0:
        return False

    # Create the registry on first use so a fresh kernel does not fail.
    history = globals().setdefault('predicted', {})

    latest = data.iloc[-1]
    if pd.isna(latest['datetime']):
        return False

    # Next kick-off: 3 minutes after the latest listed game, shifted by 2 hours.
    # NOTE(review): the 2-hour shift looks like a timezone adjustment — confirm.
    next_game = latest['datetime'] + timedelta(minutes=3) + timedelta(hours=2)
    next_game = next_game.time().strftime('%H:%M')

    # Skip the model call entirely when this slot was already announced.
    if next_game in history:
        return False

    # Every column between 'datetime' (first) and 'predict' (last) is a feature.
    X = np.asarray([latest.iloc[1:-1].to_numpy()]).astype(np.float32)
    if np.isnan(X).any():
        # Incomplete history: a prediction from NaN inputs would be meaningless.
        return False

    probability = model.predict(X)[0][0]

    if probability > 0.5:
        message = 'Futebol de clubes às {} será OVER'.format(next_game)
        sendMessage(message)
    else:
        # UNDER predictions are deliberately not sent, only printed.
        message = 'Futebol de clubes às {} será UNDER'.format(next_game)

    print(message)
    history[next_game] = probability
    return True

In [705]:
# Predictions already announced, keyed by kick-off time ('HH:MM').
# Guarded so that re-running this cell keeps the existing history while a
# fresh kernel still gets the dict that predictLastGame needs.
if 'predicted' not in globals():
    predicted = {}

In [707]:
# Ask for a new prediction, log the request time, then wait 40 seconds.
# The loop has no exit condition; it is stopped by interrupting the kernel.
while True:
    predictLastGame()
    print('requested {}'.format(datetime.now().strftime('%H:%M:%S')))
    time.sleep(40)

requested 19:46:33


KeyboardInterrupt: 

In [708]:
# One-off prediction outside the polling loop.
predictLastGame()

Futebol de clubes às 19:47 será UNDER


True