In [93]:
import novibet_functions as nv
import stoiximan_function as stm
import queries as sq
import pandas as pd
import duckdb
from unidecode import unidecode
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

## Scrapping

In [2]:
# Set options for headless mode
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# Set options for window size
options.add_argument("--window-size=1920,1200")

# Initialize the webdriver
driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)

[WDM] - Downloading: 100%|██████████| 6.79M/6.79M [00:00<00:00, 7.99MB/s]
  driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)


### 1. Novibet data

Novibet acts different with headless mode. It returns also the extra bets (+ XX). 

If I use the open mode I want to refactor the code.

In [3]:
page_url = 'https://www.novibet.gr/en/sports'

# Football
football_string = nv.novibet_football_text(page_url, driver)
nv.novibet_football_export(football_string)

# Basketball
basketball_string = nv.novibet_basketball_text(driver)
nv.novibet_basketball_export(basketball_string)

# Tennis
tennis_string = nv.novibet_tennis_text(driver)
nv.novibet_tennis_export(tennis_string)

### 2. Stoiximan data

In [4]:
# Football
football_url = 'https://en.stoiximan.gr/sport/soccer/'
football_string = stm.stoiximan_football_text(football_url, driver)
stm.stoiximan_football_export(football_string)

# Basketball
basketball_url = 'https://en.stoiximan.gr/sport/basketball/'
basketball_string = stm.stoiximan_basketball_text(basketball_url, driver)
stm.stoiximan_basketball_export(basketball_string)

# Tennis
tennis_url = 'https://en.stoiximan.gr/sport/tennis/'
tennis_string = stm.stoiximan_tennis_text(tennis_url, driver)
stm.stoiximan_tennis_export(tennis_string)

## Calculations



### 1. Football teams process

Functions:
* Replace Unicode Character
* Remove Single/Double words 

In [19]:
def remove_unicode(df: pd.DataFrame)->pd.DataFrame:
    cleaned_df = df.apply(lambda x: unidecode(x) if isinstance(x, str) else x)
    return cleaned_df

def remove_single_double_words(df: pd.DataFrame)->pd.DataFrame:
    cleaned_df = df.apply(lambda x: ' '.join([word for word in x.split() if len(set(word)) > 2]) if isinstance(x, str) else x)
    return cleaned_df

def dbrun(dbcon: duckdb.DuckDBPyConnection, query: str, df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
    """Run the query with duckdb"""
    # Register the 1st DataFrame as a DuckDB table
    dbcon.register('table1', df1)
    # Register the second dataframe as a DuckDB table (if provided)
    if df2 is not None:
        dbcon.register('table2', df2)
    # Execute the query on the DuckDB table
    result = dbcon.query(query).to_df()
    return result

In [81]:
# Import
football_novibet = pd.read_csv('data/novibet_football.csv')
football_stoiximan = pd.read_csv('data/stoiximan_football.csv')


# Remove unicodes from teams names
football_novibet['Team1'] = remove_unicode(football_novibet['Team1'].astype(str)).str.lower()
football_novibet['Team2'] = remove_unicode(football_novibet['Team2'].astype(str)).str.lower()
football_stoiximan['Team1'] = remove_unicode(football_stoiximan['Team1'].astype(str)).str.lower()
football_stoiximan['Team2'] = remove_unicode(football_stoiximan['Team2'].astype(str)).str.lower()

# Remove single/double words from teams names
football_novibet['Team1'] = remove_single_double_words(football_novibet['Team1'].astype(str)).str.lower()
football_novibet['Team2'] = remove_single_double_words(football_novibet['Team2'].astype(str)).str.lower()
football_stoiximan['Team1'] = remove_single_double_words(football_stoiximan['Team1'].astype(str)).str.lower()
football_stoiximan['Team2'] = remove_single_double_words(football_stoiximan['Team2'].astype(str)).str.lower()

# DuckDB connection
dbcon = duckdb.connect()

#### 1.1 Football (Over-Under)

In [87]:
# Over/Under Arbitrage
df_over_under = dbrun(dbcon, sq.query_over_under, football_novibet, football_stoiximan)
df_over_under

Unnamed: 0,Team1,Team2,O_novibet,U_novibet,O_stoiximan,U_stoiximan,O_max,U_max,arb,Team1_novibet,Team2_novibet,Team1_stoiximan,Team2_stoiximan
0,richards bay,orlando pirates,2.45,1.52,1.88,1.82,2.45,1.82,0.957614,richards bay,orlando pirates,,orlando pirates
1,lechia gdansk,slask wroclaw,2.04,1.76,1.62,2.12,2.04,2.12,0.961894,lechia gdansk,slask wroclaw,,slask wroclaw
2,chrobry glogow,miedz legnica,1.65,2.1,2.05,1.72,2.05,2.1,0.963995,chrobry glogow,miedz legnica,,miedz legnica
3,korona kielce,miedz legnica,2.05,1.75,1.65,2.1,2.05,2.1,0.963995,korona kielce,miedz legnica,,miedz legnica


#### 1.2 Football (GG-NG)

In [97]:
# GG/NG Arbitrage
df_gg_ng = dbrun(dbcon, sq.query_gg_ng, football_novibet, football_stoiximan)
df_gg_ng

ImportError: cannot import name 'query_gg_ng' from 'queries' (e:\panag\Desktop\Github repos\arbitage_betting\queries.py)

In [88]:
football_novibet

Unnamed: 0,Championship,Team1,Team2,Time,1,1_odd,X,X_odd,2,2_odd,Over,O_odd,Under,U_odd,GG,GG_odd,NG,NG_odd
0,Greece - Super League 1,levadiakos,atromitos,17:00,1,2.35,X,3.10,2,3.30,O 1.5,1.53,U 1.5,2.50,GG,2.15,NG,1.68
1,Greece - Super League 1,panetolikos,lamia,17:30,1,2.40,X,3.15,2,3.20,O 2.5,2.50,U 2.5,1.53,GG,2.10,NG,1.71
2,Greece - Super League 1,pas giannina,ofi,19:30,1,2.45,X,3.05,2,3.20,O 2.5,2.45,U 2.5,1.55,GG,2.03,NG,1.76
3,Greece - Super League 1,ionikos,asteras tripolis,21:00,1,1.89,X,3.35,2,4.55,O 1.5,1.53,U 1.5,2.50,GG,2.30,NG,1.60
4,England - Premier League,manchester city,liverpool,14:30,1,1.66,X,4.40,2,5.70,O 2.5,1.63,U 2.5,2.30,GG,1.67,NG,2.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1053,Tunisia - Ligue 2,omrane,sfaxien,15:30,1,1.74,X,3.15,2,4.60,O 1.5,1.56,U 1.5,2.25,GG,2.50,NG,1.48
1054,Tunisia - Ligue 2,kalaa sport,stade gabesien,15:30,1,1.95,X,2.95,2,3.90,O 1.5,1.62,U 1.5,2.15,GG,2.45,NG,1.49
1055,Tunisia - Ligue 2,moknine,djerba,15:30,1,1.93,X,2.95,2,3.95,O 1.5,1.59,U 1.5,2.20,GG,2.40,NG,1.52
1056,Cyprus - 3rd Division,aspis pylas,ethnikos assias,16:00,1,1.44,X,4.20,2,5.60,O 2.5,1.82,U 2.5,1.89,GG,2.01,NG,1.72


In [89]:
football_stoiximan

Unnamed: 0,Date,Time,Team1,Team2,1_odd,X_odd,2_odd,O,O_odd,U,U_odd,GG,GG_odd,NG,NG_odd
0,01-Apr,10:15,melbourne serbia,casey comets,2.15,3.35,3.05,O,1.65,U,2.05,Yes,1.55,No,2.27
1,01-Apr,10:30,sydney olympic,bulls academy,1.60,4.25,4.35,O,1.45,U,2.5,Yes,1.52,No,2.32
2,01-Apr,10:30,sorrento,perth redstar,2.95,3.60,2.12,O,1.47,U,2.42,Yes,1.44,No,2.55
3,01-Apr,10:30,incheon united,daegu,2.22,3.35,3.15,O,2,U,1.75,Yes,1.78,No,1.93
4,01-Apr,10:30,gwangju,suwon city,1.87,3.35,4.20,O,2.07,U,1.7,Yes,1.91,No,1.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
889,01-Apr,21:00,olympique lyonnais,louhans-cuiseaux,2.50,3.25,2.62,O,1.83,U,1.85,Yes,1.65,No,2.07
890,01-Apr,21:45,oostende,standard liege,3.05,3.65,2.22,O,1.7,U,2.18,Yes,1.55,No,2.35
891,01-Apr,22:00,envigado,once caldas,2.42,3.10,3.20,O,2.5,U,1.55,Yes,2.05,No,1.72
892,02-Apr,00:10,independiente medellin,deportes tolima,2.37,3.10,3.30,O,2.55,U,1.52,Yes,2.07,No,1.7


In [None]:
# I want a way to check the results beacause I get duplicates 

# Check if team1 ,team2 have O_odd_x in novibet and O_odd_y in stoiximan

In [64]:
from fuzzywuzzy import fuzz

from fuzzywuzzy import fuzz

matches = []
for index, row in football_novibet.iterrows():
    team1_novibet = row['team1']
    team2_novibet = row['team2']
    team1_stoiximan = football_stoiximan.loc[football_stoiximan['team1'].apply(lambda x: fuzz.token_sort_ratio(x, team1_novibet)) > 80, 'team1'].values
    team2_stoiximan = football_stoiximan.loc[football_stoiximan['team2'].apply(lambda x: fuzz.token_sort_ratio(x, team2_novibet)) > 80, 'team2'].values
    
    if len(team1_stoiximan) > 0 and len(team2_stoiximan) > 0:
        # both team names are similar in both dataframes
        # create a dictionary with the data and append it to the matches list
        match = {
            'team1': team1_novibet,
            'team2': team2_novibet,
            'odds_over_novibet': row['odds_over'],
            'odds_under_novibet': row['odds_under'],
        }
        
        # check if there is a match in football_stoiximan
        matching_row = football_stoiximan[(football_stoiximan['team1'] == team1_stoiximan[0]) & (football_stoiximan['team2'] == team2_stoiximan[0])]
        if len(matching_row) > 0:
            # there is a match, add the odds values to the dictionary
            match['odds_over_stoiximan'] = matching_row['O_odds'].values[0]
            match['odds_under_stoiximan'] = matching_row['U_odds'].values[0]
            
            matches.append(match)
        else:
            # no match, skip this row
            continue

# create a new dataframe with the matched data
merged_df_lev = pd.DataFrame(matches)


In [None]:
bet1 = 2.4
bet2 = 1.59

Arb_percentage = 1/bet1 + 1/bet2
print("Arb_per:  {0:.3f}".format(Arb_percentage))

win_amount = 50

total_outlay = (win_amount/bet1) + (win_amount/bet2)

profit = win_amount - total_outlay
roi = profit/total_outlay


print("Total_outlay:  {0:.2f}".format(total_outlay))
print("Bet on 1st: '  {0:.2f}".format(win_amount/bet1))
print("Bet on 2nd: '  {0:.2f}".format(win_amount/bet2))
print("Profit:        {0:.2f}".format(profit))
print("ROI:           {0:.2f}".format(roi))

## Testing 

In [10]:
football_url = 'https://en.stoiximan.gr/sport/soccer/'
football_string = stm.stoiximan_football_text(football_url, driver)

In [11]:
football_string

'Home Soccer Next 24 Hours Full Coupon\nSoccer - Complete Coupon\nAll\n3 hours\n12 hours\n24 hours\nBy start time\nBy Competition\nSoccer - Matches in the next 24 hours\nMatches\n1\nX\n2\nO/U 2.5\nGG/NG\n  26/03\n20:15\nHIFK U20\nFC Futura U20\n3.30\n4.20\n1.80\n16\n26/03\n21:00\nJacuipense BA\nEC Bahia BA\n4.55\n3.20\n1.80\nO\n2.10\nU\n1.62\nYes\n1.93\nNo\n1.75\n35\n26/03\n21:00\nMaguary PE\nRetro FC Brasil\n5.70\n3.45\n1.60\nO\n2.15\nU\n1.60\nYes\n2.15\nNo\n1.60\n35\n26/03\n21:00\nLeonico U20\nCamacariense U20\n8.50\n5.20\n1.27\n16\n26/03\n21:00\nClube Atletico MG (W)\nReal Brasilia DF (W)\n1.83\n3.65\n3.55\nO\n1.65\nU\n2.10\n20\n26/03\n21:00\nAvai Kindermann SC (W)\nSao Paulo SP (W)\n4.90\n4.00\n1.55\nO\n1.65\nU\n2.10\n20\n26/03\n21:00\nAsante Kotoko\nMedeama SC\n1.52\n3.60\n6.10\nO\n2.32\nU\n1.55\n16\n26/03\n21:00\nCoatepeque\nNueva Concepcion\n1.70\n3.50\n4.50\nO\n1.85\nU\n1.85\n20\n26/03\n21:00\nAl Hamriyah\nHatta\n2.82\n3.40\n2.20\nO\n1.80\nU\n1.91\n20\n26/03\n21:00\nBaynounah S

In [17]:
import re
import csv
stoiximan_football_export(football_string)

In [16]:
def stoiximan_football_export(football_string: str): 
    # Create list from the initial string
    initial_list = football_string.split('\n')
    # Remove first elements of the list not needed
    remove_elements = ['Home Soccer Next 24 Hours Full Coupon', 'Soccer - Complete Coupon',
    'All', '3 hours', '12 hours','24 hours','By start time','By Competition',
    'Soccer - Matches in the next 24 hours','Matches','1','X','2','O/U 2.5','GG/NG','0%','Semifinals','In neutral venue','Behind Closed Doors']
    list_1 = [x for x in initial_list if x not in remove_elements]
    # Remove elements that start with "1st leg"
    football_list = [x for x in list_1 if not x.startswith('1st leg:')]
    # Create sublists based on date (matches)
    match = [x for x in football_list if re.match(r'\d{2}/\d{2}', x)]
    index_match = [i for i,x in enumerate(football_list) if re.match(r'\d{2}/\d{2}', x)]
    sublists_matches = [football_list[i:j] for i, j in zip([0]+index_match, index_match + [len(football_list)])]
    # Exclude the initial empty list from sublist_championships
    sublists_matches = sublists_matches[1:]
    
    # Remove the last element of each sublist (extra bets)
    for sublist in sublists_matches:
        sublist.pop()
    # Add extra elements for the missing bets
    extra_element = 'No_bet'
    for sublist in sublists_matches:
        if len(sublist) < 15:
            sublist.extend([extra_element] * (15 - len(sublist)))

    # Set the filename for the output CSV file
    output_file = "stoiximan_football.csv"
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['date', 'time', 'team1', 'team2', '1', 'X', '2', 'O', 'O_odds', 
                         'U', 'U_odds', 'Yes', 'Yes_odds', 'No', 'No_odds'])
        for row in sublists_matches:
            writer.writerow(row)

In [38]:
football_stoiximan.loc[188]

date              27/03
time              20:00
team1          Freiburg
team2       Saarbrucken
1                   2.6
X                  3.35
2                  2.57
O                     O
O_odds             1.88
U                     U
U_odds             1.85
Yes                 Yes
Yes_odds           1.70
No                   No
No_odds            2.05
Name: 188, dtype: object

In [34]:
football_stoiximan

Unnamed: 0,date,time,team1,team2,1,X,2,O,O_odds,U,U_odds,Yes,Yes_odds,No,No_odds
0,26/03,21:00,Jacuipense BA,EC Bahia BA,4.55,3.20,1.80,O,2.10,U,1.62,Yes,1.93,No,1.75
1,26/03,21:00,Maguary PE,Retro FC Brasil,5.70,3.45,1.60,O,2.15,U,1.60,Yes,2.15,No,1.60
2,26/03,21:00,Leonico U20,Camacariense U20,8.50,5.20,1.27,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet
3,26/03,21:00,Clube Atletico MG (W),Real Brasilia DF (W),1.83,3.65,3.55,O,1.65,U,2.10,No_bet,No_bet,No_bet,No_bet
4,26/03,21:00,Avai Kindermann SC (W),Sao Paulo SP (W),4.90,4.00,1.55,O,1.65,U,2.10,No_bet,No_bet,No_bet,No_bet
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,27/03,19:30,FC Martigues,Le Mans UC 72,2.40,2.95,3.10,O,2.12,U,1.65,Yes,1.82,No,1.90
188,27/03,20:00,SC Freiburg II,1. FC Saarbrucken,2.60,3.35,2.57,O,1.88,U,1.85,Yes,1.70,No,2.05
189,27/03,20:00,Berliner AK 07,BSG Chemie Leipzig,2.42,3.30,2.75,O,1.78,U,1.93,Yes,1.62,No,2.15
190,27/03,20:00,Belgium U21,Japan U22,2.05,3.35,3.30,O,1.85,U,1.82,Yes,1.70,No,1.98
