In [7]:
import novibet_functions as nv
import stoiximan_function as stm
import pandas as pd
from unidecode import unidecode
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

## Scrapping

In [9]:
# Set options for headless mode
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# Set options for window size
options.add_argument("--window-size=1920,1200")

# Initialize the webdriver
driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)

  driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)


### 1. Novibet data

Novibet acts different with headless mode. It returns also the extra bets (+ XX). 

If I use the open mode I want to refactor the code.

In [None]:
page_url = 'https://www.novibet.gr/en/sports'

# Football
football_string = nv.novibet_football_text(page_url, driver)
nv.novibet_football_export(football_string)

# Basketball
basketball_string = nv.novibet_basketball_text(driver)
nv.novibet_basketball_export(basketball_string)

# Tennis
tennis_string = nv.novibet_tennis_text(driver)
nv.novibet_tennis_export(tennis_string)

### 2. Stoiximan data

In [5]:
# Football
football_url = 'https://en.stoiximan.gr/sport/soccer/'
football_string = stm.stoiximan_football_text(football_url, driver)
stm.stoiximan_football_export(football_string)

# Basketball
basketball_url = 'https://en.stoiximan.gr/sport/basketball/'
basketball_string = stm.stoiximan_basketball_text(basketball_url, driver)
stm.stoiximan_basketball_export(basketball_string)

# Tennis
tennis_url = 'https://en.stoiximan.gr/sport/tennis/'
tennis_string = stm.stoiximan_tennis_text(tennis_url, driver)
stm.stoiximan_tennis_export(tennis_string)

## Calculations



### 1. Football teams process - Merge

Functions:
* Replace Unicode Character
* Remove Single/Double words 

In [33]:
def remove_unicode(df: pd.DataFrame)->pd.DataFrame:
    cleaned_df = df.apply(lambda x: unidecode(x) if isinstance(x, str) else x)
    return cleaned_df

def remove_single_double_words(df: pd.DataFrame)->pd.DataFrame:
    cleaned_df = df.apply(lambda x: ' '.join([word for word in x.split() if len(set(word)) > 2]) if isinstance(x, str) else x)
    return cleaned_df

In [86]:
# Import
football_novibet = pd.read_csv('data/novibet_football.csv')
football_stoiximan = pd.read_csv('data/stoiximan_football.csv')


# Remove unicodes from teams names
football_novibet['Team1'] = remove_unicode(football_novibet['Team1'].astype(str)).str.lower()
football_novibet['Team2'] = remove_unicode(football_novibet['Team2'].astype(str)).str.lower()
football_stoiximan['Team1'] = remove_unicode(football_stoiximan['Team1'].astype(str)).str.lower()
football_stoiximan['Team2'] = remove_unicode(football_stoiximan['Team2'].astype(str)).str.lower()

# Remove single/double words from teams names
football_novibet['Team1'] = remove_single_double_words(football_novibet['Team1'].astype(str)).str.lower()
football_novibet['Team2'] = remove_single_double_words(football_novibet['Team2'].astype(str)).str.lower()
football_stoiximan['Team1'] = remove_single_double_words(football_stoiximan['Team1'].astype(str)).str.lower()
football_stoiximan['Team2'] = remove_single_double_words(football_stoiximan['Team2'].astype(str)).str.lower()

### 1. Football (Over-Under)

In [87]:
# Keep O/U = 2.5 // Novibet
novibet_o_u = football_novibet[football_novibet['Over'] == 'O 2.5'][['Team1', 'Team2', 'O_odd', 'U_odd']]

# Convert Numeric, exlude no bets // Stoiximan
football_stoiximan['O_odd'] = pd.to_numeric(football_stoiximan['O_odd'], errors='coerce')
football_stoiximan['U_odd'] = pd.to_numeric(football_stoiximan['U_odd'], errors='coerce')
stoiximan_o_u = football_stoiximan[football_stoiximan['O_odd'] != 'No bet'][['Team1', 'Team2', 'O_odd', 'U_odd']]

# Merge on Team 1
merged1 = pd.merge(novibet_o_u, stoiximan_o_u, on='Team1', how='inner')
merged1.drop('Team2_y', axis=1, inplace=True)
merged1.rename({'Team2_x': 'Team2'}, axis=1, inplace=True)
# Merge on Team 2
merged2 = pd.merge(novibet_o_u, stoiximan_o_u, on='Team2', how='inner')
merged2.drop('Team1_y', axis=1, inplace=True)
merged2.rename({'Team1_x': 'Team1'}, axis=1, inplace=True)
# Combine the results and drop duplicates
merge_df = pd.concat([merged1, merged2], axis = 0).drop_duplicates()

# Get the Max values
merge_df['O_max'] = merge_df.apply(lambda row: max(row['O_odd_x'], row['O_odd_y']), axis=1)
merge_df['U_max'] = merge_df.apply(lambda row: max(row['U_odd_x'], row['U_odd_x']), axis=1)

In [116]:
merge_df['arb'] = 1/merge_df['O_max'] + 1/merge_df['U_max']
mask = (merge_df['arb'] >= 0) & (merge_df['arb'] <= 1.0)
result_df = merge_df[mask]
result_df

Unnamed: 0,Team1,Team2,O_odd_x,U_odd_x,O_odd_y,U_odd_y,O_max,U_max,arb
99,ucam murcia,atletico pulpileno,2.08,1.67,2.7,1.39,2.7,1.67,0.969173
18,real titanico,sporting gijon,2.05,1.69,2.52,1.53,2.52,1.69,0.988541
156,orihuela,hercules,2.04,1.7,2.67,1.4,2.67,1.7,0.962767


In [64]:
from fuzzywuzzy import fuzz

from fuzzywuzzy import fuzz

matches = []
for index, row in football_novibet.iterrows():
    team1_novibet = row['team1']
    team2_novibet = row['team2']
    team1_stoiximan = football_stoiximan.loc[football_stoiximan['team1'].apply(lambda x: fuzz.token_sort_ratio(x, team1_novibet)) > 80, 'team1'].values
    team2_stoiximan = football_stoiximan.loc[football_stoiximan['team2'].apply(lambda x: fuzz.token_sort_ratio(x, team2_novibet)) > 80, 'team2'].values
    
    if len(team1_stoiximan) > 0 and len(team2_stoiximan) > 0:
        # both team names are similar in both dataframes
        # create a dictionary with the data and append it to the matches list
        match = {
            'team1': team1_novibet,
            'team2': team2_novibet,
            'odds_over_novibet': row['odds_over'],
            'odds_under_novibet': row['odds_under'],
        }
        
        # check if there is a match in football_stoiximan
        matching_row = football_stoiximan[(football_stoiximan['team1'] == team1_stoiximan[0]) & (football_stoiximan['team2'] == team2_stoiximan[0])]
        if len(matching_row) > 0:
            # there is a match, add the odds values to the dictionary
            match['odds_over_stoiximan'] = matching_row['O_odds'].values[0]
            match['odds_under_stoiximan'] = matching_row['U_odds'].values[0]
            
            matches.append(match)
        else:
            # no match, skip this row
            continue

# create a new dataframe with the matched data
merged_df_lev = pd.DataFrame(matches)


In [65]:
len(merged_df_lev)

223

In [66]:
merged_df_lev['max_O'] = merged_df_lev.apply(lambda row: max(row['odds_over_novibet'], row['odds_over_stoiximan']), axis=1)
merged_df_lev['max_U'] = merged_df_lev.apply(lambda row: max(row['odds_under_novibet'], row['odds_under_stoiximan']), axis=1)

merged_df_lev['arb'] = 1/merged_df_lev['max_O'] + 1/merged_df_lev['max_U']

In [67]:
mask = (merged_df_lev['arb'] >= 0.2) & (merged_df_lev['arb'] <= 1.03)
result_df = merged_df_lev[mask]
result_df

Unnamed: 0,team1,team2,odds_over_novibet,odds_under_novibet,odds_over_stoiximan,odds_under_stoiximan,max_O,max_U,arb
174,Oakland Roots SC,Memphis 901 FC,2.06,1.71,1.83,1.88,2.06,1.88,1.017352


In [46]:
merged_df['max_O'] = merged_df.apply(lambda row: max(row['odds_over'], row['O_odds']), axis=1)
merged_df['max_U'] = merged_df.apply(lambda row: max(row['odds_under'], row['U_odds']), axis=1)

merged_df['arb'] = 1/merged_df['max_O'] + 1/merged_df['max_U']


In [50]:
merged_df

Unnamed: 0,team1_x,team2,odds_over,odds_under,Over,team1_y,O_odds,U_odds,max_O,max_U,arb
0,Kazakhstan,Denmark,1.64,2.25,O 2.5,Kazakhstan,1.70,2.07,1.70,2.25,1.032680
1,England,Ukraine,1.71,2.15,O 2.5,England,1.70,2.18,1.71,2.18,1.043511
2,Liechtenstein,Iceland,1.62,2.25,O 2.5,Liechtenstein,1.62,2.18,1.62,2.25,1.061728
3,Northern Ireland,Finland,2.45,1.55,O 2.5,Northern Ireland,2.55,1.52,2.55,1.55,1.037318
4,Slovakia,Bosnia & Herzegovina,2.25,1.64,O 2.5,Slovakia,2.27,1.65,2.27,1.65,1.046589
...,...,...,...,...,...,...,...,...,...,...,...
173,Cerro Porteno,Nacional Asuncion,2.15,1.68,O 2.5,Cerro Porteno,2.12,1.70,2.15,1.70,1.053352
174,CS Esportiva AL,Cruzeiro de Arapiraca,1.94,1.77,O 2.5,Clube Sociedade Esportiva,2.05,1.70,2.05,1.77,1.052777
175,JS Kairouanaise,AS Mhamdia,2.25,1.58,O 2.5,Js Kairouanaise,2.27,1.57,2.27,1.58,1.073440
176,Martinique,Costa Rica,1.70,2.10,O 2.5,Martinique,1.65,2.10,1.70,2.10,1.064426


In [49]:
mask = (merged_df['arb'] >= 0.2) & (merged_df['arb'] <= 1.03)
result_df = merged_df[mask]
result_df

Unnamed: 0,team1_x,team2,odds_over,odds_under,Over,team1_y,O_odds,U_odds,max_O,max_U,arb
138,Oakland Roots SC,Memphis 901 FC,2.06,1.71,O 2.5,Oakland Roots,1.83,1.88,2.06,1.88,1.017352


In [39]:
merged_df['arb']

0      1.052391
1      1.004480
2      0.996692
3      0.091057
4      1.053492
         ...   
286    0.213033
287    0.074390
288    1.058029
289    0.305128
290    0.779221
Name: arb, Length: 291, dtype: float64

In [31]:
merged_df['1_y'].dtype

dtype('O')

In [34]:
football_stoiximan['1'] = football_stoiximan['1'].apply(pd.to_numeric, errors='coerce')
football_stoiximan['X'] = football_stoiximan['1'].apply(pd.to_numeric, errors='coerce')
football_stoiximan['2'] = football_stoiximan['1'].apply(pd.to_numeric, errors='coerce')

In [None]:
bet1 = 2.4
bet2 = 1.59

Arb_percentage = 1/bet1 + 1/bet2
print("Arb_per:  {0:.3f}".format(Arb_percentage))

win_amount = 50

total_outlay = (win_amount/bet1) + (win_amount/bet2)

profit = win_amount - total_outlay
roi = profit/total_outlay


print("Total_outlay:  {0:.2f}".format(total_outlay))
print("Bet on 1st: '  {0:.2f}".format(win_amount/bet1))
print("Bet on 2nd: '  {0:.2f}".format(win_amount/bet2))
print("Profit:        {0:.2f}".format(profit))
print("ROI:           {0:.2f}".format(roi))

## Testing 

In [10]:
football_url = 'https://en.stoiximan.gr/sport/soccer/'
football_string = stm.stoiximan_football_text(football_url, driver)

In [11]:
football_string

'Home Soccer Next 24 Hours Full Coupon\nSoccer - Complete Coupon\nAll\n3 hours\n12 hours\n24 hours\nBy start time\nBy Competition\nSoccer - Matches in the next 24 hours\nMatches\n1\nX\n2\nO/U 2.5\nGG/NG\n  26/03\n20:15\nHIFK U20\nFC Futura U20\n3.30\n4.20\n1.80\n16\n26/03\n21:00\nJacuipense BA\nEC Bahia BA\n4.55\n3.20\n1.80\nO\n2.10\nU\n1.62\nYes\n1.93\nNo\n1.75\n35\n26/03\n21:00\nMaguary PE\nRetro FC Brasil\n5.70\n3.45\n1.60\nO\n2.15\nU\n1.60\nYes\n2.15\nNo\n1.60\n35\n26/03\n21:00\nLeonico U20\nCamacariense U20\n8.50\n5.20\n1.27\n16\n26/03\n21:00\nClube Atletico MG (W)\nReal Brasilia DF (W)\n1.83\n3.65\n3.55\nO\n1.65\nU\n2.10\n20\n26/03\n21:00\nAvai Kindermann SC (W)\nSao Paulo SP (W)\n4.90\n4.00\n1.55\nO\n1.65\nU\n2.10\n20\n26/03\n21:00\nAsante Kotoko\nMedeama SC\n1.52\n3.60\n6.10\nO\n2.32\nU\n1.55\n16\n26/03\n21:00\nCoatepeque\nNueva Concepcion\n1.70\n3.50\n4.50\nO\n1.85\nU\n1.85\n20\n26/03\n21:00\nAl Hamriyah\nHatta\n2.82\n3.40\n2.20\nO\n1.80\nU\n1.91\n20\n26/03\n21:00\nBaynounah S

In [17]:
import re
import csv
stoiximan_football_export(football_string)

In [16]:
def stoiximan_football_export(football_string: str): 
    # Create list from the initial string
    initial_list = football_string.split('\n')
    # Remove first elements of the list not needed
    remove_elements = ['Home Soccer Next 24 Hours Full Coupon', 'Soccer - Complete Coupon',
    'All', '3 hours', '12 hours','24 hours','By start time','By Competition',
    'Soccer - Matches in the next 24 hours','Matches','1','X','2','O/U 2.5','GG/NG','0%','Semifinals','In neutral venue','Behind Closed Doors']
    list_1 = [x for x in initial_list if x not in remove_elements]
    # Remove elements that start with "1st leg"
    football_list = [x for x in list_1 if not x.startswith('1st leg:')]
    # Create sublists based on date (matches)
    match = [x for x in football_list if re.match(r'\d{2}/\d{2}', x)]
    index_match = [i for i,x in enumerate(football_list) if re.match(r'\d{2}/\d{2}', x)]
    sublists_matches = [football_list[i:j] for i, j in zip([0]+index_match, index_match + [len(football_list)])]
    # Exclude the initial empty list from sublist_championships
    sublists_matches = sublists_matches[1:]
    
    # Remove the last element of each sublist (extra bets)
    for sublist in sublists_matches:
        sublist.pop()
    # Add extra elements for the missing bets
    extra_element = 'No_bet'
    for sublist in sublists_matches:
        if len(sublist) < 15:
            sublist.extend([extra_element] * (15 - len(sublist)))

    # Set the filename for the output CSV file
    output_file = "stoiximan_football.csv"
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['date', 'time', 'team1', 'team2', '1', 'X', '2', 'O', 'O_odds', 
                         'U', 'U_odds', 'Yes', 'Yes_odds', 'No', 'No_odds'])
        for row in sublists_matches:
            writer.writerow(row)

In [38]:
football_stoiximan.loc[188]

date              27/03
time              20:00
team1          Freiburg
team2       Saarbrucken
1                   2.6
X                  3.35
2                  2.57
O                     O
O_odds             1.88
U                     U
U_odds             1.85
Yes                 Yes
Yes_odds           1.70
No                   No
No_odds            2.05
Name: 188, dtype: object

In [34]:
football_stoiximan

Unnamed: 0,date,time,team1,team2,1,X,2,O,O_odds,U,U_odds,Yes,Yes_odds,No,No_odds
0,26/03,21:00,Jacuipense BA,EC Bahia BA,4.55,3.20,1.80,O,2.10,U,1.62,Yes,1.93,No,1.75
1,26/03,21:00,Maguary PE,Retro FC Brasil,5.70,3.45,1.60,O,2.15,U,1.60,Yes,2.15,No,1.60
2,26/03,21:00,Leonico U20,Camacariense U20,8.50,5.20,1.27,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet,No_bet
3,26/03,21:00,Clube Atletico MG (W),Real Brasilia DF (W),1.83,3.65,3.55,O,1.65,U,2.10,No_bet,No_bet,No_bet,No_bet
4,26/03,21:00,Avai Kindermann SC (W),Sao Paulo SP (W),4.90,4.00,1.55,O,1.65,U,2.10,No_bet,No_bet,No_bet,No_bet
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,27/03,19:30,FC Martigues,Le Mans UC 72,2.40,2.95,3.10,O,2.12,U,1.65,Yes,1.82,No,1.90
188,27/03,20:00,SC Freiburg II,1. FC Saarbrucken,2.60,3.35,2.57,O,1.88,U,1.85,Yes,1.70,No,2.05
189,27/03,20:00,Berliner AK 07,BSG Chemie Leipzig,2.42,3.30,2.75,O,1.78,U,1.93,Yes,1.62,No,2.15
190,27/03,20:00,Belgium U21,Japan U22,2.05,3.35,3.30,O,1.85,U,1.82,Yes,1.70,No,1.98
