In [1]:
import duckdb
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from unidecode import unidecode
from webdriver_manager.chrome import ChromeDriverManager

import novibet_functions as nv
import queries as sq
import stoiximan_function as stm

## Scraping

In [2]:
# Run Chrome without a visible window, with a fixed viewport size
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1200")

# Initialize the webdriver.
# Selenium 4 deprecated passing the chromedriver path as a positional argument
# (later 4.x releases removed it); the path is handed over through a Service
# object instead.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

[WDM] - Downloading: 100%|██████████| 6.79M/6.79M [00:00<00:00, 7.99MB/s]
  driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)


### 1. Novibet data

Novibet behaves differently in headless mode: it also returns the extra bets (+ XX).

If I switch to the open (non-headless) mode, I want to refactor the code.

In [3]:
# Entry page for the Novibet scrape
page_url = 'https://www.novibet.gr/en/sports'

# Football
# Only this call receives page_url; the basketball and tennis calls below are
# given just the driver, so they presumably continue from the page state this
# call leaves behind. Keep the sport order unless novibet_functions says
# otherwise (TODO confirm).
football_string = nv.novibet_football_text(page_url, driver)
nv.novibet_football_export(football_string)

# Basketball
basketball_string = nv.novibet_basketball_text(driver)
nv.novibet_basketball_export(basketball_string)

# Tennis
tennis_string = nv.novibet_tennis_text(driver)
nv.novibet_tennis_export(tennis_string)

### 2. Stoiximan data

In [4]:
# Stoiximan has one landing page per sport
football_url = 'https://en.stoiximan.gr/sport/soccer/'
basketball_url = 'https://en.stoiximan.gr/sport/basketball/'
tennis_url = 'https://en.stoiximan.gr/sport/tennis/'

# Football: grab the page text, then hand it to the exporter
football_string = stm.stoiximan_football_text(football_url, driver)
stm.stoiximan_football_export(football_string)

# Basketball
basketball_string = stm.stoiximan_basketball_text(basketball_url, driver)
stm.stoiximan_basketball_export(basketball_string)

# Tennis
tennis_string = stm.stoiximan_tennis_text(tennis_url, driver)
stm.stoiximan_tennis_export(tennis_string)

## Calculations

Functions:
* Replace Unicode characters with their closest ASCII equivalents
* Remove short words (words with two or fewer distinct characters)
* Query one or two dataframes with DuckDB

In [None]:
def remove_unicode(df: "pd.Series | pd.DataFrame") -> "pd.Series | pd.DataFrame":
    """Transliterate every string value to ASCII with ``unidecode``.

    Args:
        df: A Series (how the notebook calls it, one team-name column at a
            time) or a whole DataFrame.

    Returns:
        A new object of the same kind in which each ``str`` value has been
        passed through ``unidecode``; non-string values are returned as they are.
    """
    def _to_ascii(value):
        return unidecode(value) if isinstance(value, str) else value

    if isinstance(df, pd.DataFrame):
        # DataFrame.apply passes whole columns (Series) to the callback, so the
        # isinstance(str) test would never fire and the frame would come back
        # unchanged. Descend into each column to work on the individual values.
        return df.apply(lambda column: column.apply(_to_ascii))
    return df.apply(_to_ascii)

def remove_single_double_words(df: "pd.Series | pd.DataFrame") -> "pd.Series | pd.DataFrame":
    """Drop short words from every string value.

    A word (whitespace-separated token) is kept only when it contains more
    than two *distinct* characters; the surviving words are re-joined with a
    single space. Non-string values are returned as they are.

    NOTE(review): the filter counts distinct characters (``len(set(word))``),
    not the word length, so a token such as ``"aaa"`` is dropped as well —
    confirm this is intended.

    Args:
        df: A Series (how the notebook calls it, one team-name column at a
            time) or a whole DataFrame.

    Returns:
        A new object of the same kind with the short words removed.
    """
    def _drop_short_words(value):
        if not isinstance(value, str):
            return value
        return ' '.join(word for word in value.split() if len(set(word)) > 2)

    if isinstance(df, pd.DataFrame):
        # DataFrame.apply passes whole columns (Series) to the callback, so the
        # isinstance(str) test would never fire and the frame would come back
        # unchanged. Descend into each column to work on the individual values.
        return df.apply(lambda column: column.apply(_drop_short_words))
    return df.apply(_drop_short_words)

def dbrun(dbcon: duckdb.DuckDBPyConnection, query: str, df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
    """Expose the dataframes to DuckDB and return the de-duplicated query result.

    Args:
        dbcon: Open DuckDB connection the dataframes are registered on.
        query: SQL text; it can refer to ``df1`` as ``table1`` and, when
            given, to ``df2`` as ``table2``.
        df1: Dataframe registered under the name ``table1``.
        df2: Optional dataframe registered under the name ``table2``.

    Returns:
        The query result as a pandas DataFrame with duplicate rows dropped.
    """
    frames_by_name = {'table1': df1}
    # The second table is optional; register it only when a dataframe was passed
    if df2 is not None:
        frames_by_name['table2'] = df2

    for table_name, frame in frames_by_name.items():
        dbcon.register(table_name, frame)

    relation = dbcon.query(query)
    return relation.to_df().drop_duplicates()

### 1. Football teams process

In [47]:
# Load the scraped football odds of both bookmakers
football_novibet = pd.read_csv('data/novibet_football.csv')
football_stoiximan = pd.read_csv('data/stoiximan_football.csv')

# Normalise the team names identically for both bookmakers:
# 1) transliterate to ASCII and lower-case, 2) strip the short words.
for bookmaker_odds in (football_novibet, football_stoiximan):
    for team_column in ('Team1', 'Team2'):
        ascii_names = remove_unicode(bookmaker_odds[team_column].astype(str)).str.lower()
        bookmaker_odds[team_column] = remove_single_double_words(ascii_names.astype(str)).str.lower()

# DuckDB connection used by the football queries below
dbcon = duckdb.connect()

#### 1.1 Football (Over-Under)

In [4]:
# Over/Under arbitrage: Novibet is exposed to the query as table1, Stoiximan as table2
df_over_under = dbrun(
    dbcon=dbcon,
    query=sq.query_over_under,
    df1=football_novibet,
    df2=football_stoiximan,
)
df_over_under

Unnamed: 0,Team1,Team2,O_novibet,U_novibet,O_stoiximan,U_stoiximan,O_max,U_max,arb,Team1_novibet,Team2_novibet,Team1_stoiximan,Team2_stoiximan
0,richards bay,orlando pirates,2.45,1.52,1.88,1.82,2.45,1.82,0.957614,richards bay,orlando pirates,,orlando pirates
1,lechia gdansk,slask wroclaw,2.04,1.76,1.62,2.12,2.04,2.12,0.961894,lechia gdansk,slask wroclaw,,slask wroclaw
2,chrobry glogow,miedz legnica,1.65,2.1,2.05,1.72,2.05,2.1,0.963995,chrobry glogow,miedz legnica,,miedz legnica
3,korona kielce,miedz legnica,2.05,1.75,1.65,2.1,2.05,2.1,0.963995,korona kielce,miedz legnica,,miedz legnica


#### 1.2 Football (GG-NG)

In [5]:
# GG/NG arbitrage: Novibet is exposed to the query as table1, Stoiximan as table2
df_gg_ng = dbrun(
    dbcon=dbcon,
    query=sq.query_gg_ng,
    df1=football_novibet,
    df2=football_stoiximan,
)
df_gg_ng

Unnamed: 0,Team1,Team2,GG_novibet,NG_novibet,GG_stoiximan,NG_stoiximan,GG_max,NG_max,arb,Team1_novibet,Team2_novibet,Team1_stoiximan,Team2_stoiximan
0,weiche flensburg,vfb lubeck,1.45,2.55,1.75,1.95,1.75,2.55,0.963585,weiche flensburg,vfb lubeck,,vfb lubeck
1,chrobry glogow,miedz legnica,1.58,2.25,1.83,1.87,1.83,2.25,0.990893,chrobry glogow,miedz legnica,,miedz legnica


#### 1.3 Football (1-X-2)

In [67]:
# 1/X/2 arbitrage: Novibet is exposed to the query as table1, Stoiximan as table2
df_1x2 = dbrun(
    dbcon=dbcon,
    query=sq.query_1X2,
    df1=football_novibet,
    df2=football_stoiximan,
)
df_1x2

Unnamed: 0,Team1,Team2,one_novibet,x_novibet,two_novibet,one_stoiximan,x_stoiximan,two_stoiximan,one_max,x_max,two_max,arb,Team1_novibet,Team2_novibet,Team1_stoiximan,Team2_stoiximan
0,motor lublin,slask wroclaw,1.65,3.8,4.4,1.42,4.4,6.4,1.65,4.4,6.4,0.989583,motor lublin,slask wroclaw,motor lublin,slask wroclaw
2,manchester city,liverpool,1.66,4.4,5.7,1.64,4.52,5.91,1.66,4.52,5.91,0.992853,manchester city,liverpool,manchester city,liverpool


### 2. Basketball teams process

In [68]:
# Load the scraped basketball odds of both bookmakers
basketball_novibet = pd.read_csv('data/novibet_basketball.csv')
basketball_stoiximan = pd.read_csv('data/stoiximan_basketball.csv')

# Normalise the team names identically for both bookmakers:
# 1) transliterate to ASCII and lower-case, 2) strip the short words.
for bookmaker_odds in (basketball_novibet, basketball_stoiximan):
    for team_column in ('Team1', 'Team2'):
        ascii_names = remove_unicode(bookmaker_odds[team_column].astype(str)).str.lower()
        bookmaker_odds[team_column] = remove_single_double_words(ascii_names.astype(str)).str.lower()

# DuckDB connection used by the basketball queries below
dbcon = duckdb.connect()

#### 2.1 Basketball (Win1-Win2)

In [None]:
# Win1/Win2 Arbitrage

# NOTE(review): `query_1_2` is not defined in any cell visible in this notebook,
# so this cell would raise NameError on a fresh kernel unless it is defined
# elsewhere. The football cells take their queries from the `sq` module —
# confirm whether `sq.query_1_2` was intended here (TODO).
query_1_2


# Novibet is passed as df1 and Stoiximan as df2, mirroring the football cells
df_1_2 = dbrun(dbcon, query_1_2, basketball_novibet, basketball_stoiximan)
df_1_2

In [64]:
from fuzzywuzzy import fuzz

# Minimum token_sort_ratio score for two team names to count as the same team
SIMILARITY_THRESHOLD = 80

# NOTE(review): this cell reads the columns 'team1'/'team2', 'odds_over'/'odds_under'
# and 'O_odds'/'U_odds', whereas the football cleaning cell writes 'Team1'/'Team2' —
# confirm which schema the CSV files currently have.
matches = []
for index, row in football_novibet.iterrows():
    team1_novibet = row['team1']
    team2_novibet = row['team2']

    # Score every Stoiximan fixture against this Novibet fixture, one side at a time
    team1_similar = football_stoiximan['team1'].apply(
        lambda x: fuzz.token_sort_ratio(x, team1_novibet)) > SIMILARITY_THRESHOLD
    team2_similar = football_stoiximan['team2'].apply(
        lambda x: fuzz.token_sort_ratio(x, team2_novibet)) > SIMILARITY_THRESHOLD

    # Both sides must match on the SAME Stoiximan row. Picking the first team1
    # candidate and the first team2 candidate independently can pair names from
    # two different fixtures and miss the right one.
    matching_row = football_stoiximan[team1_similar & team2_similar]
    if len(matching_row) == 0:
        # no Stoiximan fixture resembles this one, skip it
        continue

    matches.append({
        'team1': team1_novibet,
        'team2': team2_novibet,
        'odds_over_novibet': row['odds_over'],
        'odds_under_novibet': row['odds_under'],
        # when several fixtures qualify, the first one is used
        'odds_over_stoiximan': matching_row['O_odds'].values[0],
        'odds_under_stoiximan': matching_row['U_odds'].values[0],
    })

# create a new dataframe with the matched data
merged_df_lev = pd.DataFrame(matches)


In [None]:
# Decimal odds of the two opposing outcomes
bet1 = 2.4
bet2 = 1.59

# Sum of the inverse odds; the profit computed below is positive only when this is below 1
Arb_percentage = 1/bet1 + 1/bet2
print("Arb_per:  {0:.3f}".format(Arb_percentage))

# Amount to be paid out whichever outcome wins
win_amount = 50

# Stake on each outcome so that either one pays out win_amount
stake1 = win_amount/bet1
stake2 = win_amount/bet2
total_outlay = stake1 + stake2

profit = win_amount - total_outlay
roi = profit/total_outlay


# Report lines, printed in order as (format string, value) pairs
for template, amount in (
    ("Total_outlay:  {0:.2f}", total_outlay),
    ("Bet on 1st: '  {0:.2f}", stake1),
    ("Bet on 2nd: '  {0:.2f}", stake2),
    ("Profit:        {0:.2f}", profit),
    ("ROI:           {0:.2f}", roi),
):
    print(template.format(amount))

## Testing 