# dashboard-scraper

This notebook contains the code needed to scrape useful information from the dashboard on the following page:
http://184.73.28.182/

In [31]:
### import 
from collections import namedtuple
from datetime import datetime
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [32]:
### start a Chrome session and load the dashboard page
options = webdriver.ChromeOptions()
# uncomment the next line to run without a visible browser window
#options.add_argument('headless')
# Chrome documents the window size switch as "width,height"; the "1200x600"
# spelling is not parsed by the regular (non-headless) browser
options.add_argument('window-size=1200,600')
# `options=` replaces the deprecated `chrome_options=` keyword
driver = webdriver.Chrome(options=options)
driver.get('http://184.73.28.182/')

In [33]:
### get list of all games listed as suggested bets
# use the generic find_element(By...) API, as the later cells of this notebook
# already do; the find_element_by_* helpers are deprecated
container_element = driver.find_element(By.CLASS_NAME, 'container')
table_element = container_element.find_element(By.CSS_SELECTOR, 'table.table-striped')
# every 'accordion-toggle' element is treated as one suggested game
suggested_games = table_element.find_elements(By.CLASS_NAME, 'accordion-toggle')

In [34]:
### click every suggested game once so that it is expanded
for game_toggle in suggested_games:
    game_toggle.click()

In [35]:
### extract odds from bookies
# collect all 'hiddenRow' elements of the table; presumably one per suggested
# game, since they are paired with suggested_games by index further down
odds_matrices = table_element.find_elements(By.CLASS_NAME, 'hiddenRow')

In [36]:
### create named tuple from game and odd_matrix containing the following fields:
#   * timestamp   -- time at which the values were scraped
#   * date        -- date of the game as shown in the game row
#   * game
#   * league
#   * odd_matrix  -- DataFrame with the outcomes '1', 'X', '2' as rows and one column per bookie

# get input for the function to be implemented
game = suggested_games[0]
odd_matrix = odds_matrices[0]

# define the record type; the record itself is instantiated at the end of the
# cell, once every value is known (assigning attributes on the namedtuple
# class would overwrite its field accessors and never create a record)
ScrapedGame = namedtuple('ScrapedGame', ['timestamp', 'date', 'game', 'league', 'odd_matrix'])

# read the text of every cell of the game row
game_contents = [element.text for element in game.find_elements(By.CSS_SELECTOR, 'td')]

# get single rows from odd matrix
table = odd_matrix.find_element(By.CLASS_NAME, 'accordian-body').find_element(By.CLASS_NAME, 'table-striped')
table_body = table.find_element(By.CSS_SELECTOR, 'tbody')
table_body_elements = table_body.find_elements(By.CSS_SELECTOR, 'tr')

# add column in scraped_odd_matrix, containing the bookie as column name, and odds as values
scraped_odd_matrix = pd.DataFrame(index=['1', 'X', '2'])
for body in table_body_elements:
    body_elements = body.find_elements(By.CSS_SELECTOR, 'td')

    bookie = str(body_elements[0].text)
    # cells 1, 2 and 3 hold the odds for '1', 'X' and '2'; only the first
    # line of each cell's text is the odds value
    odds = [float(body_elements[column].text.split('\n')[0]) for column in (1, 2, 3)]

    scraped_odd_matrix[bookie] = odds

# fill values in named tuple from game
scraped_game = ScrapedGame(
    timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    date=str(game_contents[4]),
    game=str(game_contents[1]),
    league=str(game_contents[2]),
    odd_matrix=scraped_odd_matrix,
)

In [43]:
# display the scraped odds matrix (rows: outcomes '1', 'X', '2'; columns: bookies)
scraped_game.odd_matrix

Unnamed: 0,10Bet,12BET,188BET,1xBet,888sport,bet-at-home,bet365,Betclic,Betfair,Betfair Exchange,...,Interwetten,mybet,Paddy Power,Pinnacle,SBOBET,Sportingbet,Tipico,Unibet,William Hill,youwin
1,1.71,1.72,1.72,1.76,1.75,1.67,1.66,1.68,1.67,1.71,...,1.9,1.7,1.62,1.72,1.73,1.73,1.75,1.75,1.73,1.7
X,3.85,3.7,3.75,4.1,3.65,3.96,3.79,3.7,3.9,3.85,...,3.45,4.1,3.75,4.12,3.6,3.5,3.8,3.7,3.8,3.4
2,4.6,4.5,4.3,4.76,4.35,4.4,5.0,4.5,4.6,4.71,...,3.8,4.6,4.5,4.83,4.5,4.5,4.5,4.4,4.2,4.5


In [6]:
### extract information from game
from collections import namedtuple
# import the class, not the module: `import datetime` would rebind the
# `datetime` name that the imports at the top of the notebook bind to the class
from datetime import datetime

game = suggested_games[0]
# text of every cell of the game row, in column order
game_contents = [element.text for element in game.find_elements(By.CSS_SELECTOR, 'td')]

# column 8 holds two numbers separated by '/': mean first, median second
mean_and_median = game_contents[8].split('/')

game_info = {
    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    'sport': str(game_contents[0]),
    'match_title': str(game_contents[1]),
    'league': str(game_contents[2]),
    'result_to_bet': str(game_contents[3]),
    'date': str(game_contents[4]),
    'time_to_match': str(game_contents[5]),
    'best_bookie': str(game_contents[6]),
    'best_odds': float(game_contents[7]),
    'mean': float(mean_and_median[0].strip()),
    'median': float(mean_and_median[1].strip()),
}

In [7]:
# display the dictionary of values extracted from the first suggested game
game_info

{'best_bookie': 'Interwetten',
 'best_odds': 2.1,
 'date': '2018-01-28 21:00:00',
 'league': 'France: Ligue 1',
 'match_title': 'Marseille vs. Monaco',
 'mean': 1.87,
 'median': 1.86,
 'result_to_bet': '1',
 'sport': 'soccer',
 'time_to_match': '09:47:29',
 'timestamp': '2018-01-28 11:13:12'}

In [8]:
### extract information from odds_matrix
odds_matrix = odds_matrices[0]
# the lookup combines two classes, which needs a CSS selector:
# By.CLASS_NAME is meant for a single class name
table = odds_matrix.find_element(By.CSS_SELECTOR, '.table-striped.table-bordered')
table_body = table.find_element(By.CSS_SELECTOR, 'tbody')
table_body_elements = table_body.find_elements(By.CSS_SELECTOR, 'tr')
# iterate over table_body_elements, building one dictionary per table row
OddsInfoArray = []
for body in table_body_elements:
    body_elements = body.find_elements(By.CSS_SELECTOR, 'td')

    OddsInfo = {'bookie': str(body_elements[0].text)}
    # cells 1, 2 and 3 belong to the outcomes '1', 'X' and '2'; the first line
    # of a cell's text is stored as the odds, the second line as its timestamp
    for outcome, column in (('1', 1), ('X', 2), ('2', 3)):
        cell_lines = body_elements[column].text.split('\n')
        OddsInfo['odds_' + outcome] = float(cell_lines[0])
        OddsInfo['timestamp_' + outcome] = str(cell_lines[1])
    OddsInfoArray.append(OddsInfo)

In [9]:
### check whether the dashboard reports that no games are available:
# True when the container's text is exactly the "no bets" message,
# False when the container shows anything else (e.g. suggested games)
str(container_element.text) == 'No advantageous bet opportunities currently available.'

False