In [52]:
# coding: utf-8

# In[1]:
import itertools
from datetime import timedelta

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait


# get_ipython().magic(u'matplotlib inline')


def fix_str(s):
    """Normalize a scraped text fragment and return it.

    The input is stripped of surrounding whitespace. If it contains a colon
    (e.g. a score or clock such as "01:05"), anything from the first "OT"
    marker onwards is discarded, each colon-separated part is converted to an
    integer (dropping leading zeros) and the parts are re-joined with single
    spaces: "01:05 OT" -> "1 5". Strings without a colon are only stripped.

    Raises:
        ValueError: if a colon-separated part is not an integer.
    """
    s = s.strip()
    if ':' in s:
        if 'OT' in s:
            # Used for am-football model: drop the overtime marker and what follows.
            s = s[:s.find('OT')].strip()
        s = ' '.join(str(int(part)) for part in s.split(':'))
    # The original built the value but never returned it (callers got None).
    return s


def fix_odds(s):
    """Convert a scraped odds string to decimal odds, returned as a string.

    Recognized notations (checked in this order):
      * "+150"  (sign followed by an amount) -> amount / 100 + 1
      * "-200"  (sign followed by an amount) -> (amount + 100) / amount
      * "5/2"   (fractional)                 -> numerator / denominator + 1
      * "1.95"  (already decimal)            -> returned unchanged (stripped)

    Anything else, including values that fail to parse, yields the neutral
    fallback "1.0". The fallback is a string like every other return value so
    that callers can safely ' '.join() the results; the previous float
    fallback made such a join raise TypeError.
    """
    fallback = '1.0'
    try:
        # Strip first so that s[1:] really removes the sign character.
        s = s.strip()
        if '+' in s:
            return str(float(s[1:]) / 100 + 1)
        if '-' in s:
            amount = float(s[1:])
            return str((amount + 100) / amount)
        if '/' in s:
            numerator, denominator = s.split('/', 1)
            return str(float(numerator) / float(denominator) + 1.0)
        if '.' in s:
            return s
        # Unrecognized notation: echo it so it can be spotted in the run log.
        print(s)
        return fallback
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        # Non-string input, malformed number or a zero amount/denominator.
        return fallback


class AcquireMatchupDatetimeOddsTwoChoices(object):
    """Scrape upcoming two-way matchups (date, teams, average odds) and join them with model output.

    Calling an instance runs three steps in order:

    1. generate_data: Selenium scrape of the "MatchesList" table at the given URL.
    2. clean_data: odds clean-up, numeric conversion and insertion of the League column.
    3. merge_with_tableau_output: inner join with the predictions CSV, then export of two
       CSV files (a "visitor @ home" layout and a "home vs visitor" layout).

    Intermediate frames are kept on the instance (game_df00, game_df_test, game_df_test1,
    game_df0, game_df1, left, df, nfl_us_p_df, nfl_eu_p_df) so they can be inspected.
    """

    # Default ChromeDriver location; raw string so the backslashes stay literal.
    DEFAULT_CHROMEDRIVER_PATH = r"C:\Users\jbadiabo\chromedriver.exe"

    # Upper bound on page-load attempts (the wait grows by 3 seconds per attempt).
    MAX_PAGE_LOAD_ATTEMPTS = 5

    def __init__(self, betbrain_upcoming_games_url, cs_team_stats_filename, league_name,
                 tableau_filename, upcoming_games_output_filename_us, upcoming_games_output_filename_eu,
                 chromedriver_path=None):
        """Store the configuration.

        Args:
            betbrain_upcoming_games_url: page listing the upcoming matches.
            cs_team_stats_filename: stored as given; not read by the methods of this class.
            league_name: value written to the 'League' column.
            tableau_filename: CSV with the model predictions to merge with.
            upcoming_games_output_filename_us: output CSV path, "visitor @ home" layout.
            upcoming_games_output_filename_eu: output CSV path, "home vs visitor" layout.
            chromedriver_path: optional ChromeDriver executable path; defaults to
                DEFAULT_CHROMEDRIVER_PATH.
        """
        self.betbrain_upcoming_games_url = betbrain_upcoming_games_url
        self.cs_team_stats_filename = cs_team_stats_filename
        self.league_name = league_name
        self.tableau_filename = tableau_filename
        self.upcoming_games_output_filename_us = upcoming_games_output_filename_us
        self.upcoming_games_output_filename_eu = upcoming_games_output_filename_eu
        self.chromedriver_path = chromedriver_path or self.DEFAULT_CHROMEDRIVER_PATH
        self.game_df = None

    def __call__(self):
        """Run scrape, clean and merge; stop quietly when the scrape produced no data."""
        self.generate_data(self.betbrain_upcoming_games_url)
        if getattr(self, 'game_df', None) is None:
            # The matches list never showed up: nothing to clean or merge.
            return
        self.clean_data(self.game_df, self.league_name)
        self.merge_with_tableau_output(self.game_df, self.tableau_filename, self.upcoming_games_output_filename_us,
                                       self.upcoming_games_output_filename_eu)

    # -------------------------------------------------------------------------------------------------------------------------------------

    def _open_matches_page(self, url):
        """Return a browser with the "MatchesList" element present, or None when every attempt timed out."""
        delay = 5
        for _attempt in range(self.MAX_PAGE_LOAD_ATTEMPTS):
            browser = webdriver.Chrome(self.chromedriver_path)
            try:
                browser.get(url)
                WebDriverWait(browser, delay).until(ec.presence_of_element_located((By.CLASS_NAME, "MatchesList")))
            except TimeoutException:
                # Page too slow: close this browser and retry with a longer wait.
                browser.quit()
                delay += 3
                continue
            except Exception:
                # Never leave a Chrome process behind on unexpected errors.
                browser.quit()
                raise
            return browser
        return None

    @staticmethod
    def _scrape_rows(body_rows):
        """Extract (dates, matchup titles, space-joined odds) from the "Match" row elements."""
        file_dates = []
        file_matchups = []
        file_odds = []

        for row in body_rows:
            date = row.find_element_by_class_name('DateTime')
            file_dates.append(date.text.encode('utf-8'))

            matchup = row.find_element_by_class_name('MatchDetails')
            title = matchup.find_element_by_class_name('MatchTitleLink')
            file_matchups.append(title.text.encode('utf-8'))

            bets = row.find_element_by_class_name('BetList').find_elements_by_class_name('Bet')
            # Rebuilt for every row so a row without bets yields '' instead of
            # silently reusing the previous row's odds.
            row_odds = []
            for bet in bets:
                for cell in bet.find_elements_by_class_name("AverageOdds"):
                    # str() keeps the join safe whatever type fix_odds hands back.
                    row_odds.append(str(fix_odds(cell.text.encode('ascii', 'ignore'))))
            file_odds.append(' '.join(row_odds))

        return file_dates, file_matchups, file_odds

    @staticmethod
    def _build_game_df(file_dates, file_matchups, file_odds):
        """Turn the scraped lists into the frame of upcoming games.

        Rows whose date does not match 'dd/mm/YYYY HH:MM' are treated as in-play and
        dropped (their count is printed). Kick-off times are shifted by minus four hours.

        Returns a frame with columns Visitor_Odd, Home_Odd, Matchup_Date_GMT_Minus_4,
        Date, Time, Visitor_Team, Home_Team, indexed by the original row position.
        """
        parsed = pd.to_datetime(pd.Series(file_dates, dtype=object),
                                format='%d/%m/%Y %H:%M', errors='coerce')
        upcoming = parsed.notnull()
        print("in-play games: " + str(int((~upcoming).sum())))

        # CHANGE 'HOURS' AND THE OPERATOR ACCORDING THE TIMEZONE FORMAT FOR MATCHUPS ON THE DATABASE SOURCE WEBSITE
        # eg: see timezone on am-football-reference
        kickoff = parsed[upcoming] - timedelta(hours=4)

        # First odds value belongs to the home side, second to the visitor.
        odds = pd.Series(file_odds, dtype=object)[upcoming].str.split(' ')
        # Titles are "<home> — <visitor>".
        teams = pd.Series(file_matchups, dtype=object)[upcoming].str.split(' — ')

        columns = ['Visitor_Odd', 'Home_Odd', 'Matchup_Date_GMT_Minus_4', 'Date', 'Time',
                   'Visitor_Team', 'Home_Team']
        return pd.DataFrame({
            'Visitor_Odd': odds.str.get(1),
            'Home_Odd': odds.str.get(0),
            'Matchup_Date_GMT_Minus_4': kickoff,
            # Derived from the timestamp itself rather than from its string form,
            # which has no time part when every kick-off falls on midnight.
            'Date': kickoff.dt.normalize(),
            'Time': kickoff.dt.strftime('%H:%M:%S'),
            'Visitor_Team': teams.str.get(1),
            'Home_Team': teams.str.get(0),
        }, columns=columns)

    def generate_data(self, betbrain_upcoming_games_url):
        """Scrape the upcoming matches and store them in self.game_df (and self.game_df00).

        self.game_df is left as None when the matches list never appears within
        MAX_PAGE_LOAD_ATTEMPTS attempts.

        Raises:
            SystemExit: with code 0 when the list is present but contains no match.
        """
        self.game_df = None

        # WARNING!!! Here we check if the required element for scraping (class) is on the page,
        # if not we assume that the championship is over, so we pass
        browser = self._open_matches_page(betbrain_upcoming_games_url)
        if browser is None:
            print("Matches list did not load after " + str(self.MAX_PAGE_LOAD_ATTEMPTS) + " attempts, skipping.")
            return

        try:
            table = browser.find_element_by_class_name("MatchesList")
            body_rows = table.find_elements_by_class_name("Match")
            if not body_rows:
                print("No upcoming matchup for this league. Probably the season is over or on a break...")
                raise SystemExit(0)  # could use sys.exit()

            browser.maximize_window()
            file_dates, file_matchups, file_odds = self._scrape_rows(body_rows)
        finally:
            # Always release the browser, including on SystemExit and scraping errors.
            browser.quit()

        game_df = self._build_game_df(file_dates, file_matchups, file_odds)
        self.game_df00 = game_df
        self.game_df = game_df

    # -------------------------------------------------------------------------------------------------------------------------------------

    def clean_data(self, game_df, league_name):
        """Clean the odds columns and store the result in self.game_df.

        Surrounding parentheses are removed from both odds columns; rows with a missing
        odd or an odd containing a newline are dropped; odds are converted to numbers
        and a constant 'League' column is inserted at position 3. The frame passed in
        is not modified.
        """
        self.game_df_test = game_df
        game_df = game_df.copy()

        odd_columns = ('Visitor_Odd', 'Home_Odd')
        for column in odd_columns:
            game_df[column] = game_df[column].str.lstrip('(').str.rstrip(')')
        self.game_df_test1 = game_df

        keep = game_df['Visitor_Odd'].notnull()
        for column in odd_columns:
            # na=True flags missing values too, so they are dropped with the newline rows.
            has_newline = game_df[column].str.contains('\n', regex=False, na=True).astype(bool)
            keep = keep & ~has_newline

        # Independent copy: the assignments below must not target a slice of another frame.
        game_df = game_df[keep].copy()
        self.game_df0 = game_df

        for column in odd_columns:
            game_df[column] = pd.to_numeric(game_df[column])

        game_df.insert(3, 'League', str(league_name))

        # Create a dictionary like in soccer model

        # for s, t in zip(db_team_names_list, team_names_list):
            # game_df['Visitor_Team'] = game_df['Visitor_Team'].str.strip().str.replace(t, s)
            # game_df['Home_Team'] = game_df['Home_Team'].str.strip().str.replace(t, s)

        self.game_df = game_df
        self.game_df1 = game_df

    # -------------------------------------------------------------------------------------------------------------------------------------

    def merge_with_tableau_output(self, game_df, tableau_filename, upcoming_games_output_filename_us,
                                  upcoming_games_output_filename_eu):
        """Join the scraped games with the predictions CSV and write the two output CSVs.

        The join is an inner merge on Visitor_Team, Home_Team and Date. 'Sibyl' is the
        home team when Predicted_Result == 1, otherwise the visitor; 'Bookies_choice' is
        the home team when its odd is strictly lower, otherwise the visitor;
        'Divergence_Y/N' flags rows where the two differ.
        """
        left = pd.read_csv(tableau_filename)
        left['Date'] = pd.to_datetime(left['Date'])
        self.left = left

        df = pd.merge(left, game_df, on=['Visitor_Team', 'Home_Team', 'Date'], how="inner")
        df = df.sort_values(['Date', 'Time'])
        self.df = df

        # Column-wise selection instead of row-wise apply: also works on an empty merge.
        df['Sibyl'] = df['Home_Team'].where(df['Predicted_Result'] == 1, df['Visitor_Team'])
        df['Bookies_choice'] = df['Home_Team'].where(df['Home_Odd'] < df['Visitor_Odd'], df['Visitor_Team'])
        df['Divergence_Y/N'] = (df['Sibyl'] != df['Bookies_choice']).map({True: 'Y', False: 'N'})

        df = df.drop(['ID', 'V_Team_PTS', 'H_Team_PTS'], axis=1)
        df['True_Result'] = 'Upcoming'

        df['Confidence'] = df['Confidence'].round(3)
        df['Matchup_US_P'] = df['Visitor_Team'] + ' @ ' + df['Home_Team']
        df['Matchup_EU_P'] = df['Home_Team'] + ' vs ' + df['Visitor_Team']

        shared_tail = ['Bookies_choice', 'Sibyl', 'Confidence', 'Divergence_Y/N', 'True_Result']
        us_columns = ['Date', 'Time', 'League', 'Matchup_US_P', 'Visitor_Team', 'Home_Team',
                      'Visitor_Odd', 'Home_Odd'] + shared_tail
        eu_columns = ['Date', 'Time', 'League', 'Matchup_EU_P', 'Home_Team', 'Visitor_Team',
                      'Home_Odd', 'Visitor_Odd'] + shared_tail

        nfl_us_p_df = df[us_columns].reset_index(drop=True)
        nfl_eu_p_df = df[eu_columns].reset_index(drop=True)

        self.nfl_us_p_df = nfl_us_p_df
        self.nfl_eu_p_df = nfl_eu_p_df

        nfl_us_p_df.to_csv(upcoming_games_output_filename_us, mode='w+', index=True, index_label='ID')

        nfl_eu_p_df.to_csv(upcoming_games_output_filename_eu, mode='w+', index=True, index_label='ID')

# Example of use
# x = AcquireMatchupDatetimeOddsTwoChoices(
#     "https://www.betbrain.com/am-football/united-states/nfl/nfl/#/winner/whole-event/",
#     "nfl_team_stats_2016_2016.csv",
#     "NFL",
#     "nfl_tableau_output_2016.csv",
#     "NFL_Upcoming_Matchups_US_P_df",
#     "NFL_Upcoming_Matchups_EU_P_df")
#
# x()

in-play games: 1


In [15]:
# Inject a CSS rule that forces the notebook's `.container` element to 100% width.
from IPython.core.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

In [53]:
x.nfl_eu_p_df

Unnamed: 0,Date,Time,League,Matchup_EU_P,Home_Team,Visitor_Team,Home_Odd,Visitor_Odd,Bookies_choice,Sibyl,Confidence,Divergence_Y/N,True_Result
0,2016-12-22,21:25:00,NFL,Philadelphia Eagles vs New York Giants,Philadelphia Eagles,New York Giants,1.95,1.9,New York Giants,Philadelphia Eagles,0.586,Y,Upcoming
1,2016-12-24,14:00:00,NFL,New England Patriots vs New York Jets,New England Patriots,New York Jets,1.93,1.91,New York Jets,New England Patriots,0.708,Y,Upcoming
2,2016-12-24,14:00:00,NFL,Buffalo Bills vs Miami Dolphins,Buffalo Bills,Miami Dolphins,1.9,1.94,Buffalo Bills,Buffalo Bills,0.575,N,Upcoming
3,2016-12-24,14:00:00,NFL,Jacksonville Jaguars vs Tennessee Titans,Jacksonville Jaguars,Tennessee Titans,1.92,1.92,Tennessee Titans,Jacksonville Jaguars,0.523,Y,Upcoming
4,2016-12-24,14:00:00,NFL,Cleveland Browns vs San Diego Chargers,Cleveland Browns,San Diego Chargers,1.92,1.93,Cleveland Browns,San Diego Chargers,0.671,Y,Upcoming
5,2016-12-24,14:00:00,NFL,Green Bay Packers vs Minnesota Vikings,Green Bay Packers,Minnesota Vikings,1.93,1.91,Minnesota Vikings,Green Bay Packers,0.564,Y,Upcoming
6,2016-12-24,17:05:00,NFL,Oakland Raiders vs Indianapolis Colts,Oakland Raiders,Indianapolis Colts,1.91,1.93,Oakland Raiders,Oakland Raiders,0.67,N,Upcoming
7,2016-12-24,17:25:00,NFL,New Orleans Saints vs Tampa Bay Buccaneers,New Orleans Saints,Tampa Bay Buccaneers,1.91,1.91,Tampa Bay Buccaneers,New Orleans Saints,0.723,Y,Upcoming
8,2016-12-24,17:25:00,NFL,Los Angeles Rams vs San Francisco 49ers,Los Angeles Rams,San Francisco 49ers,1.53,2.54,Los Angeles Rams,Los Angeles Rams,0.729,N,Upcoming
9,2016-12-24,17:25:00,NFL,Seattle Seahawks vs Arizona Cardinals,Seattle Seahawks,Arizona Cardinals,1.91,1.91,Arizona Cardinals,Seattle Seahawks,0.655,Y,Upcoming


In [43]:
x.game_df

Unnamed: 0,Visitor_Odd,Home_Odd,Matchup_Date_GMT_Minus_4,League,Date,Time,Visitor_Team,Home_Team
1,1.9,1.95,2016-12-22 21:25:00,NFL,2016-12-22,21:25:00,Miami Dolphins,New Orleans Saints
2,1.91,1.93,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Jacksonville Jaguars,Denver Broncos
3,1.93,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,New York Jets,Cleveland Browns
4,1.94,1.9,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Indianapolis Colts,Buffalo Bills
5,1.92,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Pittsburgh Steelers,Jacksonville Jaguars
6,1.91,1.93,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Minnesota Vikings,Kansas City Chiefs
7,1.93,1.91,2016-12-24 17:05:00,NFL,2016-12-24,17:05:00,Green Bay Packers,New England Patriots
8,2.54,1.53,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,San Francisco 49ers,Los Angeles Rams
9,1.91,1.91,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,Arizona Cardinals,Oakland Raiders
10,1.91,1.91,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,Philadelphia Eagles,Los Angeles Rams


Unnamed: 0,Visitor_Odd,Home_Odd,Matchup_Date_GMT_Minus_4,League,Date,Time,Visitor_Team,Home_Team
1,1.9,1.95,2016-12-22 21:25:00,NFL,2016-12-22,21:25:00,Miami Dolphins,New Orleans Saints
2,1.91,1.93,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Jacksonville Jaguars,Denver Broncos
3,1.93,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,New York Jets,Cleveland Browns
4,1.94,1.9,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Indianapolis Colts,Buffalo Bills
5,1.92,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Pittsburgh Steelers,Jacksonville Jaguars
6,1.91,1.93,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Minnesota Vikings,Kansas City Chiefs
7,1.93,1.91,2016-12-24 17:05:00,NFL,2016-12-24,17:05:00,Green Bay Packers,New England Patriots
8,2.54,1.53,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,San Francisco 49ers,Los Angeles Rams
9,1.91,1.91,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,Arizona Cardinals,Oakland Raiders
10,1.91,1.91,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,Philadelphia Eagles,Los Angeles Rams


In [13]:
x.left

Unnamed: 0,ID,Visitor_Team,V_Team_PTS,Home_Team,H_Team_PTS,True_Result,Predicted_Result,Confidence,Date
0,0,Carolina Panthers,20,Denver Broncos,21,1,1,0.633670,2016-09-08
1,1,Green Bay Packers,27,Jacksonville Jaguars,23,0,1,0.505392,2016-09-11
2,2,Buffalo Bills,7,Baltimore Ravens,13,1,1,0.655689,2016-09-11
3,3,Cincinnati Bengals,23,New York Jets,22,0,1,0.542848,2016-09-11
4,4,Cleveland Browns,10,Philadelphia Eagles,29,1,1,0.854994,2016-09-11
5,5,Minnesota Vikings,25,Tennessee Titans,16,0,1,0.545876,2016-09-11
6,6,San Diego Chargers,27,Kansas City Chiefs,33,1,1,0.657561,2016-09-11
7,7,Tampa Bay Buccaneers,31,Atlanta Falcons,24,0,1,0.683905,2016-09-11
8,8,Oakland Raiders,35,New Orleans Saints,34,0,1,0.580624,2016-09-11
9,9,Chicago Bears,14,Houston Texans,23,1,1,0.701339,2016-09-11


In [25]:
x.game_df0

Unnamed: 0,Visitor_Odd,Home_Odd,Matchup_Date_GMT_Minus_4,League,Date,Time,Visitor_Team,Home_Team
0,3.48,1.32,2016-12-19 21:30:00,NFL,2016-12-19,21:30:00,Carolina Panthers,San Diego Chargers
1,1.89,1.95,2016-12-22 21:25:00,NFL,2016-12-22,21:25:00,Miami Dolphins,New Orleans Saints
2,1.93,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Jacksonville Jaguars,Denver Broncos
3,1.93,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,New York Jets,Cleveland Browns
4,1.94,1.9,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Indianapolis Colts,Buffalo Bills
5,1.92,1.92,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Pittsburgh Steelers,Jacksonville Jaguars
6,1.91,1.93,2016-12-24 14:00:00,NFL,2016-12-24,14:00:00,Minnesota Vikings,Kansas City Chiefs
7,1.93,1.91,2016-12-24 17:05:00,NFL,2016-12-24,17:05:00,Green Bay Packers,New England Patriots
8,2.54,1.53,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,San Francisco 49ers,Los Angeles Rams
9,1.91,1.91,2016-12-24 17:25:00,NFL,2016-12-24,17:25:00,Arizona Cardinals,Oakland Raiders


In [50]:
len(x.team_names_list)

27

In [47]:
x.db_team_names_list

['Arizona Cardinals',
 'Atlanta Falcons',
 'Baltimore Ravens',
 'Buffalo Bills',
 'Carolina Panthers',
 'Chicago Bears',
 'Cincinnati Bengals',
 'Cleveland Browns',
 'Dallas Cowboys',
 'Denver Broncos',
 'Detroit Lions',
 'Green Bay Packers',
 'Houston Texans',
 'Indianapolis Colts',
 'Jacksonville Jaguars',
 'Kansas City Chiefs',
 'Los Angeles Rams',
 'Miami Dolphins',
 'Minnesota Vikings',
 'New England Patriots',
 'New Orleans Saints',
 'New York Giants',
 'New York Jets',
 'Oakland Raiders',
 'Philadelphia Eagles',
 'Pittsburgh Steelers',
 'San Diego Chargers',
 'San Francisco 49ers',
 'Seattle Seahawks',
 'Tampa Bay Buccaneers',
 'Tennessee Titans',
 'Washington Redskins']

In [31]:
x.db_team_names_list == x.team_names_list

False

In [44]:
# Short aliases for the two team-name lists held on `x`.
lol = x.db_team_names_list
bol = x.team_names_list

In [45]:
# Walk both team-name lists in lockstep and report, per position, whether the
# two entries match. zip() stops at the shorter list, so surplus entries of the
# longer one are not reported.
for l, y in zip(lol, bol):
    print("egale" if l == y else "different")

egale
egale
egale
egale
egale
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
different
