# BetFair Dataset Feature Engineering

@author Nathan Stevens<br>
@version 0.2 10/03/2023 

In [1]:
import requests
import json
from pprint import pprint
import pandas as pd
import numpy as np
import time
from datetime import datetime
import pickle

# load our api keys
from config import geoapify_key

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

In [2]:
# load the main dataset from pickle file
betfair_df = pd.read_pickle("resources/betfair_dataset.pkl")
betfair_df.info()
betfair_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1043349 entries, 0 to 1043348
Data columns (total 19 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   index             1043349 non-null  int64  
 1   SPORTS_ID         1043349 non-null  object 
 2   EVENT_ID          1043349 non-null  object 
 3   SETTLED_DATE      1043349 non-null  object 
 4   FULL_DESCRIPTION  1043349 non-null  object 
 5   SCHEDULED_OFF     1043349 non-null  object 
 6   EVENT             1043349 non-null  object 
 7   DT ACTUAL_OFF     1043349 non-null  object 
 8   SELECTION_ID      1043349 non-null  int64  
 9   SELECTION         1043349 non-null  object 
 10  ODDS              1043349 non-null  float64
 11  NUMBER_BETS       1043349 non-null  int64  
 12  VOLUME_MATCHED    1043349 non-null  float64
 13  LATEST_TAKEN      1043349 non-null  object 
 14  FIRST_TAKEN       1043349 non-null  object 
 15  WIN_FLAG          1043349 non-null  int64  
 16  

Unnamed: 0,index,SPORTS_ID,EVENT_ID,SETTLED_DATE,FULL_DESCRIPTION,SCHEDULED_OFF,EVENT,DT ACTUAL_OFF,SELECTION_ID,SELECTION,ODDS,NUMBER_BETS,VOLUME_MATCHED,LATEST_TAKEN,FIRST_TAKEN,WIN_FLAG,IN_PLAY,WIN_AMOUNT,LOSS_AMOUNT
0,0,Soccer,115279736,04-09-2014 17:07:30,U21 Euro Championship/Qualifiers/Fixtures 04 S...,04-09-2014 15:30,Over/Under 1.5 Goals,04-09-2014 15:32:39,1221386,Over 1.5 Goals,1.35,7,765.54,04-09-2014 16:39:49,04-09-2014 16:39:28,1,IP,1033.479,0.0
1,1,Tennis,115288925,04-09-2014 11:50:25,Group A/Brasov Challenger 2014/Second Round Ma...,04-09-2014 10:30,Match Odds,04-09-2014 10:34:32,7659748,Aslan Karatsev,1.12,41,3836.7,04-09-2014 11:44:26,04-09-2014 11:18:57,1,IP,4297.104,0.0
2,2,Soccer,115196118,03-09-2014 15:08:55,U21 Euro Championship/Qualifiers/Fixtures 03 S...,03-09-2014 14:00,Over/Under 1.5 Goals,03-09-2014 14:00:45,1221386,Over 1.5 Goals,2.1,6,625.54,03-09-2014 15:00:37,03-09-2014 14:45:28,1,IP,1313.634,0.0
3,4,Soccer,115236324,01-09-2014 19:46:39,Irish Soccer/FAI Ford Cup/Fixtures 01 Septembe...,01-09-2014 19:00,Over/Under 0.5 Goals,01-09-2014 19:01:34,5851483,Over 0.5 Goals,1.1,22,1176.46,01-09-2014 19:26:21,01-09-2014 19:20:00,1,IP,1294.106,0.0
4,5,Soccer,115184779,02-09-2014 18:38:14,Swedish Soccer/Superettan/Fixtures 02 Septembe...,02-09-2014 17:20,Over/Under 1.5 Goals,02-09-2014 17:20:13,1221386,Over 1.5 Goals,1.34,19,903.18,02-09-2014 18:30:48,02-09-2014 18:28:48,1,IP,1210.2612,0.0


In [None]:
# find soccer gae with none aski characters
#results = betfair_df.loc[betfair_df["FULL_DESCRIPTION"].str.contains("Womens Soccer/Icelandic U19 League Women/Fixtures 04 September / F", na=False)]
#print(results["FULL_DESCRIPTION"].tolist())

In [4]:
# define of names of files containing name of spotting event and rough location
sport_files = ["golf_location.csv", "american_football_location.csv", "basketball_location.csv",
              "cricket_location.csv", "baseball_location.csv", "tennis_location.csv", "soccer_location.csv"]

# read in the first file and display the dataframe
df = pd.read_csv("output/" + sport_files[6], encoding='utf-8')

#df = pd.read_csv("output/" + sport_files[6], encoding='utf-8')
result = df.loc[df["FULL_DESCRIPTION"].str.contains("Womens Soccer/Icelandic U19 League Women/Fixtures 04 September / F", na=False)]
print(result["FULL_DESCRIPTION"].iloc[0])
df

Womens Soccer/Icelandic U19 League Women/Fixtures 04 September / Fjonlir U19 (W) v Breidablik U19 (W)


Unnamed: 0,FULL_DESCRIPTION,NUMBER_BETS,VOLUME_MATCHED,WIN_AMOUNT,LOSS_AMOUNT,SETTLED_DATE,COUNTRY,CITY
0,Algerian Soccer/Ligue 1/Fixtures 06 September/...,1273,16644.00,12232.6506,21270.4386,6/9/14 20:01,Algeria,Algiers
1,Anderlecht (W) v OH Leuven (W)/Asian Handicap,2,7.10,0.0000,19.5250,,Belgium,Leuven
2,Argentinian Soccer/Argentinian Primera B Metro...,1693,30690.96,33057.8786,28235.8778,1/9/14 17:37,Argentina,Buenos Aires
3,Argentinian Soccer/Argentinian Primera B Metro...,2,16.40,57.5640,0.0000,,Argentina,Buenos Aires
4,Argentinian Soccer/Argentinian Primera B Metro...,2953,37557.27,39084.1152,49958.4387,2/9/14 1:57,Argentina,Buenos Aires
...,...,...,...,...,...,...,...,...
1954,Womens Soccer/Women's Super League 2/Fixtures ...,677,12267.28,11040.3920,16842.5044,4/9/14 20:36,United Kingdom,Birmingham
1955,Womens Soccer/Women's Super League 2/Fixtures ...,382,4437.80,5537.3990,1626.6658,7/9/14 15:36,United Kingdom,Birmingham
1956,Womens Soccer/Women's Super League 2/Fixtures ...,307,4297.10,4548.1826,2156.8096,7/9/14 13:59,United Kingdom,Birmingham
1957,Womens Soccer/Women's Super League 2/Fixtures ...,1094,29389.31,28708.7117,33928.8714,7/9/14 13:46,United Kingdom,Birmingham


In [5]:
# convert the date and time to unix time stamp
def get_timestamp(datetime_str):
    datetime_object = datetime.strptime(datetime_str, '%d/%m/%y %H:%M')
    ttuple = datetime_object.timetuple()
    
    timestamp = int(time.mktime(ttuple)) - 18000 # remove 18000 seconds so we match the timezone in betfair dataset
    date = datetime_object.strftime("%Y-%m-%d")
    return(timestamp, date)

In [33]:
# function to get the historical weather using Open-Meteo Historical Weather API
# https://open-meteo.com/en/docs/historical-weather-api
def get_weather(lat, lon, game_datetime):
    # set temp to -1, and rain to 0
    temp = -1
    rain = 0
    weather_url = ""
    
    # debug for when we exceed the max hour api calls
    # return(temp, rain)
    
    try:
        # get the timestamp from the date string
        timestamp, game_date = get_timestamp(game_datetime)
        
        weather_url = f"https://archive-api.open-meteo.com/v1/archive?latitude={lat}&longitude={lon}&start_date={game_date}&end_date={game_date}&hourly=temperature_2m,rain&timeformat=unixtime"
        weather_data = requests.get(weather_url).json()
        
        rains = weather_data["hourly"]["rain"]
        times = weather_data["hourly"]["time"]
        temps = weather_data["hourly"]["temperature_2m"]
              
        # grab the temperature and rain amount within 30 minutes
        for i in range(len(times)):
            tdiff = abs(times[i] - timestamp)
            
            if tdiff < 1800:
                temp = temps[i]
                
                if rains[i] > 0:
                    rain = 1
                    
                #print(times[i], tdiff, temps[i], rain)
                break
        # we didn't find a temp lets just use the max
        if temp == -1:
            temp = max(temps)
            print("Max Temp (C), Rained?", temp, rain)
    except Exception as e:
        print("Unable to get weather data for", lat, lon, game_datetime)
        print(weather_url)
        print("Exception:", e)
        
    return(temp, rain)

In [7]:
# define dictionary to store city locations to avoid hitting the geoapify API every time
city_locations = dict()

In [None]:
# save the city_locations to pickle file
with open('output/city_location.pkl', 'wb') as handle:
    pickle.dump(city_locations, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
# open the city_locations from the pickle file
with open('output/city_location.pkl', 'rb') as handle:
    city_locations = pickle.load(handle)

In [32]:
# function to process a csv file containing country and cities and return lat, lon, temp, rain?
base_url = "https://api.geoapify.com/v1/geocode/search?text="

def get_location(filename):
    #df = pd.read_csv(filename, encoding='ISO-8859-1')
    df = pd.read_csv(filename, encoding='utf-8')
    df["LATITUDE"] = ""
    df["LONGITUDE"] = ""
    df["TEMPERATURE"] = ""
    df["RAIN"] = ""
    
    game_datetime = ""
    
    # process each row
    for index, row in df.iterrows():        
        target_city = row["CITY"].strip() + ", " + row["COUNTRY"].strip()
        endpoint_url = base_url + target_city + "&format=json&apiKey=" + geoapify_key
        
        if type(row["SETTLED_DATE"]) == str:
            game_datetime = row["SETTLED_DATE"]
            
        #print("Game Date:", game_datetime)
        # call the api for taget city or used saved values
        try:
            if target_city in city_locations:
                lat, lon = city_locations[target_city]
                #print("Using save location:", target_city, lat, lon)
            else:
                geo_data = requests.get(endpoint_url).json()
            
                lat = geo_data["results"][0]["lat"]
                lon = geo_data["results"][0]["lon"]
                city_locations[target_city] = (lat, lon)
                #print("Using geoapify API:",target_city, lat,lon)
        except Exception as e:
            lat = -1
            lon = -1
            print("\nNo Latitude/Longitude:", target_city)
            print(endpoint_url)
            print(e)
        
        # get the weather tuple containing temperature, and rain (1=rained, 0=no rain)
        weather = get_weather(lat, lon, game_datetime)
        
        print(filename, (index + 2),"-- Processing City: ", target_city, lat, lon, weather)
        
        # update the dataframe with the lat,lon, temperature and weather
        df.loc[index, "LATITUDE"] = lat
        df.loc[index, "LONGITUDE"] = lon
        df.loc[index, "TEMPERATURE"] = weather[0]
        df.loc[index, "RAIN"] = weather[1]
    # return the modified dataframe
    return df

In [None]:
df = get_location("output/" + sport_files[6])

In [34]:
# add lat,lon, and weather info to the sports files
sports_df = []

for idx, filename in enumerate(sport_files):
    df = get_location("output/" + filename)
    sports_df.append(df)
    
    #debug code to only process a small number of the files. Set to 10 to process all files
    if idx == 10:
        break
    
print("Total cities:", len(city_locations))

output/golf_location.csv 2 -- Processing City:  Cherry Hills Village, CO, USA 39.6416544 -104.959425 (16.6, 0)
output/golf_location.csv 3 -- Processing City:  Cherry Hills Village, CO, USA 39.6416544 -104.959425 (27.4, 0)
output/golf_location.csv 4 -- Processing City:  Davidson, NC, USA 35.4992614 -80.8485218 (29.2, 0)
output/golf_location.csv 5 -- Processing City:  Norton, Massachusetts, USA 41.9667666 -71.1869939 (28.3, 0)
output/golf_location.csv 6 -- Processing City:  Norton, Massachusetts, USA 41.9667666 -71.1869939 (28.3, 0)
output/golf_location.csv 7 -- Processing City:  Crans-Montana, Switzerland 46.3304899 7.526435803722132 (17.1, 0)
output/golf_location.csv 8 -- Processing City:  Crans-Montana, Switzerland 46.3304899 7.526435803722132 (11.2, 0)
output/golf_location.csv 9 -- Processing City:  Portland, Oregon, USA 45.5202471 -122.674194 (23.3, 0)
output/golf_location.csv 10 -- Processing City:  Levis, Quebec, Canada 46.8033114 -71.1778792 (19.0, 0)
output/american_football_loc

output/basketball_location.csv 18 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (33.9, 0)
output/basketball_location.csv 19 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (33.9, 0)
output/basketball_location.csv 20 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (32.1, 0)
output/basketball_location.csv 21 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (33.9, 0)
output/basketball_location.csv 22 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (35.8, 0)
output/basketball_location.csv 23 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (29.0, 0)
output/basketball_location.csv 24 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (32.0, 0)
output/basketball_location.csv 25 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (35.8, 0)
output/basketball_location.csv 26 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (35.6, 0)
output/basketball_location.csv 27 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 

output/cricket_location.csv 32 -- Processing City:  Kent, United Kingdom 51.2474823 0.7105077 (19.6, 1)
output/cricket_location.csv 33 -- Processing City:  Durham, United Kingdom 54.7770139 -1.5756205 (14.4, 0)
output/cricket_location.csv 34 -- Processing City:  Durham, United Kingdom 54.7770139 -1.5756205 (14.4, 0)
output/cricket_location.csv 35 -- Processing City:  Durham, United Kingdom 54.7770139 -1.5756205 (14.4, 0)
output/cricket_location.csv 36 -- Processing City:  Warwickshire, United Kingdom 52.32130635 -1.5536905536661392 (18.1, 0)
output/cricket_location.csv 37 -- Processing City:  Warwickshire, United Kingdom 52.32130635 -1.5536905536661392 (18.1, 0)
output/cricket_location.csv 38 -- Processing City:  Warwickshire, United Kingdom 52.32130635 -1.5536905536661392 (18.1, 0)
output/cricket_location.csv 39 -- Processing City:  Warwickshire, United Kingdom 52.32130635 -1.5536905536661392 (18.1, 0)
output/cricket_location.csv 40 -- Processing City:  Centurion, South Africa -25.836

output/baseball_location.csv 46 -- Processing City:  Baltimore, Maryland, USA 39.2908816 -76.610759 (24.4, 0)
output/baseball_location.csv 47 -- Processing City:  Cleveland, Ohio, USA 41.4996574 -81.6936772 (23.8, 0)
output/baseball_location.csv 48 -- Processing City:  Minneapolis, Minnesota, USA 44.9772995 -93.2654692 (24.0, 1)
output/baseball_location.csv 49 -- Processing City:  Arlington, Texas, USA 32.7355816 -97.1071186 (31.2, 0)
output/baseball_location.csv 50 -- Processing City:  Milwaukee, Wisconsin, USA 43.0349931 -87.922497 (23.8, 0)
output/baseball_location.csv 51 -- Processing City:  St. Petersburg, Florida, USA 27.7700475 -82.6359078 (27.5, 0)
output/baseball_location.csv 52 -- Processing City:  Los Angeles, California, USA 34.0536909 -118.242766 (24.4, 0)
output/baseball_location.csv 53 -- Processing City:  Miami, Florida, USA 25.7741728 -80.19362 (26.4, 0)
output/baseball_location.csv 54 -- Processing City:  St. Petersburg, Florida, USA 27.7700475 -82.6359078 (27.9, 0)
o

output/tennis_location.csv 23 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (21.4, 0)
output/tennis_location.csv 24 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (21.5, 0)
output/tennis_location.csv 25 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (21.4, 0)
output/tennis_location.csv 26 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (20.7, 0)
output/tennis_location.csv 27 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (21.4, 0)
output/tennis_location.csv 28 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (19.9, 0)
output/tennis_location.csv 29 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (21.4, 0)
output/tennis_location.csv 30 -- Processing City:  Alphen aan den Rijn, Netherlands 52.1131403 4.6408411999450685 (19.0, 0)
output/t

output/tennis_location.csv 103 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (25.6, 0)
output/tennis_location.csv 104 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (24.8, 0)
output/tennis_location.csv 105 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (24.8, 0)
output/tennis_location.csv 106 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (26.0, 0)
output/tennis_location.csv 107 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (26.0, 0)
output/tennis_location.csv 108 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (26.0, 0)
output/tennis_location.csv 109 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (20.9, 0)
output/tennis_location.csv 110 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (23.9, 0)
output/tennis_location.csv 111 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (21.5, 0)
output/tennis_location.csv 112 -- Processing City:  Genoa, Italy 44.40726 8.9338624 (23.1, 0)
output/tennis_location.csv 113 -- Processing City:  Causeway

output/tennis_location.csv 183 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (27.4, 0)
output/tennis_location.csv 184 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (27.6, 0)
output/tennis_location.csv 185 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (27.5, 0)
output/tennis_location.csv 186 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (27.4, 0)
output/tennis_location.csv 187 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (26.0, 0)
output/tennis_location.csv 188 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (26.9, 0)
output/tennis_location.csv 189 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (26.0, 0)
output/tennis_location.csv 190 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (26.7, 0)
output/tennis_location.csv 191 -- Processing City:  Shanghai, China 31.2323437 121.4691024 (26.9, 0)
output/tennis_location.csv 192 -- Processing City:  Shanghai, China 31.2323437 121.4691024 

output/tennis_location.csv 265 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (29.1, 0)
output/tennis_location.csv 266 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (26.8, 0)
output/tennis_location.csv 267 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (28.5, 0)
output/tennis_location.csv 268 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (28.5, 0)
output/tennis_location.csv 269 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (28.5, 0)
output/tennis_location.csv 270 -- Processing City:  Tashkent, Uzbekistan 41.3123363 69.2787079 (29.3, 0)
output/tennis_location.csv 271 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (22.1, 0)
output/tennis_location.csv 272 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (31.8, 0)
output/tennis_location.csv 273 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (26.0, 0)
output/tennis_location.csv 274 -- Processing City:  Queens, NY, USA

output/tennis_location.csv 346 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (28.9, 0)
output/tennis_location.csv 347 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (28.2, 0)
output/tennis_location.csv 348 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (31.8, 0)
output/tennis_location.csv 349 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (31.9, 0)
output/tennis_location.csv 350 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (31.9, 0)
output/tennis_location.csv 351 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (31.8, 0)
output/tennis_location.csv 352 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (30.3, 0)
output/tennis_location.csv 353 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (29.1, 0)
output/tennis_location.csv 354 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 (29.1, 0)
output/tennis_location.csv 355 -- Processing City:  Queens, NY, USA 40.7135078 -73.8283132 

output/soccer_location.csv 6 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (14.5, 0)
output/soccer_location.csv 7 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (17.0, 0)
output/soccer_location.csv 8 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (15.5, 0)
output/soccer_location.csv 9 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (15.5, 0)
output/soccer_location.csv 10 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (15.5, 0)
output/soccer_location.csv 11 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (16.2, 0)
output/soccer_location.csv 12 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (16.7, 0)
output/soccer_location.csv 13 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (16.7, 0)
output/soccer_location.csv 14 -- Processing City:  Buenos Aires, Argentina -34.6075682 -58.4370894 (16.7, 0)
output/soccer_location.

output/soccer_location.csv 82 -- Processing City:  Baku, Azerbaijan 40.3755885 49.8328009 (29.4, 0)
output/soccer_location.csv 83 -- Processing City:  Baku, Azerbaijan 40.3755885 49.8328009 (29.4, 0)
output/soccer_location.csv 84 -- Processing City:  Baku, Azerbaijan 40.3755885 49.8328009 (29.4, 0)
output/soccer_location.csv 85 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (18.6, 0)
output/soccer_location.csv 86 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (18.6, 0)
output/soccer_location.csv 87 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (18.6, 0)
output/soccer_location.csv 88 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (18.6, 0)
output/soccer_location.csv 89 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (19.0, 0)
output/soccer_location.csv 90 -- Processing City:  Barysaw, Belarus 54.2240665 28.5117849 (18.9, 0)
output/soccer_location.csv 91 -- Processing City:  Brussels, Belgium 50.8465573 4.351697 (17.3, 0)
o

Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 160 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 161 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 162 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 163 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 164 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 165 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
Max Temp (C), Rained? 30.3 0
output/soccer_location.csv 166 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (30.3, 0)
output/soccer_location.csv 167 -- 

output/soccer_location.csv 231 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (24.6, 0)
output/soccer_location.csv 232 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (24.6, 0)
output/soccer_location.csv 233 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (24.6, 0)
output/soccer_location.csv 234 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (24.6, 0)
output/soccer_location.csv 235 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (24.6, 0)
output/soccer_location.csv 236 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (27.0, 0)
output/soccer_location.csv 237 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (27.0, 0)
output/soccer_location.csv 238 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (27.0, 0)
output/soccer_location.csv 239 -- Processing City:  Rio de Janeiro, Brazil -22.9110137 -43.2093727 (27.0, 0)
output/soccer_locat

output/soccer_location.csv 309 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (18.4, 0)
output/soccer_location.csv 310 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (18.4, 0)
output/soccer_location.csv 311 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (13.3, 0)
output/soccer_location.csv 312 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (13.3, 0)
output/soccer_location.csv 313 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (13.1, 0)
output/soccer_location.csv 314 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (13.1, 0)
output/soccer_location.csv 315 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (13.1, 0)
output/soccer_location.csv 316 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (14.4, 0)
output/soccer_location.csv 317 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 (14.4, 0)
output/soccer_location.csv 318 -- Processing City:  Bogota, Columbia 4.6529539 -74.0835643 

output/soccer_location.csv 387 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.6, 0)
output/soccer_location.csv 388 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.6, 0)
output/soccer_location.csv 389 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.9, 0)
output/soccer_location.csv 390 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.6, 0)
output/soccer_location.csv 391 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.6, 0)
output/soccer_location.csv 392 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (19.6, 0)
output/soccer_location.csv 393 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (23.6, 0)
output/soccer_location.csv 394 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (22.2, 0)
output/soccer_location.csv 395 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (22.2, 0)
output/soccer_location.csv 396 -- Pro

output/soccer_location.csv 464 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (14.0, 0)
output/soccer_location.csv 465 -- Processing City:  Prague, Czech Republic 50.0874654 14.4212535 (14.0, 0)
output/soccer_location.csv 466 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (15.4, 0)
output/soccer_location.csv 467 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (15.4, 0)
output/soccer_location.csv 468 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (15.4, 0)
output/soccer_location.csv 469 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (17.6, 0)
output/soccer_location.csv 470 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (17.6, 0)
output/soccer_location.csv 471 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (17.6, 0)
output/soccer_location.csv 472 -- Processing City:  Copenhagen, Denmark 55.6867243 12.5700724 (19.1, 0)
output/soccer_location.csv 473 -- Processing City:  Copenh

output/soccer_location.csv 543 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 544 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 545 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 546 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 547 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 548 -- Processing City:  Amsterdam, Netherlands 52.3730796 4.8924534 (17.6, 1)
output/soccer_location.csv 549 -- Processing City:  Quito, Ecuador -0.2201641 -78.5123274 (13.0, 0)
output/soccer_location.csv 550 -- Processing City:  Quito, Ecuador -0.2201641 -78.5123274 (13.0, 0)
output/soccer_location.csv 551 -- Processing City:  Quito, Ecuador -0.2201641 -78.5123274 (13.0, 0)
output/soccer_location.csv 552 -- Processing City:  London, Unit

output/soccer_location.csv 618 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (16.5, 0)
output/soccer_location.csv 619 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (16.5, 0)
output/soccer_location.csv 620 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (16.5, 0)
output/soccer_location.csv 621 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
output/soccer_location.csv 622 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
output/soccer_location.csv 623 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
output/soccer_location.csv 624 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
output/soccer_location.csv 625 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
output/soccer_location.csv 626 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (15.6, 0)
o

output/soccer_location.csv 692 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (17.9, 0)
output/soccer_location.csv 693 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (17.9, 0)
output/soccer_location.csv 694 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (17.9, 0)
output/soccer_location.csv 695 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (17.9, 0)
output/soccer_location.csv 696 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (18.7, 0)
output/soccer_location.csv 697 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (18.7, 0)
output/soccer_location.csv 698 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (18.7, 0)
output/soccer_location.csv 699 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (18.7, 0)
output/soccer_location.csv 700 -- Processing City:  Birmingham, United Kingdom 52.4796992 -1.9026911 (18.7, 0)
o

output/soccer_location.csv 770 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.6, 0)
output/soccer_location.csv 771 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.6, 0)
output/soccer_location.csv 772 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.6, 0)
output/soccer_location.csv 773 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.6, 0)
output/soccer_location.csv 774 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.7, 0)
output/soccer_location.csv 775 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.7, 0)
output/soccer_location.csv 776 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.4, 0)
output/soccer_location.csv 777 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.1, 0)
output/soccer_location.csv 778 -- Processing City:  Helsinki, Finland 60.1674881 24.9427473 (17.1, 0)
output/soccer_location.csv 779 -- Processing City:  Helsinki, Finland 60.1674881 2

output/soccer_location.csv 852 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (26.9, 0)
output/soccer_location.csv 853 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (27.2, 0)
output/soccer_location.csv 854 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (26.6, 0)
output/soccer_location.csv 855 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (27.5, 0)
output/soccer_location.csv 856 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (27.5, 0)
output/soccer_location.csv 857 -- Processing City:  Villarreal, Spain 39.9372616 -0.1004465 (27.5, 0)
output/soccer_location.csv 858 -- Processing City:  Tbilisi, Georgia 41.6934591 44.8014495 (27.0, 0)
output/soccer_location.csv 859 -- Processing City:  Tbilisi, Georgia 41.6934591 44.8014495 (27.0, 0)
output/soccer_location.csv 860 -- Processing City:  Tbilisi, Georgia 41.6934591 44.8014495 (27.0, 0)
output/soccer_location.csv 861 -- Processing City:  Tbilisi, Georgia 41.6934591 44.80

output/soccer_location.csv 934 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (22.3, 0)
output/soccer_location.csv 935 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (25.2, 0)
output/soccer_location.csv 936 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (25.2, 0)
output/soccer_location.csv 937 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (25.2, 0)
output/soccer_location.csv 938 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (25.2, 0)
output/soccer_location.csv 939 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (24.7, 1)
Max Temp (C), Rained? 24.9 0
output/soccer_location.csv 940 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (24.9, 0)
Max Temp (C), Rained? 24.9 0
output/soccer_location.csv 941 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (24.9, 0)
Max Temp (C), Rained? 24.9 0
output/soccer_location.csv 942 -- Processing City:  Berlin, Germany 52.5170365 13.3888599 (24.9, 0)
Max Temp (C),

output/soccer_location.csv 1012 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.8, 0)
output/soccer_location.csv 1013 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.3, 0)
output/soccer_location.csv 1014 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.4, 1)
output/soccer_location.csv 1015 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (26.1, 1)
output/soccer_location.csv 1016 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (27.4, 1)
output/soccer_location.csv 1017 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.2, 0)
output/soccer_location.csv 1018 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.8, 0)
output/soccer_location.csv 1019 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.3, 0)
output/soccer_location.csv 1020 -- Processing City:  Kolkata, India 22.5726459 88.3638953 (28.4, 1)
output/soccer_location.csv 1021 -- Processing City:  Jakarta, Indonesia -6.175247 106.8270488 (27.4,

output/soccer_location.csv 1093 -- Processing City:  Hanoi, Vietnam 21.0283207 105.8540217 (26.0, 0)
output/soccer_location.csv 1094 -- Processing City:  London, United Kingdom 51.5073359 -0.12765 (15.6, 0)
output/soccer_location.csv 1095 -- Processing City:  London, United Kingdom 51.5073359 -0.12765 (15.6, 0)
output/soccer_location.csv 1096 -- Processing City:  London, United Kingdom 51.5073359 -0.12765 (15.6, 0)
output/soccer_location.csv 1097 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (18.3, 0)
output/soccer_location.csv 1098 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (18.3, 0)
output/soccer_location.csv 1099 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (18.3, 0)
output/soccer_location.csv 1100 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (18.3, 0)
output/soccer_location.csv 1101 -- Processing City:  Astana, Kazakhstan 51.1282205 71.4306682 (12.4, 0)
output/soccer_location.csv 1102 -- Processing City:  Astana, Kazakhstan 51.1282205 71.4306682

output/soccer_location.csv 1174 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (27.7, 0)
output/soccer_location.csv 1175 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (27.7, 0)
output/soccer_location.csv 1176 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (27.7, 0)
output/soccer_location.csv 1177 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (17.7, 0)
output/soccer_location.csv 1178 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (17.7, 0)
output/soccer_location.csv 1179 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (22.8, 0)
output/soccer_location.csv 1180 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (23.5, 0)
output/soccer_location.csv 1181 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (23.5, 0)
output/soccer_location.csv 1182 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (27.3, 0)
output/soccer_location.csv 1183 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (27.3, 0)
output/soccer_location.csv 118

output/soccer_location.csv 1258 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (25.4, 0)
output/soccer_location.csv 1259 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (25.4, 0)
output/soccer_location.csv 1260 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.0, 0)
output/soccer_location.csv 1261 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.0, 0)
output/soccer_location.csv 1262 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.0, 0)
output/soccer_location.csv 1263 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.0, 0)
output/soccer_location.csv 1264 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.0, 0)
output/soccer_location.csv 1265 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.7, 0)
output/soccer_location.csv 1266 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.7, 0)
output/soccer_location.csv 1267 -- Processing City:  Tokyo, Japan 35.695126 139.75391 (26.7, 0)
output/soccer_location.csv 1268 -- Proce

output/soccer_location.csv 1340 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (25.7, 0)
output/soccer_location.csv 1341 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (25.7, 0)
output/soccer_location.csv 1342 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (22.8, 0)
output/soccer_location.csv 1343 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (22.8, 0)
output/soccer_location.csv 1344 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (22.8, 0)
output/soccer_location.csv 1345 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (22.8, 0)
output/soccer_location.csv 1346 -- Processing City:  Carson, CA, USA 33.8322043 -118.2517547 (22.8, 0)
output/soccer_location.csv 1347 -- Processing City:  Foxborough, MA, USA 42.0653768 -71.2478308 (22.5, 0)
output/soccer_location.csv 1348 -- Processing City:  Foxborough, MA, USA 42.0653768 -71.2478308 (22.5, 0)
output/soccer_location.csv 1349 -- Processing City:  Foxborough, MA

output/soccer_location.csv 1415 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (15.3, 0)
output/soccer_location.csv 1416 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (15.3, 0)
output/soccer_location.csv 1417 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (18.6, 0)
output/soccer_location.csv 1418 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (18.5, 0)
output/soccer_location.csv 1419 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (19.2, 0)
output/soccer_location.csv 1420 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (15.7, 0)
output/soccer_location.csv 1421 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (15.7, 0)
output/soccer_location.csv 1422 -- Processing City:  Panama City, Panama 8.9714493 -79.5341802 (26.8, 0)
output/soccer_location.csv 1423 -- Processing City:  Panama City, Panama 8.9714493 -79.5341802 (26.8, 0)
Max Temp (C), Rained? 30.6 0
output/soccer_location.csv 1424 -- Processing City:  Panama City, Panama 8.

output/soccer_location.csv 1495 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (25.1, 0)
output/soccer_location.csv 1496 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (21.1, 0)
output/soccer_location.csv 1497 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (21.1, 0)
output/soccer_location.csv 1498 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (21.1, 0)
output/soccer_location.csv 1499 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (22.3, 0)
output/soccer_location.csv 1500 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (22.3, 0)
output/soccer_location.csv 1501 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (22.3, 0)
output/soccer_location.csv 1502 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (22.3, 0)
output/soccer_location.csv 1503 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (22.3, 0)
output/soccer_location.csv 1504 -- Processing City:  Bucharest, 

output/soccer_location.csv 1575 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (21.5, 1)
output/soccer_location.csv 1576 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (22.5, 0)
output/soccer_location.csv 1577 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (22.5, 0)
output/soccer_location.csv 1578 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (23.0, 0)
output/soccer_location.csv 1579 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (23.0, 0)
output/soccer_location.csv 1580 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (22.5, 0)
output/soccer_location.csv 1581 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (23.1, 0)
output/soccer_location.csv 1582 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (23.1, 0)
output/soccer_location.csv 1583 -- Processing City:  Bratislava, Slovakia 48.1516988 17.1093063 (21.9, 0)
output/soccer_location.csv 1584 -- Processing 

output/soccer_location.csv 1657 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (30.4, 0)
output/soccer_location.csv 1658 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (30.4, 0)
output/soccer_location.csv 1659 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (25.7, 0)
output/soccer_location.csv 1660 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (25.7, 0)
output/soccer_location.csv 1661 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (25.5, 0)
output/soccer_location.csv 1662 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (27.2, 0)
output/soccer_location.csv 1663 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (28.4, 0)
output/soccer_location.csv 1664 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (27.2, 0)
output/soccer_location.csv 1665 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (27.2, 0)
output/soccer_location.csv 1666 -- Processing City:  Madrid, Spain 40.4167047 -3.7035825 (25.5, 0)
output/soc

output/soccer_location.csv 1740 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (15.3, 0)
output/soccer_location.csv 1741 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (15.3, 0)
output/soccer_location.csv 1742 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (16.8, 0)
output/soccer_location.csv 1743 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (16.8, 0)
output/soccer_location.csv 1744 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (16.8, 0)
output/soccer_location.csv 1745 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (13.3, 0)
output/soccer_location.csv 1746 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (13.3, 0)
output/soccer_location.csv 1747 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (13.3, 0)
output/soccer_location.csv 1748 -- Processing City:  Stockholm, Sweden 59.3251172 18.0710935 (17.7, 0)
output/soccer_location.csv 1749 -- Processing City:  Stockholm, Sweden 59

output/soccer_location.csv 1820 -- Processing City:  Oslo, Norway 59.9133301 10.7389701 (17.4, 0)
output/soccer_location.csv 1821 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (24.8, 0)
output/soccer_location.csv 1822 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (24.8, 0)
output/soccer_location.csv 1823 -- Processing City:  Bucharest, Romania 44.4361414 26.1027202 (24.8, 0)
output/soccer_location.csv 1824 -- Processing City:  Kyiv, Ukraine 50.4500336 30.5241361 (17.1, 0)
output/soccer_location.csv 1825 -- Processing City:  Kyiv, Ukraine 50.4500336 30.5241361 (17.1, 0)
output/soccer_location.csv 1826 -- Processing City:  Kyiv, Ukraine 50.4500336 30.5241361 (17.1, 0)
output/soccer_location.csv 1827 -- Processing City:  Nicosia, Cyprus 35.1748976 33.3638568 (27.0, 0)
output/soccer_location.csv 1828 -- Processing City:  Rome, Italy 41.8933203 12.4829321 (22.8, 0)
output/soccer_location.csv 1829 -- Processing City:  Vienna, Austria 48.2083537 16.3725042 (20.

output/soccer_location.csv 1901 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (19.3, 1)
output/soccer_location.csv 1902 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (19.3, 1)
output/soccer_location.csv 1903 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (17.7, 0)
output/soccer_location.csv 1904 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (17.7, 0)
output/soccer_location.csv 1905 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (17.7, 0)
output/soccer_location.csv 1906 -- Processing City:  Cardiff, Wales 51.4816546 -3.1791934 (17.7, 0)
output/soccer_location.csv 1907 -- Processing City:  Brussels, Belgium 50.8465573 4.351697 (18.9, 0)
output/soccer_location.csv 1908 -- Processing City:  Brussels, Belgium 50.8465573 4.351697 (18.9, 0)
output/soccer_location.csv 1909 -- Processing City:  Brussels, Belgium 50.8465573 4.351697 (17.3, 0)
output/soccer_location.csv 1910 -- Processing City:  Brussels, Belgium 50.8465573 4.351697 (17.3,

In [None]:
# concat and save the sports all dataframe
sports_all_df = pd.concat(sports_df, axis=0, ignore_index=True)

# save to scv file
sports_all_df.to_csv('output/sports_location.csv')
sports_all_df.head()

In [3]:
#load the sports all dataframe
sports_all_df = pd.read_csv("output/sports_location.csv")
sports_all_df.head()

Unnamed: 0.1,Unnamed: 0,FULL_DESCRIPTION,NUMBER_BETS,VOLUME_MATCHED,WIN_AMOUNT,LOSS_AMOUNT,SETTLED_DATE,COUNTRY,CITY,LATITUDE,LONGITUDE,TEMPERATURE,RAIN
0,0,Group B/BMW Championship 2014,149070,4712927.68,5190166.0,80884170.0,5/9/14 17:40,USA,"Cherry Hills Village, CO",39.641654,-104.959425,16.6,0
1,1,Group B/BMW Championship 2014/Tournament Match...,152,3633.26,6218.803,595.5442,7/9/14 22:20,USA,"Cherry Hills Village, CO",39.641654,-104.959425,27.4,0
2,2,Group B/Chiquita Classic 2014,1321,6892.82,404.88,653088.1,7/9/14 20:11,USA,"Davidson, NC",35.499261,-80.848522,29.2,0
3,3,Group B/Deutsche Bank Championship 2014,166663,4672132.07,3546043.0,102746400.0,1/9/14 21:53,USA,"Norton, Massachusetts",41.966767,-71.186994,28.3,0
4,4,Group B/Deutsche Bank Championship 2014/Tourna...,415,10449.26,15697.52,3782.393,1/9/14 22:08,USA,"Norton, Massachusetts",41.966767,-71.186994,28.3,0


In [8]:
sports = ["Soccer", "Tennis", "Golf", "Cricket", "American Football", "Baseball", "Basketball"]
sports_betfair_df = betfair_df.loc[betfair_df["SPORTS_ID"].isin(sports)]
sports_betfair_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1011027 entries, 0 to 1043348
Data columns (total 19 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   index             1011027 non-null  int64  
 1   SPORTS_ID         1011027 non-null  object 
 2   EVENT_ID          1011027 non-null  object 
 3   SETTLED_DATE      1011027 non-null  object 
 4   FULL_DESCRIPTION  1011027 non-null  object 
 5   SCHEDULED_OFF     1011027 non-null  object 
 6   EVENT             1011027 non-null  object 
 7   DT ACTUAL_OFF     1011027 non-null  object 
 8   SELECTION_ID      1011027 non-null  int64  
 9   SELECTION         1011027 non-null  object 
 10  ODDS              1011027 non-null  float64
 11  NUMBER_BETS       1011027 non-null  int64  
 12  VOLUME_MATCHED    1011027 non-null  float64
 13  LATEST_TAKEN      1011027 non-null  object 
 14  FIRST_TAKEN       1011027 non-null  object 
 15  WIN_FLAG          1011027 non-null  int64  
 16  IN_PL

In [10]:
#add the new columns
sports_betfair_df["COUNTRY"] = ""
sports_betfair_df["CITY"] = ""
sports_betfair_df["LATITUDE"] = ""
sports_betfair_df["LONGITUDE"] = ""
sports_betfair_df["TEMPERATURE"] = ""
sports_betfair_df["RAIN"] = ""

In [11]:
%%time

# use apply to update data frame
def add_location_weather(row):
    description = row["FULL_DESCRIPTION"]
    
    sport_row = sports_all_df.loc[sports_all_df["FULL_DESCRIPTION"] == description]
    
    if sport_row.empty:
        print("No data for", description)
    else:
        row["COUNTRY"] = sport_row["COUNTRY"].iloc[0]
        row["CITY"] = sport_row["CITY"].iloc[0]
        row["LATITUDE"] = sport_row["LATITUDE"].iloc[0]
        row["LONGITUDE"] = sport_row["LONGITUDE"].iloc[0]
        row["TEMPERATURE"] = sport_row["TEMPERATURE"].iloc[0]
        row["RAIN"] = sport_row["RAIN"].iloc[0]
    
    return row

# use apply to update row information
sports_betfair_df = sports_betfair_df.apply(add_location_weather, axis=1)

CPU times: user 4min 43s, sys: 6 s, total: 4min 49s
Wall time: 4min 45s


In [12]:
%%time

# process each row in the filtered betfair dataframe to add in the locations temperature data. 
for index, row in sports_betfair_df.iterrows():
    description = row["FULL_DESCRIPTION"]
    
    sport_row = sports_all_df.loc[sports_all_df["FULL_DESCRIPTION"] == description]
    
    if sport_row.empty:
        print("No data for", description)
    else:
        sports_betfair_df.loc[index, "COUNTRY"] = sport_row["COUNTRY"].iloc[0]
        sports_betfair_df.loc[index, "CITY"] = sport_row["CITY"].iloc[0]
        sports_betfair_df.loc[index, "LATITUDE"] = sport_row["LATITUDE"].iloc[0]
        sports_betfair_df.loc[index, "LONGITUDE"] = sport_row["LONGITUDE"].iloc[0]
        sports_betfair_df.loc[index, "TEMPERATURE"] = sport_row["TEMPERATURE"].iloc[0]
        sports_betfair_df.loc[index, "RAIN"] = sport_row["RAIN"].iloc[0]
    
    if index % 25000 == 0:
        print("Processing Row:", index)

Processing Row: 0
Processing Row: 20000
Processing Row: 40000
Processing Row: 60000
Processing Row: 100000
Processing Row: 120000
Processing Row: 140000
Processing Row: 160000
Processing Row: 180000
Processing Row: 200000
Processing Row: 220000
Processing Row: 240000
Processing Row: 260000
Processing Row: 280000
Processing Row: 300000
Processing Row: 320000
Processing Row: 340000
Processing Row: 360000
Processing Row: 380000
Processing Row: 400000
Processing Row: 420000
Processing Row: 440000
Processing Row: 460000
Processing Row: 480000
Processing Row: 500000
Processing Row: 520000
Processing Row: 540000
Processing Row: 560000
Processing Row: 580000
Processing Row: 620000
Processing Row: 640000
Processing Row: 660000
Processing Row: 680000
Processing Row: 700000
Processing Row: 720000
Processing Row: 740000
Processing Row: 760000
Processing Row: 780000
Processing Row: 800000
Processing Row: 820000
Processing Row: 840000
Processing Row: 860000
Processing Row: 880000
Processing Row: 900

In [13]:
sports_betfair_df.head()

Unnamed: 0,index,SPORTS_ID,EVENT_ID,SETTLED_DATE,FULL_DESCRIPTION,SCHEDULED_OFF,EVENT,DT ACTUAL_OFF,SELECTION_ID,SELECTION,...,WIN_FLAG,IN_PLAY,WIN_AMOUNT,LOSS_AMOUNT,COUNTRY,CITY,LATITUDE,LONGITUDE,TEMPERATURE,RAIN
0,0,Soccer,115279736,04-09-2014 17:07:30,U21 Euro Championship/Qualifiers/Fixtures 04 S...,04-09-2014 15:30,Over/Under 1.5 Goals,04-09-2014 15:32:39,1221386,Over 1.5 Goals,...,1,IP,1033.479,0.0,Ukraine,Kyiv,50.450034,30.524136,17.1,0
1,1,Tennis,115288925,04-09-2014 11:50:25,Group A/Brasov Challenger 2014/Second Round Ma...,04-09-2014 10:30,Match Odds,04-09-2014 10:34:32,7659748,Aslan Karatsev,...,1,IP,4297.104,0.0,Romania,Brasov,45.65251,25.610565,22.1,0
2,2,Soccer,115196118,03-09-2014 15:08:55,U21 Euro Championship/Qualifiers/Fixtures 03 S...,03-09-2014 14:00,Over/Under 1.5 Goals,03-09-2014 14:00:45,1221386,Over 1.5 Goals,...,1,IP,1313.634,0.0,Latvia,Riga,56.949398,24.105185,17.4,0
3,4,Soccer,115236324,01-09-2014 19:46:39,Irish Soccer/FAI Ford Cup/Fixtures 01 Septembe...,01-09-2014 19:00,Over/Under 0.5 Goals,01-09-2014 19:01:34,5851483,Over 0.5 Goals,...,1,IP,1294.106,0.0,Ireland,Dublin,53.349379,-6.260559,16.1,0
4,5,Soccer,115184779,02-09-2014 18:38:14,Swedish Soccer/Superettan/Fixtures 02 Septembe...,02-09-2014 17:20,Over/Under 1.5 Goals,02-09-2014 17:20:13,1221386,Over 1.5 Goals,...,1,IP,1210.2612,0.0,Sweden,Stockholm,59.325117,18.071093,15.3,0


In [14]:
# export to csv and pickle file
sports_betfair_df.to_csv("resources/betfair_final.csv")
sports_betfair_df.to_pickle("resources/betfair_final.pkl")

In [15]:
sports_betfair_df["RAIN"].value_counts()

RAIN
0    905854
1    105173
Name: count, dtype: int64