In [7]:
# mixture of libs for web scraping, parsing and pandas
from bs4 import BeautifulSoup
import datetime as dt
from functools import reduce
import io
import json
import numpy as np
import os
import pandas as pd
import requests
import seaborn as sns
import sqlite3
from urllib.request import Request, urlopen
import warnings

from epl.query import create_and_query, create_conn, get_table_columns, query_creator, query_db, table_exists
from epl.features_parse import calc_rolling_avg, create_base_feat_cols, create_col_map, get_feats_raw_data, get_new_matches, handle_feats, merge_home_away, process_feature_data, split_home_away_feats
from epl.features_parse import FEATURE_KEY_COLS, FEATURE_ID_COLS

pd.options.display.max_columns = None
warnings.filterwarnings('ignore')

## 1. Get New Matches

First step is to check if there are any new matches in the matches table that don't have corresponding features in the features table

In [2]:
df_new_matches = get_new_matches()

Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='features'
Features table doesn't exist
Running query: SELECT Date, HomeTeam, AwayTeam, Country, Div, Season FROM matches 


In [10]:
print("Found {} new matches for features to be calculated".format(len(df_new_matches)/2))
df_new_matches.tail()

Found 189561.0 new matches for features to be calculated


Unnamed: 0,Date,Team,Location,Country,Div,Season
189523,2020-12-06,Werder Bremen,Home,germany,D1,2021
189512,2020-12-06,West Brom,Home,england,E0,2021
379076,2020-12-06,Wolves,Away,england,E0,2021
189527,2020-12-06,Wurzburger Kickers,Home,germany,D2,2021
189557,2020-12-06,Zaragoza,Home,spain,SP2,2021


## 2. Define Features We Wish to Calc

To 'automate' the calc of features we need the following:
 - A naming convention
 - A mapping from feature name to calc methodology

Can be done in the following way:
 - feats: A dict of base col feature to raw data cols
 - streak_length: Averaging period
 - avg_type: Can be 'Avg' or 'Exp'

In [4]:
# define feats
# we define the new base col name,the required cols and how to convert to t and opp equiv cols
feats = {'GF': {'Home': 'FTHG', 'Away': 'FTAG'},
         'GA': {'Home': 'FTAG', 'Away': 'FTHG'},
         
         'GFH': {'Home': 'FTHG'},
         'GAH': {'Home': 'FTAG'},
         'GFA': {'Away': 'FTAG'},
         'GAA': {'Away': 'FTHG'},
         
         'SF': {'Home': 'HS', 'Away': 'AS'},
         'SA': {'Home': 'AS', 'Away': 'HS'},
         
         'SFH': {'Home': 'HS'},
         'SAH': {'Home': 'AS'},
         'SFA': {'Away': 'AS'},
         'SAA': {'Away': 'HS'},
         
         'STF': {'Home': 'HST', 'Away': 'AST'},
         'STA': {'Home': 'AST', 'Away': 'HST'},
         
         'STFH': {'Home': 'HST'},
         'STAH': {'Home': 'AST'},
         'STFA': {'Away': 'AST'},
         'STAA': {'Away': 'HST'},
         
         'PPG': {'Home': 'FTR', 'Away': 'FTR'},
         'PPGH': {'Home': 'FTR'},
         'PPGA': {'Away': 'FTR'},
        }

# handle streak lengths by iterating over avg function so for now its an int
streak_length = 3

# avg type is string for avg - for now only 2 types
avg_type = 'Avg'

When we want to bulk create can then create a list of these configs like so:

In [5]:
feat_list = [{'feat_type': 'avg',
              'feat_dict': feats,
              'streak': x,
              'avg_type': 'Avg'} for x in [3,5,10,20,40]]

## 3. Compute a Subset of Features

Let's demonstrate by operating on the first element of the feat_list

In [5]:
feat_desc = feat_list[0]

In [6]:
feats = feat_desc['feat_dict']
streak_length = feat_desc['streak']
avg_type = feat_desc['avg_type']

### 3i. Get Relevant Raw Match Data

In [7]:
# get the raw data required to calc the features
df_raw = get_feats_raw_data(feats)
df_raw.tail()

Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='matches'
Running query: SELECT Date, HomeTeam, AwayTeam, HS, AST, FTAG, [AS], FTHG, HST, FTR FROM matches 


Unnamed: 0,Date,Team,Location,HS,AST,FTAG,AS,FTHG,HST,FTR
378127,2020-10-31,Zwolle,Away,8.0,5.0,1.0,10.0,5.0,5.0,H
378252,2020-11-06,Zwolle,Away,10.0,1.0,2.0,12.0,2.0,6.0,D
188956,2020-11-21,Zwolle,Home,10.0,2.0,1.0,2.0,1.0,4.0,D
378801,2020-11-28,Zwolle,Away,11.0,6.0,2.0,13.0,2.0,2.0,D
189453,2020-12-05,Zwolle,Home,16.0,4.0,1.0,12.0,2.0,5.0,H


Restrict raw data to only include historical matches played by teams with new ferature data to be calculated

In [8]:
# restrict to only teams in the new matches df
df_raw = df_raw[df_raw.Team.isin(df_new_matches.Team.unique())]

Split the features into all / home / away for calc

### 3ii. Construct Base Feat Cols e.g. GF

Before we create historical looking avgs we need to construct the cols we will avg over

In [11]:
# create map from feat_name to base_name to construction
# split out by All / Home / Away for sequential calc
col_map = create_col_map(feats, streak_length, avg_type)
all_feats, home_feats, away_feats = split_home_away_feats(col_map)

In [14]:
print("E.g. of entry in col_map --> {}: {}".format(list(col_map.keys())[0], list(col_map.values())[0]))

E.g. of entry in col_map --> AvgGF_3: {'GF': {'Home': 'FTHG', 'Away': 'FTAG'}}


Now compute the base cols which we will then avg over to create our features

In [15]:
# create base cols e.g. GF / GA for use to calc e.g. AvgGF_3
c_dict = {'All': all_feats, 'Home': home_feats, 'Away': away_feats}
for k, v in c_dict.items():
    if len(v) > 0:
        df_raw = create_base_feat_cols(df_raw, v, k)
    elif k == 'All':
        # if all feats blank then issue so report
        print('All features is blank - probably an error: {}'.format(v))

In [17]:
df_raw.tail(5)

Unnamed: 0,Date,Team,Location,HS,AST,FTAG,AS,FTHG,HST,FTR,GF,GA,SF,SA,STF,STA,PPG,GFH,GAH,SFH,SAH,STFH,STAH,PPGH,GFA,GAA,SFA,SAA,STFA,STAA,PPGA
378127,2020-10-31,Zwolle,Away,8.0,5.0,1.0,10.0,5.0,5.0,H,1.0,5.0,10.0,8.0,5.0,5.0,0.0,1.0,5.0,10.0,8.0,5.0,5.0,0.0,1.0,5.0,10.0,8.0,5.0,5.0,0.0
378252,2020-11-06,Zwolle,Away,10.0,1.0,2.0,12.0,2.0,6.0,D,2.0,2.0,12.0,10.0,1.0,6.0,1.0,2.0,2.0,12.0,10.0,1.0,6.0,1.0,2.0,2.0,12.0,10.0,1.0,6.0,1.0
188956,2020-11-21,Zwolle,Home,10.0,2.0,1.0,2.0,1.0,4.0,D,1.0,1.0,10.0,2.0,4.0,2.0,1.0,1.0,1.0,10.0,2.0,4.0,2.0,1.0,1.0,1.0,10.0,2.0,4.0,2.0,1.0
378801,2020-11-28,Zwolle,Away,11.0,6.0,2.0,13.0,2.0,2.0,D,2.0,2.0,13.0,11.0,6.0,2.0,1.0,2.0,2.0,13.0,11.0,6.0,2.0,1.0,2.0,2.0,13.0,11.0,6.0,2.0,1.0
189453,2020-12-05,Zwolle,Home,16.0,4.0,1.0,12.0,2.0,5.0,H,2.0,1.0,16.0,12.0,5.0,4.0,3.0,2.0,1.0,16.0,12.0,5.0,4.0,3.0,2.0,1.0,16.0,12.0,5.0,4.0,3.0


### 3iii. Compute Features

Now we have the data we want, we need to compute the historical averages grouped by team

In [19]:
# compute feats
ft_dfs = {}
for k, v in c_dict.items():
    # if not all, then restrict data to only home/away games
    if k != 'All':
        df_r = df_raw[df_raw.Location == k]
        df_f = calc_rolling_avg(df_r, v, streak_length)
    else:
        df_f = calc_rolling_avg(df_raw, v, streak_length)
    # add to dict
    ft_dfs[k] = df_f

An example of what our output looks like for features relevant to 'All' games

In [22]:
ft_dfs['All'].tail()

Unnamed: 0,Date,Team,Location,GF,GA,SF,SA,STF,STA,PPG,AvgGF_3,AvgGA_3,AvgSF_3,AvgSA_3,AvgSTF_3,AvgSTA_3,AvgPPG_3
378127,2020-10-31,Zwolle,Away,1.0,5.0,10.0,8.0,5.0,5.0,0.0,0.666667,2.0,17.0,10.333333,5.0,3.666667,0.666667
378252,2020-11-06,Zwolle,Away,2.0,2.0,12.0,10.0,1.0,6.0,1.0,1.0,2.333333,14.0,10.0,3.333333,4.666667,0.666667
188956,2020-11-21,Zwolle,Home,1.0,1.0,10.0,2.0,4.0,2.0,1.0,1.333333,2.666667,10.666667,6.666667,3.333333,4.333333,0.666667
378801,2020-11-28,Zwolle,Away,2.0,2.0,13.0,11.0,6.0,2.0,1.0,1.666667,1.666667,11.666667,7.666667,3.666667,3.333333,1.0
189453,2020-12-05,Zwolle,Home,2.0,1.0,16.0,12.0,5.0,4.0,3.0,1.666667,1.333333,13.0,8.333333,5.0,2.666667,1.666667


### 3iv. Merge & Shift Feature Tables

We now have the following issues:
 - We only have 'Home' and 'Away' relevant stats computed for Home and Away games respectively
  - This means e.g. if AvgGFH_3 is helpful to predict a team's perf in their next Away fixture we haven't got it
 - All features are not backward looking i.e. goals in the current match are used to compute the avg - not just historical matches

In [23]:
df_feats = merge_home_away(ft_dfs, all_feats, home_feats, away_feats, shift=True)

In [24]:
df_feats.tail(10)

Unnamed: 0,Date,Team,Location,GF,GA,SF,SA,STF,STA,PPG,AvgGF_3,AvgGA_3,AvgSF_3,AvgSA_3,AvgSTF_3,AvgSTA_3,AvgPPG_3,GFH,GAH,SFH,SAH,STFH,STAH,PPGH,AvgGFH_3,AvgGAH_3,AvgSFH_3,AvgSAH_3,AvgSTFH_3,AvgSTAH_3,AvgPPGH_3,GFA,GAA,SFA,SAA,STFA,STAA,PPGA,AvgGFA_3,AvgGAA_3,AvgSFA_3,AvgSAA_3,AvgSTFA_3,AvgSTAA_3,AvgPPGA_3
379112,2020-09-19,Zwolle,Away,1.0,1.0,14.0,13.0,4.0,3.0,1.0,1.666667,2.0,15.0,12.333333,5.666667,5.0,1.333333,,,,,,,,2.333333,3.0,13.666667,16.0,5.0,7.666667,1.0,1.0,1.0,14.0,13.0,4.0,3.0,1.0,0.333333,1.0,10.0,12.666667,2.333333,4.0,0.666667
379113,2020-09-26,Zwolle,Home,4.0,0.0,20.0,9.0,8.0,2.0,3.0,0.666667,1.333333,14.666667,11.666667,4.666667,3.666667,0.666667,4.0,0.0,20.0,9.0,8.0,2.0,3.0,2.333333,3.0,13.666667,16.0,5.0,7.666667,1.0,,,,,,,,0.666667,1.333333,12.0,13.0,3.333333,4.666667,0.666667
379114,2020-10-18,Zwolle,Home,0.0,3.0,12.0,12.0,3.0,5.0,0.0,1.666667,1.0,17.0,11.666667,5.666667,3.666667,1.333333,0.0,3.0,12.0,12.0,3.0,5.0,0.0,2.666667,1.666667,17.333333,12.333333,6.666667,5.0,2.0,,,,,,,,0.666667,1.333333,12.0,13.0,3.333333,4.666667,0.666667
379115,2020-10-21,Zwolle,Away,1.0,1.0,21.0,11.0,6.0,3.0,1.0,1.666667,1.333333,15.333333,11.333333,5.0,3.333333,1.333333,,,,,,,,1.333333,1.666667,16.333333,11.333333,5.333333,4.333333,1.0,1.0,1.0,21.0,11.0,6.0,3.0,1.0,0.666667,1.333333,12.0,13.0,3.333333,4.666667,0.666667
379116,2020-10-24,Zwolle,Home,0.0,0.0,20.0,12.0,4.0,3.0,1.0,1.666667,1.333333,17.666667,10.666667,5.666667,3.333333,1.333333,0.0,0.0,20.0,12.0,4.0,3.0,1.0,1.333333,1.666667,16.333333,11.333333,5.333333,4.333333,1.0,,,,,,,,1.0,1.0,16.0,11.0,5.0,2.666667,1.0
379117,2020-10-31,Zwolle,Away,1.0,5.0,10.0,8.0,5.0,5.0,0.0,0.333333,1.333333,17.666667,11.666667,4.333333,3.666667,0.666667,,,,,,,,1.333333,1.0,17.333333,11.0,5.0,3.333333,1.333333,1.0,5.0,10.0,8.0,5.0,5.0,0.0,1.0,1.0,16.0,11.0,5.0,2.666667,1.0
379118,2020-11-06,Zwolle,Away,2.0,2.0,12.0,10.0,1.0,6.0,1.0,0.666667,2.0,17.0,10.333333,5.0,3.666667,0.666667,,,,,,,,1.333333,1.0,17.333333,11.0,5.0,3.333333,1.333333,2.0,2.0,12.0,10.0,1.0,6.0,1.0,1.0,2.333333,15.0,10.666667,5.0,3.666667,0.666667
379119,2020-11-21,Zwolle,Home,1.0,1.0,10.0,2.0,4.0,2.0,1.0,1.0,2.333333,14.0,10.0,3.333333,4.666667,0.666667,1.0,1.0,10.0,2.0,4.0,2.0,1.0,1.333333,1.0,17.333333,11.0,5.0,3.333333,1.333333,,,,,,,,1.333333,2.666667,14.333333,9.666667,4.0,4.666667,0.666667
379120,2020-11-28,Zwolle,Away,2.0,2.0,13.0,11.0,6.0,2.0,1.0,1.333333,2.666667,10.666667,6.666667,3.333333,4.333333,0.666667,,,,,,,,0.333333,1.333333,14.0,8.666667,3.666667,3.333333,0.666667,2.0,2.0,13.0,11.0,6.0,2.0,1.0,1.333333,2.666667,14.333333,9.666667,4.0,4.666667,0.666667
379121,2020-12-05,Zwolle,Home,2.0,1.0,16.0,12.0,5.0,4.0,3.0,1.666667,1.666667,11.666667,7.666667,3.666667,3.333333,1.0,2.0,1.0,16.0,12.0,5.0,4.0,3.0,0.333333,1.333333,14.0,8.666667,3.666667,3.333333,0.666667,,,,,,,,1.666667,3.0,11.666667,9.666667,4.0,4.333333,0.666667


### 3v. Finalise Output

Now we just need to return only the cols we wish and only for the new matches

In [25]:
# now we have our correctly offset feats
# we need to select just the cols we need
id_cols = ['Date', 'Team'] + ['Location']
df_feats = df_feats[id_cols + list(col_map.keys())]
df_feats.tail(10)

Unnamed: 0,Date,Team,Location,AvgGF_3,AvgGA_3,AvgGFH_3,AvgGAH_3,AvgGFA_3,AvgGAA_3,AvgSF_3,AvgSA_3,AvgSFH_3,AvgSAH_3,AvgSFA_3,AvgSAA_3,AvgSTF_3,AvgSTA_3,AvgSTFH_3,AvgSTAH_3,AvgSTFA_3,AvgSTAA_3,AvgPPG_3,AvgPPGH_3,AvgPPGA_3
379112,2020-09-19,Zwolle,Away,1.666667,2.0,2.333333,3.0,0.333333,1.0,15.0,12.333333,13.666667,16.0,10.0,12.666667,5.666667,5.0,5.0,7.666667,2.333333,4.0,1.333333,1.0,0.666667
379113,2020-09-26,Zwolle,Home,0.666667,1.333333,2.333333,3.0,0.666667,1.333333,14.666667,11.666667,13.666667,16.0,12.0,13.0,4.666667,3.666667,5.0,7.666667,3.333333,4.666667,0.666667,1.0,0.666667
379114,2020-10-18,Zwolle,Home,1.666667,1.0,2.666667,1.666667,0.666667,1.333333,17.0,11.666667,17.333333,12.333333,12.0,13.0,5.666667,3.666667,6.666667,5.0,3.333333,4.666667,1.333333,2.0,0.666667
379115,2020-10-21,Zwolle,Away,1.666667,1.333333,1.333333,1.666667,0.666667,1.333333,15.333333,11.333333,16.333333,11.333333,12.0,13.0,5.0,3.333333,5.333333,4.333333,3.333333,4.666667,1.333333,1.0,0.666667
379116,2020-10-24,Zwolle,Home,1.666667,1.333333,1.333333,1.666667,1.0,1.0,17.666667,10.666667,16.333333,11.333333,16.0,11.0,5.666667,3.333333,5.333333,4.333333,5.0,2.666667,1.333333,1.0,1.0
379117,2020-10-31,Zwolle,Away,0.333333,1.333333,1.333333,1.0,1.0,1.0,17.666667,11.666667,17.333333,11.0,16.0,11.0,4.333333,3.666667,5.0,3.333333,5.0,2.666667,0.666667,1.333333,1.0
379118,2020-11-06,Zwolle,Away,0.666667,2.0,1.333333,1.0,1.0,2.333333,17.0,10.333333,17.333333,11.0,15.0,10.666667,5.0,3.666667,5.0,3.333333,5.0,3.666667,0.666667,1.333333,0.666667
379119,2020-11-21,Zwolle,Home,1.0,2.333333,1.333333,1.0,1.333333,2.666667,14.0,10.0,17.333333,11.0,14.333333,9.666667,3.333333,4.666667,5.0,3.333333,4.0,4.666667,0.666667,1.333333,0.666667
379120,2020-11-28,Zwolle,Away,1.333333,2.666667,0.333333,1.333333,1.333333,2.666667,10.666667,6.666667,14.0,8.666667,14.333333,9.666667,3.333333,4.333333,3.666667,3.333333,4.0,4.666667,0.666667,0.666667,0.666667
379121,2020-12-05,Zwolle,Home,1.666667,1.666667,0.333333,1.333333,1.666667,3.0,11.666667,7.666667,14.0,8.666667,11.666667,9.666667,3.666667,3.333333,3.666667,3.333333,4.0,4.333333,1.0,0.666667,0.666667


Left join onto our original match data - this will also provide a few additional ID columns to help select subsets of data

In [26]:
df_final = pd.merge(left=df_new_matches, right=df_feats, how='left', on=id_cols)

In [27]:
df_final.tail(10)

Unnamed: 0,Date,Team,Location,Country,Div,Season,AvgGF_3,AvgGA_3,AvgGFH_3,AvgGAH_3,AvgGFA_3,AvgGAA_3,AvgSF_3,AvgSA_3,AvgSFH_3,AvgSAH_3,AvgSFA_3,AvgSAA_3,AvgSTF_3,AvgSTA_3,AvgSTFH_3,AvgSTAH_3,AvgSTFA_3,AvgSTAA_3,AvgPPG_3,AvgPPGH_3,AvgPPGA_3
379112,2020-12-06,Tottenham,Home,england,E0,2021,1.0,0.0,2.333333,1.333333,0.666667,0.0,9.333333,15.666667,8.0,13.666667,11.0,12.666667,2.666667,3.333333,3.666667,3.666667,3.0,3.0,2.333333,2.333333,2.333333
379113,2020-12-06,Utrecht,Home,netherlands,N1,2021,0.666667,1.666667,1.0,1.666667,1.0,2.0,7.666667,12.666667,18.333333,12.666667,9.333333,11.333333,3.666667,4.0,4.333333,4.0,4.666667,4.333333,0.666667,1.333333,0.666667
379114,2020-12-06,Verona,Home,italy,I1,2021,1.333333,1.333333,1.0,1.0,1.666667,1.0,8.333333,15.333333,13.666667,8.333333,5.666667,17.333333,4.333333,6.666667,8.666667,4.0,2.666667,7.666667,1.333333,1.333333,1.666667
379115,2020-12-06,Vicenza,Home,italy,I2,2021,1.333333,1.0,2.0,2.0,1.333333,1.0,13.0,14.333333,17.0,10.333333,13.0,14.333333,4.0,3.666667,5.0,4.0,4.0,3.666667,1.666667,1.0,1.666667
379116,2020-12-06,Villarreal,Home,spain,SP1,2021,1.666667,1.0,1.666667,0.666667,1.333333,0.666667,10.666667,9.0,13.333333,9.333333,8.666667,8.666667,2.666667,3.333333,4.0,1.666667,3.0,3.0,1.666667,2.333333,1.666667
379117,2020-12-06,Werder Bremen,Home,germany,D1,2021,1.666667,2.333333,1.0,0.666667,1.666667,2.333333,10.666667,11.666667,10.0,11.0,7.666667,13.666667,4.0,3.333333,2.666667,4.0,4.0,4.666667,0.666667,1.666667,0.666667
379118,2020-12-06,West Brom,Home,england,E0,2021,0.333333,0.666667,0.333333,0.333333,0.333333,1.333333,12.0,19.0,14.0,16.333333,8.666667,11.666667,3.333333,5.666667,4.666667,4.666667,2.0,5.333333,1.0,1.333333,0.333333
379119,2020-12-06,Wolves,Away,england,E0,2021,1.0,1.0,1.333333,0.666667,1.0,0.666667,12.666667,10.0,17.0,7.666667,8.333333,12.0,5.333333,2.333333,5.666667,2.0,3.333333,2.333333,1.333333,1.666667,2.0
379120,2020-12-06,Wurzburger Kickers,Home,germany,D2,2021,1.333333,2.333333,1.333333,2.0,1.0,3.0,10.666667,12.333333,11.333333,7.0,9.666667,17.666667,4.0,6.333333,4.333333,2.666667,2.333333,6.333333,1.0,1.0,0.0
379121,2020-12-06,Zaragoza,Home,spain,SP2,2021,0.333333,1.666667,1.333333,2.0,0.333333,1.666667,9.0,11.333333,10.666667,7.0,7.666667,11.333333,2.0,3.666667,4.0,4.666667,2.0,3.0,0.0,0.333333,0.0


### 3vi. Check

Can do a few checks to verify if the feature data is in line with expectations

In [90]:
t = ['Man United', 'Tottenham', 'Man City', 'West Brom', 'Wolves']

In [91]:
df_check = df_raw[df_raw.Team.isin(t)][['Date', 'Team', 'GF', 'GA']]

In [92]:
df_check = df_check[['GF', 'GA']].groupby(df_check['Team']).rolling(streak_length).mean().shift(1)

In [93]:
df_data = df_final[df_final.Team.isin(t)].sort_values(['Team', 'Date'])[['AvgGF_3', 'AvgGA_3']]

In [94]:
df_check

Unnamed: 0_level_0,Unnamed: 1_level_0,GF,GA
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Man City,112,,
Man City,189727,,
Man City,189742,,
Man City,237,0.333333,1.000000
Man City,257,0.000000,1.333333
...,...,...,...
Wolves,188470,1.000000,0.333333
Wolves,378379,1.333333,0.333333
Wolves,189082,1.000000,0.666667
Wolves,378818,1.000000,0.666667


In [95]:
x = df_check['GA'].values
y = df_data['AvgGA_3'].values

In [96]:
len(x)

5438

In [97]:
len(y)

5438

In [98]:
sum(np.nan_to_num(x) == np.nan_to_num(y))

5430

## 4. Set Down Into Features Table

Once we have calculated the features we need to set them down into the sqlite db

In [5]:
df = handle_feats(feat_list)

Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='features'
Features table doesn't exist
Running query: SELECT Date, HomeTeam, AwayTeam, Country, Div, Season FROM matches 
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='matches'
Running query: SELECT Date, HomeTeam, AwayTeam, FTAG, FTR, HST, AST, HS, [AS], FTHG FROM matches 
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='matches'
Running query: SELECT Date, HomeTeam, AwayTeam, FTAG, FTR, HST, AST, HS, [AS], FTHG FROM matches 
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='matches'
Running query: SELECT Date, HomeTeam, AwayTeam, FTAG, FTR, HST, AST, HS, [AS], FTHG FROM matches 
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='matches'
Running query: SELECT Date, HomeTeam, AwayTeam, FTAG, FTR, HST, AST, HS, [AS], FTHG FROM matches 
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='

In [8]:
res = process_feature_data(feat_list, uat=False)

Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='features'
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='features'
Running query: SELECT name FROM sqlite_master WHERE type='table' AND name='features'
Running query: SELECT Date, Team FROM features 
Running query: SELECT Date, HomeTeam, AwayTeam, Country, Div, Season FROM matches 
No new matches to process features for
Exiting feature processing
