# This code is used to analyze EPL stats
## Data acquired from http://www.football-data.co.uk/englandm.php



Brainstorming ideas:
    1. maybe make a lookup table for what numbers are associated with what parameters
    2. eventually make a user interface for selections

In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
import soccer_wx_utils
import netCDF4 as nc

In [2]:
# Folder that holds the season CSV files
path = 'EPL_stats/'

# Team whose home games are analysed
hometeam = 'Chelsea'

# Coordinates of the home team's stadium (only valid if it is outdoor)
hometeam_lat = 51.48205052791857
hometeam_lon = -0.1909135846571512

In [3]:
# Read the season results table from the stats folder
season_csv = f"{path}20-21.csv"
all_stats = pd.read_csv(season_csv)

# Notebook display: (rows, columns) of the loaded table
all_stats.shape


(178, 106)

In [4]:
# Keep only the rows where the chosen team is listed as the home side
is_home_game = all_stats['HomeTeam'].eq(hometeam)
hometeam_stats = all_stats.loc[is_home_game]

In [5]:
# Show the filtered home-game rows as a quick sanity check
print(hometeam_stats)

    Div        Date   Time HomeTeam          AwayTeam  FTHG  FTAG FTR  HTHG  \
14   E0  20/09/2020  16:30  Chelsea         Liverpool     0     2   A     0   
28   E0   3/10/2020  12:30  Chelsea    Crystal Palace     4     0   H     0   
39   E0  17/10/2020  15:00  Chelsea       Southampton     3     3   D     2   
72   E0   7/11/2020  17:30  Chelsea  Sheffield United     4     1   H     2   
94   E0  29/11/2020  16:30  Chelsea         Tottenham     0     0   D     0   
101  E0   5/12/2020  20:00  Chelsea             Leeds     3     1   H     1   
136  E0  21/12/2020  20:00  Chelsea          West Ham     3     0   H     1   
148  E0  28/12/2020  17:30  Chelsea       Aston Villa     1     1   D     1   
162  E0    3/1/2021  16:30  Chelsea          Man City     1     3   A     0   

     HTAG   ...    AvgC<2.5  AHCh  B365CAHH  B365CAHA  PCAHH  PCAHA  MaxCAHH  \
14      0   ...        2.34  0.25      2.07      1.86   2.04   1.89     2.19   
28      0   ...        2.08 -1.00      1.93      

### Next, work with the match dates to get weather data for the home games

In [6]:
# Match date and kick-off time strings for every home game.
# The columns are selected by name rather than by position, so a CSV with a
# different column layout raises a KeyError instead of silently returning
# the wrong column.
date_array = hometeam_stats['Date'].values
time_array = hometeam_stats['Time'].values

In [7]:
# Inspect the raw date and time strings before reformatting them
print(date_array, time_array)

['20/09/2020' '3/10/2020' '17/10/2020' '7/11/2020' '29/11/2020'
 '5/12/2020' '21/12/2020' '28/12/2020' '3/1/2021'] ['16:30' '12:30' '15:00' '17:30' '16:30' '20:00' '20:00' '17:30' '16:30']


In [8]:
## Convert dates into the format needed for ERA5 download:
## a single string of YYYY-MM-DD dates separated by "/"

# Number of match dates to request.
# NOTE(review): the previous version kept only the first 32 characters, i.e.
# exactly three dates; raise this cap to request more home games -- confirm
# whether the limit was intentional.
max_dates = 3

# Parse every date (so a malformed entry is reported immediately) and
# re-emit it in ISO order.
iso_dates = [
    datetime.strptime(raw_date, '%d/%m/%Y').strftime('%Y-%m-%d')
    for raw_date in date_array
]

# Joining avoids a trailing "/" when fewer than max_dates dates are available.
dates_newformat = "/".join(iso_dates[:max_dates])
print(dates_newformat)

2020-09-20/2020-10-03/2020-10-17


In [9]:
## Build the bounding box around the stadium for data retrieval,
## formatted as 'north/west/south/east'

box_half_width = 0.5  # degrees added on each side of the stadium

lat_N = hometeam_lat + box_half_width
lat_S = hometeam_lat - box_half_width
lon_W = hometeam_lon - box_half_width
lon_E = hometeam_lon + box_half_width

location_str = f"{lat_N}/{lon_W}/{lat_S}/{lon_E}"
print(location_str)

51.98205052791857/-0.6909135846571512/50.98205052791857/0.3090864153428488


## Keep in mind that ERA5 data is only available up to 3 months prior to current date

In [10]:
## Next step: pass the date string and the bounding box to the ERA5 retrieval helper



#soccer_wx_utils.ERA5_data_retrieval(dates_newformat, location_str, "Chelsea_data.nc")

## Now let's take a look at the weather data we retrieved

In [11]:
# Open the previously downloaded weather file read-only
fn = 'Chelsea_data.nc'
ds = nc.Dataset(fn, mode='r')

In [12]:
# Print the metadata summary of every variable stored in the file
for var_name in ds.variables:
    print(ds.variables[var_name])

<class 'netCDF4._netCDF4.Variable'>
float32 longitude(longitude)
    units: degrees_east
    long_name: longitude
unlimited dimensions: 
current shape = (5,)
filling on, default _FillValue of 9.969209968386869e+36 used

<class 'netCDF4._netCDF4.Variable'>
float32 latitude(latitude)
    units: degrees_north
    long_name: latitude
unlimited dimensions: 
current shape = (4,)
filling on, default _FillValue of 9.969209968386869e+36 used

<class 'netCDF4._netCDF4.Variable'>
int32 time(time)
    units: hours since 1900-01-01 00:00:00.0
    long_name: time
    calendar: gregorian
unlimited dimensions: 
current shape = (72,)
filling on, default _FillValue of -2147483647 used

<class 'netCDF4._netCDF4.Variable'>
int16 sp(time, latitude, longitude)
    scale_factor: 0.0722794241069385
    add_offset: 99378.13573528794
    _FillValue: -32767
    missing_value: -32767
    units: Pa
    long_name: Surface pressure
    standard_name: surface_air_pressure
unlimited dimensions: 
current shape = (72, 4

In [17]:
# Grid coordinate axes stored in the weather file
longitude = ds.variables['longitude'][:]
print(longitude)

latitude = ds.variables['latitude'][:]
print(latitude)

# Look up the grid indices for the stadium coordinates
# (presumably the closest grid point -- see soccer_wx_utils.find_nearest)
lat_idx = soccer_wx_utils.find_nearest(latitude, hometeam_lat)
lon_idx = soccer_wx_utils.find_nearest(longitude, hometeam_lon)

print(lat_idx, lon_idx)

[-0.691   -0.44075 -0.1905   0.05975  0.31   ]
[51.983 51.733 51.483 51.233]
2 2


In [20]:
# Keep the netCDF variable itself (do not cast to a numpy array yet) so its
# `units` and `calendar` attributes are available for the conversion below.
time = ds.variables['time']

# Convert the raw numeric time values into date objects using the units and
# calendar recorded in the file.
time_convert = nc.num2date(time[:], time.units, time.calendar)

# Notebook display: number of time steps
time.shape

(72,)

In [19]:
# 2 meter temperature (basically surface temp) at the selected grid point,
# for every time step
t2m_var = ds.variables['t2m']
T = t2m_var[:, lat_idx, lon_idx]

# Notebook display: length of the extracted series
T.shape

(72,)

In [21]:
# Dump the extracted temperature series
# NOTE(review): values around 280-295 look like Kelvin -- confirm against the t2m units attribute
print(T) 

[287.71057535 287.53115046 287.37550477 287.30752988 287.19463872
 287.22466296 287.19656027 287.637076   288.92043236 290.47785002
 292.11741403 293.55065142 294.65434269 295.43425249 295.67060335
 295.41455659 294.81383148 293.75121338 292.26873623 290.80955789
 289.5992205  288.42347104 287.7883982  286.91625391 286.7855884
 286.76637288 286.81393128 287.01377266 286.99984141 286.97726318
 286.9763024  286.80600488 286.45868441 286.204319   286.23962751
 286.10583948 285.87357191 286.23986771 286.60664389 286.93114594
 286.64387396 286.17429476 285.87429249 285.53225629 285.04129982
 284.62288194 284.47780478 283.97724056 282.18803571 282.32182375
 282.3316717  282.42270522 282.56490004 282.1366342  282.19211901
 282.06145349 282.19452095 282.40108776 282.94320554 283.51919067
 283.99885801 284.437212   284.6243231  284.42592288 284.00246092
 283.38372127 283.29845241 283.12743431 283.03856254 282.95065155
 282.73063388 282.74312397]
