#Import Libraries

In [14]:
import numpy
import math
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import tensorflow as tf
%matplotlib inline

#Load Data

In [15]:
import os

from google.colab import drive

# Mount Google Drive in the Colab runtime, then switch into the assignment
# folder so that the relative file paths used later resolve against it.
drive.mount('/content/drive/')
os.chdir("/content/drive/My Drive/CA683 Assignment2")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [16]:
# Read the match records and drop every row whose 'ranking' is the literal
# string 'None'.
df = (
    pd.read_csv('Premier League_2014_2022.csv')
    .loc[lambda frame: frame['ranking'] != 'None']
)

#Data Processing

In [None]:
# Window size (in consecutive records) of the rolling mean applied to each
# player's 'ranking' values.
MOVING_AVERAGE = 5
# Minimum number of records a player must have to be kept in dict_player.
MINIMUM_RECORD = 120

In [None]:
# dict_player:      player_id -> smoothed rating series (list of floats)
# dict_player_name: player_id -> first player_name found for that id
dict_player = {}
dict_player_name = {}
for player_id in df['player_id'].unique():
  player_rows = df[df['player_id'] == player_id]
  smoothed = list(player_rows['ranking'].astype(float).rolling(MOVING_AVERAGE).mean())
  # The record count is checked before the leading entries are trimmed.
  if len(smoothed) < MINIMUM_RECORD:
    continue
  # The first MOVING_AVERAGE-1 rolling means are NaN (window not yet full),
  # so they are cut off.
  dict_player[player_id] = smoothed[MOVING_AVERAGE-1:]
  dict_player_name[player_id] = player_rows['player_name'].unique()[0]

In [None]:
def create_inout_sequences(input_data, tw):
    """Cut a series into overlapping (window, next value) training pairs.

    Args:
        input_data: sliceable sequence of observations.
        tw: window length, i.e. how many consecutive values form one input.

    Returns:
        A tuple ``(feature_seq, result_seq)``. ``feature_seq[i]`` is the slice
        ``input_data[i:i+tw]`` and ``result_seq[i]`` is the one-element slice
        holding the value that directly follows that window. Both lists have
        ``len(input_data) - tw`` entries (none when the series is not longer
        than ``tw``).
    """
    window_count = len(input_data) - tw
    # Each window always holds exactly tw values; only its start moves.
    feature_seq = [input_data[start:start + tw] for start in range(window_count)]
    # The label is the first value after the window, kept as a slice of length 1.
    result_seq = [input_data[start + tw:start + tw + 1] for start in range(window_count)]
    return feature_seq, result_seq
def cal_rmsle(predicted,real):
  """Root mean squared logarithmic error of two index-aligned sequences.

  Every position ``i`` in ``range(len(predicted))`` contributes
  ``(log(predicted[i] + 1) - log(real[i] + 1)) ** 2``; the square root of the
  mean of those terms is returned.
  """
  count = len(predicted)
  total = 0.0
  for idx in range(count):
    log_gap = np.log(predicted[idx] + 1) - np.log(real[idx] + 1)
    total = total + log_gap ** 2
  return (total / count) ** 0.5

In [None]:
# Number of players that passed the MINIMUM_RECORD filter.
len(dict_player)

269

#Load Grid Search Result

In [None]:
class MODEL_CONFIG:
  """Settings for one LSTM sliding-window cross-validation run.

  The class-level attributes are shared by every configuration; the
  constructor arguments are the per-run hyper-parameters.
  """
  # Number of trailing sequences of each window that are held out for testing.
  TEST_DATA_SIZE = 20
  # Number of observations taken before the split mark of each window; also
  # the position of the first split mark.
  INITIAL_WINDOW = 80
  # Number of observations after the split mark, and the step by which the
  # split mark advances from one fold to the next.
  WINDOW_LENTH = 20
  # CSV file in which the per-player metrics are accumulated.
  Filename = './LSTM_CV_result80.csv'
  # Folder that receives the per-player prediction plots.
  Foldername = './Pic_LSTM_CV_80/'

  def __init__(self,
               LEARNING_RATE = 0.001,
               TIME_STEP = 10,
               DENSE = 1,
               NUM_NODES = 30,
               EPOCHS = 500,
               OPTIMIZER = 'SGD'):
    """Store the hyper-parameters and build the requested optimizer.

    Args:
      LEARNING_RATE: learning rate handed to the optimizer.
      TIME_STEP: number of past observations that form one model input.
      DENSE: number of units of the Dense output layer.
      NUM_NODES: number of units of the LSTM layer.
      EPOCHS: number of training epochs per fit.
      OPTIMIZER: one of 'SGD', 'ADAM' or 'RMSprop' (case-sensitive).

    Raises:
      ValueError: if OPTIMIZER is not one of the supported names.
    """
    self.LEARNING_RATE = LEARNING_RATE
    self.TIME_STEP = TIME_STEP
    self.DENSE = DENSE
    self.NUM_NODES = NUM_NODES
    self.EPOCHS = EPOCHS
    if OPTIMIZER == "SGD":
      # NOTE(review): `decay` is passed through unchanged; confirm that the
      # installed Keras version still accepts this keyword.
      self.OPTIMIZER = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE, decay=1e-6, momentum=0.9, nesterov=True)
    elif OPTIMIZER == "ADAM":
      self.OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE,beta_1=0.9,beta_2=0.999,epsilon=1e-07,amsgrad=False)
    elif OPTIMIZER == "RMSprop":
      self.OPTIMIZER = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE,rho=0.9,momentum=0.0,epsilon=1e-07,centered=False)
    else:
      # Fail here instead of leaving self.OPTIMIZER undefined, which would
      # only show up later as an AttributeError when the model is compiled.
      raise ValueError(
          "Unsupported OPTIMIZER %r; expected 'SGD', 'ADAM' or 'RMSprop'" % (OPTIMIZER,))

In [None]:
# Rank the grid-search results by MSE (lowest first) and show the five best.
df_para = pd.read_csv('ParaResultCV.csv').sort_values(by="MSE")
print(df_para.iloc[:5])

    LEARNING_RATE  TIME_STEP  DENSE  NUM_NODES  EPOCHS OPTIMIZER  TIME_STEP.1  \
23          0.001          7      1         20     500      ADAM          NaN   
34          0.001          7      1         40     500       SGD          NaN   
9           0.001          6      1         30     400      ADAM          NaN   
3           0.001          6      1         20     400      ADAM          NaN   
32          0.001          7      1         40     400       SGD          NaN   

         MAE       MSE     RMSLE        R2  
23  0.179809  0.055023  0.029342  0.516331  
34  0.185452  0.057287  0.030016  0.476154  
9   0.188581  0.059201  0.030295  0.502503  
3   0.189823  0.061629  0.030634  0.485452  
32  0.189335  0.062664  0.030677  0.485385  


In [None]:
# Hyper-parameters of the grid-search row with the lowest MSE.
config = MODEL_CONFIG(
    LEARNING_RATE=0.001,
    TIME_STEP=7,
    DENSE=1,
    NUM_NODES=20,
    EPOCHS=500,
    OPTIMIZER='ADAM',
)

#Apply Model

In [None]:
def sliding_cv_fit(data, config, cur_player_id):
  """Fit and score an LSTM on one series with a sliding train/test window.

  One model (an LSTM layer of ``config.NUM_NODES`` units followed by a Dense
  layer of ``config.DENSE`` units, MSE loss) is built once and then fitted
  again on every window without being re-initialised. Each window covers
  ``config.INITIAL_WINDOW`` observations before the split mark and
  ``config.WINDOW_LENTH`` after it; the last ``config.TEST_DATA_SIZE``
  sequences of the window are held out and scored. A plot of the real series
  together with the train and test predictions is saved under
  ``config.Foldername`` and its path is printed.

  Args:
    data: sliceable sequence of numeric observations for one player.
    config: object exposing the MODEL_CONFIG attributes used here.
    cur_player_id: key into ``dict_player_name``; also the plot's file name.

  Returns:
    Tuple ``(mae, mse, r2, rmsle)``, each the mean of the per-fold values
    (``np.mean`` of an empty list when ``data`` is too short for any fold).
  """
  list_mae = []
  list_mse = []
  list_r2 = []
  list_rmsle = []

  # Build a fresh optimizer from the configured one's settings so that no
  # optimizer state is shared between the models created by successive calls.
  optimizer = type(config.OPTIMIZER).from_config(config.OPTIMIZER.get_config())

  #set model
  model = Sequential()
  model.add(LSTM(config.NUM_NODES, input_shape=(1, config.TIME_STEP)))
  model.add(Dense(config.DENSE))
  model.compile(loss='mean_squared_error', optimizer=optimizer)

  list_train_pred = []
  list_test_pred = []

  # Split marks advance by WINDOW_LENTH and stop at the largest multiple of
  # WINDOW_LENTH that is not greater than len(data).
  split_stop = (len(data)//config.WINDOW_LENTH)*config.WINDOW_LENTH
  for train_mark in range(config.INITIAL_WINDOW, split_stop, config.WINDOW_LENTH):
    data_piece = data[train_mark - config.INITIAL_WINDOW : train_mark + config.WINDOW_LENTH]
    data_seq,result_seq = create_inout_sequences(data_piece, config.TIME_STEP)

    train_X = np.array(data_seq[:-config.TEST_DATA_SIZE])
    test_X = np.array(data_seq[-config.TEST_DATA_SIZE:])
    train_Y = np.array(result_seq[:-config.TEST_DATA_SIZE])
    test_Y = np.array(result_seq[-config.TEST_DATA_SIZE:])

    # Reshape to (samples, 1, TIME_STEP) to match the LSTM input_shape.
    train_X = np.reshape(train_X, (train_X.shape[0],1,train_X.shape[1]))
    test_X = np.reshape(test_X, (test_X.shape[0],1,test_X.shape[1]))

    model.fit(train_X, train_Y, epochs=config.EPOCHS, batch_size=1, verbose=0)

    # Convert the model outputs to NumPy so the metric and plotting code
    # below works on plain arrays.
    trainPredict = np.asarray(model(train_X))
    testPredict = np.asarray(model(test_X))
    if train_mark == config.INITIAL_WINDOW:
      list_train_pred.extend(trainPredict[:,0])
    else:
      # Consecutive windows start WINDOW_LENTH apart, so only the last
      # WINDOW_LENTH training predictions are kept from later folds.
      list_train_pred.extend(trainPredict[:,0][-config.WINDOW_LENTH:])

    list_test_pred.extend(testPredict[:,0])

    list_mae.append(mean_absolute_error(test_Y, testPredict))
    list_mse.append(mean_squared_error(test_Y, testPredict))
    list_r2.append(r2_score(test_Y, testPredict))
    # Arguments follow cal_rmsle's (predicted, real) signature.
    list_rmsle.append(cal_rmsle(testPredict, test_Y))

  #create figure
  fig_len = len(data) // 10 + 2
  fig = plt.figure(figsize=[fig_len,6])
  plt.plot(range(0,len(data)),data,color='blue')
  len_trainP = len(list_train_pred)
  len_testP = len(list_test_pred)
  # Train predictions are drawn from x = TIME_STEP, test predictions from
  # x = INITIAL_WINDOW.
  plt.plot(range(config.TIME_STEP,config.TIME_STEP+len_trainP),list_train_pred,color='orange')
  plt.plot(range(config.INITIAL_WINDOW,config.INITIAL_WINDOW+len_testP),list_test_pred,color='red')
  plt.legend(['Real', 'Train Predict', 'Test Predict'],loc='best')
  plt.title('Performance of ' + dict_player_name[cur_player_id])

  fig_path = config.Foldername + str(cur_player_id) + '.jpg'
  plt.savefig(fig_path)
  print('\nSave fig to:' + fig_path)
  # Close only the figure created here; other open figures are left alone.
  plt.close(fig)

  return np.mean(list_mae),np.mean(list_mse),np.mean(list_r2),np.mean(list_rmsle)

In [None]:
for player_id in tqdm(dict_player.keys()):
  # Reload the results file on every pass so that players already recorded in
  # it (for instance by an earlier, interrupted run) are skipped.
  df_result = pd.read_csv(config.Filename, dtype={"NAME": str,"ID": int,"MAE": float,"MSE": float,"RMSLE": float,"R2": float})
  if (df_result['ID'] == player_id).any():
    continue
  print('\nCurrent Length:' + str(len(df_result)))
  print('Now working on player: '+dict_player_name[player_id])
  data = dict_player[player_id]

  mae,mse,r2,rmsle = sliding_cv_fit(data, config, player_id)
  print('\n\tmae:'+str(mae))
  print('\tmse:'+str(mse))
  print('\tr2:'+str(r2))
  print('\trmsle:'+str(rmsle))
  # DataFrame.append is no longer available in current pandas, so the new row
  # is added with pd.concat. The player name goes into the 'NAME' column that
  # the dtype mapping above declares.
  new_row = pd.DataFrame([{'NAME':dict_player_name[player_id],
                           'ID':player_id,
                           'MAE':mae,
                           'MSE':mse,
                           'R2':r2,
                           'RMSLE':rmsle}])
  df_result = pd.concat([df_result, new_row], ignore_index=True)
  # Write after every player so finished work survives an interruption.
  df_result.to_csv(config.Filename,index=False)

 79%|███████▉  | 213/269 [00:00<00:00, 323.43it/s]


Current Length:244
Now working on player: Dan Gosling


 91%|█████████ | 245/269 [03:49<00:51,  2.15s/it] 


Save fig to:./Pic_LSTM_CV_80/1570.jpg

	mae:0.2021946334838867
	mse:0.06142490921517395
	r2:-0.24389837801672248
	rmsle:0.032367448621900496

Current Length:245
Now working on player: Ismaïla Sarr


 91%|█████████▏| 246/269 [09:14<02:27,  6.43s/it]


Save fig to:./Pic_LSTM_CV_80/94797.jpg

	mae:0.1844222402572632
	mse:0.057132817329737115
	r2:0.5350864121051481
	rmsle:0.03084941924919982

Current Length:246
Now working on player: Joshua King


 92%|█████████▏| 247/269 [15:56<05:03, 13.78s/it]


Save fig to:./Pic_LSTM_CV_80/3659.jpg

	mae:0.1987609170913697
	mse:0.06212593744094084
	r2:0.3888843882478118
	rmsle:0.031903451040840705

Current Length:247
Now working on player: Wayne Hennessey


 92%|█████████▏| 248/269 [18:36<06:08, 17.56s/it]


Save fig to:./Pic_LSTM_CV_80/1343.jpg

	mae:0.20252683687210088
	mse:0.06670011151994884
	r2:-0.1901883297903743
	rmsle:0.03482574700033436

Current Length:248
Now working on player: Matthew Lowton


 93%|█████████▎| 249/269 [21:12<07:30, 22.51s/it]


Save fig to:./Pic_LSTM_CV_80/3542.jpg

	mae:0.19241900777816778
	mse:0.0623571534165622
	r2:0.44312505559695997
	rmsle:0.03436840607623348

Current Length:249
Now working on player: Erik Pieters


 93%|█████████▎| 250/269 [26:37<11:45, 37.16s/it]


Save fig to:./Pic_LSTM_CV_80/444.jpg

	mae:0.18908932995796207
	mse:0.05388379899548956
	r2:-0.303491321025207
	rmsle:0.02982884493240346

Current Length:250
Now working on player: Charlie Taylor


 93%|█████████▎| 251/269 [29:19<13:34, 45.24s/it]


Save fig to:./Pic_LSTM_CV_80/36276.jpg

	mae:0.1145022878646849
	mse:0.019689392020571628
	r2:0.255709084973197
	rmsle:0.018711892004450576

Current Length:251
Now working on player: Maxwel Cornet


 94%|█████████▎| 252/269 [35:51<21:07, 74.54s/it]


Save fig to:./Pic_LSTM_CV_80/16725.jpg

	mae:0.19829887351989747
	mse:0.07629858984365448
	r2:0.39260448310142676
	rmsle:0.03325867515853879

Current Length:252
Now working on player: Benjamin Mee


 94%|█████████▍| 253/269 [41:03<26:42, 100.18s/it]


Save fig to:./Pic_LSTM_CV_80/3708.jpg

	mae:0.15528516435623171
	mse:0.035047454905697134
	r2:0.24977996183532442
	rmsle:0.02464887587061229

Current Length:253
Now working on player: James Tarkowski


 94%|█████████▍| 254/269 [45:06<29:49, 119.33s/it]


Save fig to:./Pic_LSTM_CV_80/36247.jpg

	mae:0.17691773573557543
	mse:0.04470285531561303
	r2:0.40561816912021414
	rmsle:0.02725264085196974

Current Length:254
Now working on player: Connor Roberts


 95%|█████████▍| 255/269 [50:27<35:22, 151.59s/it]


Save fig to:./Pic_LSTM_CV_80/74144.jpg

	mae:0.16943500137329107
	mse:0.04509073016745975
	r2:0.28857631013974117
	rmsle:0.02745503903988228

Current Length:255
Now working on player: Jóhann Berg Guðmundsson


 95%|█████████▌| 256/269 [53:03<33:00, 152.38s/it]


Save fig to:./Pic_LSTM_CV_80/35241.jpg

	mae:0.10806148719787595
	mse:0.019633415832989547
	r2:0.7020163865375445
	rmsle:0.019682154608597527

Current Length:256
Now working on player: Josh Brownhill


 96%|█████████▌| 257/269 [55:41<30:41, 153.46s/it]


Save fig to:./Pic_LSTM_CV_80/37149.jpg

	mae:0.12288605451583864
	mse:0.025894194450438765
	r2:0.6671924714393445
	rmsle:0.01891995293775481

Current Length:257
Now working on player: Jack Cork


 96%|█████████▌| 258/269 [1:02:28<38:52, 212.03s/it]


Save fig to:./Pic_LSTM_CV_80/2073.jpg

	mae:0.12064742679595945
	mse:0.023162058686832274
	r2:0.42751537984089366
	rmsle:0.020023629647597026

Current Length:258
Now working on player: Aaron Lennon


 96%|█████████▋| 259/269 [1:03:46<29:48, 178.89s/it]


Save fig to:./Pic_LSTM_CV_80/63.jpg

	mae:0.17636463165283206
	mse:0.03993682248472849
	r2:-0.05888276818136706
	rmsle:0.029190376934900723

Current Length:259
Now working on player: Dale Stephens


 97%|█████████▋| 260/269 [1:07:48<29:18, 195.42s/it]


Save fig to:./Pic_LSTM_CV_80/36118.jpg

	mae:0.13746020571390785
	mse:0.031282965302957315
	r2:0.4672832573665245
	rmsle:0.022736205392087608

Current Length:260
Now working on player: Ashley Westwood


 97%|█████████▋| 261/269 [1:13:12<30:43, 230.40s/it]


Save fig to:./Pic_LSTM_CV_80/3295.jpg

	mae:0.1132848305702209
	mse:0.02082400004075822
	r2:0.17125166961460253
	rmsle:0.018276399579699693

Current Length:261
Now working on player: Ashley Barnes


 97%|█████████▋| 262/269 [1:15:49<24:28, 209.82s/it]


Save fig to:./Pic_LSTM_CV_80/2335.jpg

	mae:0.15691906118392945
	mse:0.04203120484372229
	r2:0.599502987437418
	rmsle:0.029152257086978835

Current Length:262
Now working on player: Jay Rodriguez


 98%|█████████▊| 263/269 [1:21:00<23:53, 238.84s/it]


Save fig to:./Pic_LSTM_CV_80/1799.jpg

	mae:0.169943976163864
	mse:0.0452006911152603
	r2:0.4136101694633888
	rmsle:0.02867481764776148

Current Length:263
Now working on player: Wout Weghorst


 98%|█████████▊| 264/269 [1:31:45<29:42, 356.50s/it]


Save fig to:./Pic_LSTM_CV_80/24726.jpg

	mae:0.2630905555486677
	mse:0.09999112430505193
	r2:0.04678836413658849
	rmsle:0.039359058888672416

Current Length:264
Now working on player: Tim Krul


 99%|█████████▊| 265/269 [1:35:44<21:27, 321.95s/it]


Save fig to:./Pic_LSTM_CV_80/1056.jpg

	mae:0.22253353977203374
	mse:0.07887496579989496
	r2:0.0620197873767269
	rmsle:0.03458710041501226

Current Length:265
Now working on player: Max Aarons


 99%|█████████▉| 266/269 [1:38:26<13:44, 274.72s/it]


Save fig to:./Pic_LSTM_CV_80/207070.jpg

	mae:0.09450101327896102
	mse:0.014134096228891365
	r2:0.6263143443033312
	rmsle:0.015001525528985857

Current Length:266
Now working on player: Pierre Lees Melou


 99%|█████████▉| 267/269 [1:42:29<08:51, 265.52s/it]


Save fig to:./Pic_LSTM_CV_80/72890.jpg

	mae:0.15311600844065357
	mse:0.04140723111058908
	r2:0.40095483558274686
	rmsle:0.02499592990979205

Current Length:267
Now working on player: Milot Rashica


100%|█████████▉| 268/269 [1:49:13<05:06, 306.58s/it]


Save fig to:./Pic_LSTM_CV_80/69412.jpg

	mae:0.1792590484619141
	mse:0.05340398360870994
	r2:-0.17792568517936572
	rmsle:0.028794390270017695

Current Length:268
Now working on player: Teemu Pukki


100%|██████████| 269/269 [1:57:06<00:00, 26.12s/it] 


Save fig to:./Pic_LSTM_CV_80/15666.jpg

	mae:0.21275987895329795
	mse:0.06660604465934429
	r2:0.5213902478647988
	rmsle:0.033433933251761215





#Not in Use (Previous Code)

In [None]:
# Initialise the results file: run this once right after switching to a new
# file name so that an empty CSV with the expected header exists. The file is
# only created when it does not exist yet, so running this cell again (for
# example through "Run All") cannot wipe results that were already collected.
# To start over on purpose, delete the file first.
result_csv_path = 'LSTM_CV_result80.csv'
if not os.path.exists(result_csv_path):
  df_result = pd.DataFrame(columns = ['NAME','ID','MAE','MSE','RMSLE','R2'],dtype = float)
  df_result.to_csv(result_csv_path,index=False)