In [None]:
import pandas as pd 
import numpy as np 
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_error

In [None]:
# Load the dataset and cast the identifier-like columns to pandas categoricals
# in a single astype call.
categorical_cols = [
    'first_name', 'second_name', 'position', 'id_x',
    'team', 'is_home', 'opposition_team',
]
data = pd.read_csv('../data/data.csv')
data = data.astype({name: 'category' for name in categorical_cols})

In [None]:
# The highest gameweek number present in the data is the one to predict.
# Series.max() is vectorised and skips NaN, whereas max(set(...)) walks every
# value in Python and gives an order-dependent answer when NaN is present.
current_gw = data['event'].max()
if pd.isna(current_gw):
    # An empty or all-NaN column yields NaN; fail loudly rather than silently
    # producing empty splits below.
    raise ValueError("data['event'] contains no gameweek values")

# Data from past gameweeks
past_df = data[data['event'] < current_gw]
# Data for current gameweek (points to be predicted)
current_df = data[data['event'] == current_gw]

In [None]:
# Train-Test Split
# Hold out gameweek `current_gw - 2` for validation and fit on everything before it.
# NOTE(review): gameweek `current_gw - 1` lands in neither split — confirm that is intended.
holdout_gw = current_gw - 2
train = past_df[past_df['event'] < holdout_gw]
test = past_df[past_df['event'] == holdout_gw]

# Columns removed from the feature matrix (this includes the target, total_points).
non_feature_cols = [
    'event', 'total_points', 'first_name', 'second_name',
    'code', 'av_value', 'av(2)_value',
]
X_train, y_train = train.drop(columns=non_feature_cols), train['total_points']
X_test, y_test = test.drop(columns=non_feature_cols), test['total_points']

In [None]:
# Fit a 20-tree LightGBM regressor on the training split, then predict the
# held-out rows. fit() returns the estimator itself, so the call can be chained.
reg = lgb.LGBMRegressor(n_estimators=20).fit(X_train, y_train)
preds = reg.predict(X_test)

In [None]:
# Score the held-out predictions: mean squared error and mean absolute error.
val_mse = mean_squared_error(y_test, preds)
val_mae = mean_absolute_error(y_test, preds)
print('MSE : {:.2f}'.format(val_mse))
print('MAE : {:.2f}'.format(val_mae))

In [None]:
# Draw the fitted model's feature-importance chart, limited to ten features,
# on an explicitly created axes.
fig, ax = plt.subplots()
lgb.plot_importance(reg, ax=ax, max_num_features=10)
plt.show()

In [None]:
# Error analysis
# Put rounded predictions next to the true points for the held-out rows.
report_cols = [
    'first_name', 'second_name', 'team', 'position',
    'opposition_team', 'av_total_points',
]
errors = test[report_cols].assign(
    predictions=np.round(preds),
    true_points=y_test,
)
# error = true - predicted, so a positive value means the model under-predicted.
errors['error'] = errors['true_points'] - errors['predictions']
# Show the 20 rows with the largest error.
errors.sort_values('error', ascending=False).head(20)

### Predictions for the next gameweek (`current_gw`)

In [None]:
# Refit on every past gameweek; the current gameweek's rows are the ones to score.
train, test = past_df, current_df

# Columns removed from the feature matrix (this includes the target, total_points).
non_feature_cols = [
    'event', 'total_points', 'first_name', 'second_name',
    'code', 'av_value', 'av(2)_value',
]
X_train, y_train = train.drop(columns=non_feature_cols), train['total_points']
X_test, y_test = test.drop(columns=non_feature_cols), test['total_points']

In [None]:
# Final model: 50 trees trained on the full training frame, then applied to
# the rows to be predicted.
# NOTE(review): the validation run earlier in the notebook used n_estimators=20 —
# confirm 50 is the intended setting here.
reg = lgb.LGBMRegressor(n_estimators=50).fit(X_train, y_train)
preds = reg.predict(X_test)

In [None]:
# Keep the identifying columns and attach the rounded model output.
output_cols = [
    'id_x', 'first_name', 'second_name',
    'team', 'position', 'opposition_team',
]
predictions = test[output_cols].assign(predicted_points=np.round(preds))
# Players with highest predicted points
predictions.sort_values('predicted_points', ascending=False).head(20)