In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from tensorflow import keras

In [30]:
games = pd.read_csv('games.csv')
turns = pd.read_csv('turns.csv')
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [31]:
avg_points_per_turn = turns[['points','game_id','nickname']].groupby(['game_id','nickname']).mean().reset_index()

In [32]:
total_number_of_turns = turns[['turn_number','game_id']].groupby(['game_id']).last().reset_index()

In [33]:
def player_bot_categorize(data):
    bot_player = []
    for i in range(data.shape[0]):
        if data.nickname[i] in ['BetterBot','STEEBot','HastyBot']:
            bot_player.append('bot')
        else:
            bot_player.append('player')
    data['bot_player'] = bot_player
    return data

In [34]:
avg_points_per_turn = player_bot_categorize(avg_points_per_turn)

In [35]:
total_number_of_turns =  total_number_of_turns.merge(avg_points_per_turn[avg_points_per_turn['bot_player']=='bot'][['game_id','nickname']], on='game_id', how='left')
total_number_of_turns = total_number_of_turns.merge(avg_points_per_turn[avg_points_per_turn['bot_player']=='bot'][['game_id','points']], on='game_id', how='left')
total_number_of_turns = total_number_of_turns.merge(avg_points_per_turn[avg_points_per_turn['bot_player']=='player'][['game_id','points']], on='game_id', how='left')

In [36]:
turns_new = total_number_of_turns.rename(columns={'turn_number':'total_turns','nickname':'bot_name','points_x':'bot_score_per_turn','points_y':'player_score_per_turn'})

In [37]:
turns_new

Unnamed: 0,game_id,total_turns,bot_name,bot_score_per_turn,player_score_per_turn
0,1,27,BetterBot,25.769231,30.642857
1,2,26,BetterBot,33.416667,34.857143
2,3,27,BetterBot,24.461538,31.428571
3,4,29,BetterBot,31.866667,8.500000
4,5,31,STEEBot,28.466667,20.312500
...,...,...,...,...,...
72768,72769,31,STEEBot,48.153846,21.333333
72769,72770,32,HastyBot,36.875000,6.062500
72770,72771,31,HastyBot,26.200000,24.375000
72771,72772,30,BetterBot,31.571429,24.250000


In [38]:
train = player_bot_categorize(train)
test = player_bot_categorize(test)
tt_concat = pd.concat([train,test])

In [39]:
tt_concat

Unnamed: 0,game_id,nickname,score,rating,bot_player
0,1,BetterBot,335,1637.0,bot
1,1,stevy,429,1500.0,player
2,3,davidavid,440,1811.0,player
3,3,BetterBot,318,2071.0,bot
4,4,Inandoutworker,119,1473.0,player
...,...,...,...,...,...
44721,72762,kyjo55555,367,,player
44722,72768,HastyBot,524,2356.0,bot
44723,72768,Maximilian,357,,player
44724,72769,STEEBot,626,2110.0,bot


In [40]:
turns_new = turns_new.merge(tt_concat[tt_concat['bot_player']=='bot'][['game_id','score']],on='game_id',how='left')
turns_new = turns_new.merge(tt_concat[tt_concat['bot_player']=='player'][['game_id','score']],on='game_id',how='left')
turns_new = turns_new.merge(tt_concat[tt_concat['bot_player']=='bot'][['game_id','rating']],on='game_id',how='left')
turns_new = turns_new.merge(tt_concat[tt_concat['bot_player']=='player'][['game_id','rating']],on='game_id',how='left')

In [41]:
turns_new = turns_new.rename(columns={'score_x':'bot_total_score','score_y':'player_total_score','rating_x':'bot_rating','rating_y':'player_rating'})

In [42]:
turns_new

Unnamed: 0,game_id,total_turns,bot_name,bot_score_per_turn,player_score_per_turn,bot_total_score,player_total_score,bot_rating,player_rating
0,1,27,BetterBot,25.769231,30.642857,335,429,1637.0,1500.0
1,2,26,BetterBot,33.416667,34.857143,401,488,2000.0,
2,3,27,BetterBot,24.461538,31.428571,318,440,2071.0,1811.0
3,4,29,BetterBot,31.866667,8.500000,478,119,1936.0,1473.0
4,5,31,STEEBot,28.466667,20.312500,427,325,1844.0,1500.0
...,...,...,...,...,...,...,...,...,...
72768,72769,31,STEEBot,48.153846,21.333333,626,384,2110.0,
72769,72770,32,HastyBot,36.875000,6.062500,590,97,2237.0,1332.0
72770,72771,31,HastyBot,26.200000,24.375000,393,390,1614.0,1500.0
72771,72772,30,BetterBot,31.571429,24.250000,442,388,1674.0,1364.0


In [43]:
data = games.merge(turns_new, on='game_id', how='left')

In [44]:
train_new = data[data.player_rating.isnull() == False].reset_index(drop=True)
test_new = data[data.player_rating.isnull() == True].reset_index(drop=True)

In [45]:
def preprocessing(data):
    game_id = data['game_id']
    data.drop(columns=['game_id','created_at'],inplace=True)
    
    first = []
    for i in range(data.shape[0]):
        if data['first'][i] in ['BetterBot','STEEBot','HastyBot']:
            first.append(0)
        else:
            first.append(1)
    data['first'] = first

    ohe = OneHotEncoder()
    features_transformed = ohe.fit_transform(data[['time_control_name','game_end_reason','lexicon','rating_mode','bot_name']]).toarray()
    feature_labels = ohe.categories_
    feature_labels = np.hstack(feature_labels)

    data = pd.DataFrame(features_transformed,columns=feature_labels).merge(data.drop(columns=['time_control_name','game_end_reason','lexicon','rating_mode','bot_name']),left_index=True,right_index=True,how='inner')
    return data,game_id

In [46]:
train_new, game_id_train = preprocessing(train_new)

In [47]:
train_new = train_new.drop(columns='NSWL20')

In [48]:
x_train, x_test, y_train, y_test = train_test_split(train_new.iloc[:,:28],train_new.iloc[:,28],test_size=0.15,random_state=42)

In [49]:
# ss = StandardScaler()
# x_train = ss.fit_transform(x_train)
# x_test = ss.fit_transform(x_test)

In [50]:
rfr = RandomForestRegressor(n_estimators=300,max_depth=20,n_jobs=-1)
rfr.fit(x_train,y_train)

RandomForestRegressor(max_depth=20, n_estimators=300, n_jobs=-1)

In [51]:
y_pred = rfr.predict(x_test)

In [52]:
print(np.sqrt(mean_squared_error(y_test,y_pred)),'\n',mean_absolute_error(y_test,y_pred))

107.2284760986435 
 69.98389298125768


In [53]:
print(np.sqrt(mean_squared_error(y_test,y_pred)),'\n',mean_absolute_error(y_test,y_pred))

107.2284760986435 
 69.98389298125768


In [54]:
test, game_id_test = preprocessing(test_new)

In [56]:
final = pd.concat([game_id_test,pd.Series(rfr.predict(test.drop(columns=['player_rating'])))],axis=1)

In [59]:
final.rename(columns={0:'rating'},inplace=True)

In [61]:
final.to_csv('my_sub.csv',index=False)

In [None]:
train.columns

Index(['game_id', 'nickname', 'score', 'rating', 'bot_player'], dtype='object')