# In [None]:
import random
import numpy as np
import pandas as pd
import lightgbm as lgb

# Mutable state shared across calls to my_agent (one call per round).
#
# The lists and frames are seeded with a few dummy rounds so that my_agent can
# index several rounds back from its very first call.  "action" starts one
# entry longer than "opponent" because my_agent appends the opponent's move
# before reading either list and appends its own move only at the very end.
history = {
    "action": [0, 1, 2, 0],  # my action records
    "opponent": [0, 1, 2],  # opponent action records
    # One row per round: [my, opp, my, opp] for the two preceding rounds.
    "input_2seq": pd.DataFrame([[0, 0, 1, 1]]),  # dataframe for 2seq-predictor
    # One row per round: [my, opp] * 3 for the three preceding rounds.  The
    # nested list matters: a flat list would build a 6x1 frame whose shape
    # does not match the 1x6 rows appended by my_agent.
    "input_3seq": pd.DataFrame([[0, 0, 1, 1, 2, 2]]),  # dataframe for 3seq-predictor
    "target": pd.Series([0]),  # opponent's move, label for the opponent models
    "target_2": pd.Series([0]),  # my own move, label for the "counter" models
    "strategy_weight": [
        1, 1, 1, 1,  # 2seq, 3seq, 2seq_counter, 3seq_counter
        0.5, 0.5, 0.5, 0.5,  # 2seq_2, 3seq_2, 2seq_counter_2, 3seq_counter_2
        0.25, 0.25, 0.25, 0.25,  # 2seq_3, 3seq_3, 2seq_counter_3, 3seq_counter_3
        0.1,  # random strategy
    ],
    "last_strategy": "random",  # my strategy in last match
}

def check_win(my_action, opponent_action):
    """Return True if ``my_action`` beats ``opponent_action``, else False.

    Moves are encoded as 0, 1, 2 where 1 beats 0, 2 beats 1 and 0 beats 2
    (presumably rock / paper / scissors -- confirm against the game rules).
    Any other pair, including a draw or an out-of-range value, yields False.
    """
    # (mine, theirs) pairs in which my move wins.
    winning_pairs = ((2, 1), (1, 0), (0, 2))
    return (my_action, opponent_action) in winning_pairs

def check_lose(my_action, opponent_action):
    """Return True if ``my_action`` loses to ``opponent_action``, else False.

    Moves are encoded as 0, 1, 2 where 1 beats 0, 2 beats 1 and 0 beats 2
    (presumably rock / paper / scissors -- confirm against the game rules).
    Any other pair, including a draw or an out-of-range value, yields False.
    """
    # (mine, theirs) pairs in which the opponent's move wins.
    losing_pairs = ((2, 0), (1, 2), (0, 1))
    return (my_action, opponent_action) in losing_pairs
    

# NOTE: the helpers below are defined *before* my_agent on purpose.  Agent
# loaders that pick the last callable defined in the file (kaggle-environments
# appears to work this way -- confirm) must keep finding my_agent.

# Strategy names in the same order as history["strategy_weight"].  A model
# strategy sits at index ``family + 4 * level``: family is 2seq, 3seq,
# 2seq_counter, 3seq_counter (0..3) and level is the plain, "_2" or "_3"
# variant (0..2).  "random" is the last entry.
_STRATEGY_NAMES = (
    "2seq", "3seq", "2seq_counter", "3seq_counter",
    "2seq_2", "3seq_2", "2seq_counter_2", "3seq_counter_2",
    "2seq_3", "3seq_3", "2seq_counter_3", "3seq_counter_3",
    "random",
)
_MODEL_STRATEGY_INDEX = {
    name: index for index, name in enumerate(_STRATEGY_NAMES[:-1])
}

# Value added (mod 3) to the predicted move to obtain my move, per level.
_OPPONENT_MODEL_SHIFT = (1, 0, 2)  # model predicts the opponent's next move
_SELF_MODEL_SHIFT = (2, 1, 0)  # "counter" model predicts my own next move


def _interleave_rounds(round_indices):
    """Return ``[my_move, opponent_move, ...]`` for the given round indices."""
    row = []
    for idx in round_indices:
        row.append(history["action"][idx])
        row.append(history["opponent"][idx])
    return row


def _update_strategy_weights(change_rate, sub_change_rate, draw_penalty):
    """Re-weight the strategy used last round according to that round's result.

    For the strategy at level L of its family, "following" is the same family
    at level (L + 1) % 3 and "preceding" the one at level (L + 2) % 3:

    * win:  own weight * change_rate, preceding / sub_change_rate
    * loss: own weight / change_rate, following * sub_change_rate
    * draw: following / sub_change_rate, preceding * sub_change_rate,
            own weight * draw_penalty

    Nothing happens when the last move was not attributed to a model strategy
    (``history["last_strategy"] == "random"``).
    """
    own = _MODEL_STRATEGY_INDEX.get(history["last_strategy"])
    if own is None:
        return

    weights = history["strategy_weight"]
    my_move = history["action"][-1]
    opponent_move = history["opponent"][-1]

    family, level = own % 4, own // 4
    following = family + 4 * ((level + 1) % 3)
    preceding = family + 4 * ((level + 2) % 3)

    if check_win(my_move, opponent_move):
        weights[own] *= change_rate
        weights[preceding] /= sub_change_rate
    elif check_lose(my_move, opponent_move):
        weights[own] /= change_rate
        weights[following] *= sub_change_rate
    elif my_move == opponent_move:
        weights[following] /= sub_change_rate
        weights[preceding] *= sub_change_rate
        weights[own] *= draw_penalty


def _play_model_strategy(strategy, step, params, confirm_thr, safe_thr):
    """Train a LightGBM model on the stored rounds and pick a move from it.

    The model is fitted on the 2- or 3-round feature table with either the
    opponent's move ("target") or my own move ("target_2") as label, then
    asked for class probabilities given the most recent rounds.

    * If the top probability exceeds ``confirm_thr`` the prediction is trusted:
      the move is the predicted move shifted (mod 3) by the strategy's level
      and ``history["last_strategy"]`` is set so the result gets scored.
    * Otherwise, for plain "2seq"/"3seq" only, if some move has probability
      below ``safe_thr`` the move is (that move + 2) % 3, which does not lose
      to either of the two remaining moves.
    * Otherwise a uniformly random move is played and ``last_strategy`` stays
      "random".

    Returns the chosen move as an int in 0..2.
    """
    index = _MODEL_STRATEGY_INDEX[strategy]
    family, level = index % 4, index // 4
    window = 2 if family % 2 == 0 else 3  # rounds of context fed to the model
    models_opponent = family < 2

    if models_opponent:
        target_key = "target"
        shift = _OPPONENT_MODEL_SHIFT[level]
        label = "opponent_move_probability"
    else:
        target_key = "target_2"
        shift = _SELF_MODEL_SHIFT[level]
        label = "my_move_probability"

    # lgb training
    features = history["input_2seq" if window == 2 else "input_3seq"]
    dataset = lgb.Dataset(features, history[target_key])
    model = lgb.train(params, dataset)

    # prediction from the last `window` rounds
    first = len(history["opponent"]) - window
    predict_input = [_interleave_rounds(range(first, first + window))]
    move_proba = model.predict(predict_input)
    predicted_move = np.argmax(move_proba)

    # select action
    if np.max(move_proba) > confirm_thr:
        history["last_strategy"] = strategy
        action = int((predicted_move + shift) % 3)
    elif models_opponent and level == 0 and np.min(move_proba) < safe_thr:
        not_predicted_move = np.argmin(move_proba)
        history["last_strategy"] = strategy
        action = int((not_predicted_move + 2) % 3)
    else:
        action = random.randint(0, 2)

    # debug code
    print(step)
    print(history["last_strategy"])
    print(history["strategy_weight"])
    print(label, move_proba)
    print(predicted_move, action)

    return action


def my_agent(observation, configuration):
    """Choose the next move (0, 1 or 2) for one round of the game.

    Each call records the opponent's previous move, re-weights the strategy
    that produced my previous move, appends one training row per feature
    table, and then picks a move: uniformly at random for the first
    ``num_records`` steps, afterwards through a strategy drawn at random with
    probability proportional to ``history["strategy_weight"]``.

    Args:
        observation: object exposing ``step`` and, for ``step > 0``,
            ``lastOpponentAction``.
        configuration: unused.

    Returns:
        The chosen move as an int in 0..2.  The module-level ``history`` dict
        is updated in place as a side effect.
    """
    global history

    change_rate = 1.25  # weight change rate of strategy
    sub_change_rate = 1.1  # weight change rate of related strategy
    confirm_thr = 0.50  # confirm method threshold
    draw_penalty = 1.01  # weight change rate of strategy when last game is draw
    safe_thr = 0.15  # safe strategy threshold
    num_records = 50  # number of using records for training

    # lightGBM parameters
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'max_depth': 6,
        'num_iterations': 10,
        'learning_rate': 0.1,
        'verbosity': -1,
    }

    # There is no previous opponent move on the first step; 0 is a placeholder.
    opponent_action = observation.lastOpponentAction if observation.step > 0 else 0
    history['opponent'].append(opponent_action)

    # change weight of strategy, then reset the attribution for this round
    _update_strategy_weights(change_rate, sub_change_rate, draw_penalty)
    history["last_strategy"] = "random"

    # log save: features are the rounds *before* the latest one, labels are
    # the latest round's moves.  pd.concat is used because DataFrame.append
    # and Series.append were removed in pandas 2.0.
    new_rows = {
        'input_2seq': pd.DataFrame([_interleave_rounds((-3, -2))]),
        'input_3seq': pd.DataFrame([_interleave_rounds((-4, -3, -2))]),
        'target': pd.Series([history['opponent'][-1]]),
        'target_2': pd.Series([history['action'][-1]]),
    }
    for key, new_row in new_rows.items():
        # save last "num_records" games
        history[key] = pd.concat([history[key], new_row]).tail(num_records)

    # strategy select
    if observation.step < num_records:
        # Not enough rounds recorded yet to train on.
        action = random.randint(0, 2)
    else:
        # choose strategy
        strategy = random.choices(
            _STRATEGY_NAMES, weights=history["strategy_weight"]
        )[0]

        if strategy in _MODEL_STRATEGY_INDEX:
            action = _play_model_strategy(
                strategy, observation.step, params, confirm_thr, safe_thr
            )
        else:
            # random move
            action = random.randint(0, 2)
            print(observation.step)
            print(history["last_strategy"])
            print(history["strategy_weight"])
            print(action)

    # Record action
    history['action'].append(action)

    return action