# 0. 세팅

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so files
# stored on Drive can be addressed through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import random
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

# Number of rounds played in one match between two players. The flat match
# history (and the REINFORCE input layer) holds game_round * 2 entries:
# one action per player per round.
game_round = 10
# Names of the scripted strategies; each name is matched by string comparison
# inside get_action_of_strategy.
strategies = ["Copycat", "All Cooperate", "All Cheat", "Grudger", "Detective", 
              "Copykitten", "Simpleton", "Random", "Cheat-Downing", "Cooperate-Downing", 
              "Joss", "Cheat-Tester", "Cooperate-Tester", "Tranquilizer", "Gradual", 
              "Prober", "Pavlov", "Mistrust", "Per-Kind", "Per-Nasty"]

In [None]:
class REINFORCE(nn.Module):
    """Two-layer policy network for the repeated cooperate/cheat game.

    The input is the flat match history of ``game_round * 2`` values and the
    output is a probability distribution over the two actions (index 0 and
    index 1, which the rest of the file uses as cheat and cooperate).
    """

    def __init__(self):
        super().__init__()
        # Empty list created at construction; nothing inside this class
        # fills or reads it.
        self.data = []

        # Attribute names are part of the checkpoint format (state_dict keys).
        self.fc1 = nn.Linear(game_round * 2, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        """Map a history tensor to action probabilities.

        Args:
            x: Float tensor whose last dimension has ``game_round * 2``
                entries; either a single history or a batch of histories.

        Returns:
            Tensor with a last dimension of size 2 that sums to 1.
        """
        hidden = F.relu(self.fc1(x))
        # Normalise over the action axis (the last one). For a 1-D input this
        # is the same as dim=0; for a batched input it yields one distribution
        # per row instead of normalising across the batch.
        return F.softmax(self.fc2(hidden), dim=-1)

In [None]:
def reward(my_action, opponent_action):
    """Return the payoff earned by the player who chose ``my_action``.

    Actions are encoded as 1 = cooperate and 0 = cheat.

    Args:
        my_action: Action of the player being scored.
        opponent_action: Action of the other player in the same round.

    Returns:
        2 for mutual cooperation, -1 for cooperating against a cheat,
        3 for cheating against a cooperator, 0 for mutual cheating.

    Raises:
        ValueError: If the pair is not one of the four valid combinations
            (previously this fell through and silently returned ``None``).
    """
    payoffs = (
        ((1, 1), 2),   # cooperate, cooperate
        ((1, 0), -1),  # cooperate, cheat
        ((0, 1), 3),   # cheat, cooperate
        ((0, 0), 0),   # cheat, cheat
    )
    # Compare with == (not a dict lookup) so any value that compares equal to
    # 0 or 1 is accepted, exactly as before.
    for (mine, theirs), payoff in payoffs:
        if my_action == mine and opponent_action == theirs:
            return payoff
    raise ValueError(
        f"actions must each be 0 or 1, got {my_action!r} and {opponent_action!r}"
    )

In [None]:
def get_action_of_agent(agent, state, order):
    """Return the agent's greedy action for the current match history.

    Args:
        agent: Callable (the policy network) that maps a 1-D float tensor of
            the history to per-action scores.
        state: Flat history list laid out as
            ``[first_r0, second_r0, first_r1, second_r1, ...]``.
        order: 1 if the agent is the first player of the pair, 2 if it is the
            second. For 2, each (first, second) pair is swapped so the agent
            always sees its own move first.

    Returns:
        Index of the highest-scoring action as a plain ``int``.

    Raises:
        ValueError: If ``order`` is neither 1 nor 2 (previously this ended in
            an UnboundLocalError).
    """
    if order == 1:
        observed = state
    elif order == 2:
        observed = []
        for i in range(0, len(state), 2):
            observed.append(state[i + 1])
            observed.append(state[i])
    else:
        raise ValueError(f"order must be 1 or 2, got {order!r}")

    # Only the argmax is used, so no autograd graph is needed.
    with torch.no_grad():
        prob = agent(torch.tensor(observed).float())

    return int(torch.argmax(prob))

In [None]:
def _majority_reply(opp_moves, tie_action):
    """Cheat (0) if cheats outnumber other moves in ``opp_moves``, cooperate (1)
    if they are outnumbered, and return ``tie_action`` when the counts are equal."""
    cheats = opp_moves.count(0)
    coops = len(opp_moves) - cheats
    if cheats == coops:
        return tie_action
    return 0 if cheats > coops else 1


def get_action_of_strategy(state, strategy, order):
    """Return the next move (1 = cooperate, 0 = cheat) of a scripted strategy.

    Args:
        state: Flat match history laid out as
            ``[first_r0, second_r0, first_r1, second_r1, ...]``. The first
            ``-1`` marks the first slot that has not been played yet; a
            history without any ``-1`` is treated as fully played.
        strategy: Name of the strategy to play (one of the entries of the
            module-level ``strategies`` list).
        order: 1 if this strategy is the first player of the pair (its moves
            are at even indices), 2 if it is the second (odd indices).

    Returns:
        The chosen action, 0 or 1.

    Raises:
        ValueError: If ``order`` is not 1 or 2, or ``strategy`` is unknown.
    """
    if order not in (1, 2):
        raise ValueError(f"order must be 1 or 2, got {order!r}")

    # Keep only the moves that were actually played.
    played = []
    for move in state:
        if move == -1:
            break
        played.append(move)

    # Zero-based index of the round about to be played. Deriving it from the
    # played moves keeps it defined even when the history has no -1 left.
    cur_game_round = len(played) // 2
    first_moves, second_moves = played[0::2], played[1::2]
    if order == 1:
        my_state, opp_state = first_moves, second_moves
    else:
        my_state, opp_state = second_moves, first_moves

    if strategy == "Copycat":
        # Cooperate first, then repeat the opponent's previous move.
        my_action = 1 if cur_game_round == 0 else opp_state[-1]

    elif strategy == "All Cooperate":
        my_action = 1

    elif strategy == "All Cheat":
        my_action = 0

    elif strategy == "Grudger":
        # Cooperate until the opponent has cheated once, then cheat forever.
        if cur_game_round == 0:
            my_action = 1
        else:
            my_action = 1 if 0 not in opp_state else 0

    elif strategy == "Detective":
        # Fixed opening: cooperate, cheat, cooperate, cooperate.
        if cur_game_round in [0, 2, 3]:
            my_action = 1
        elif cur_game_round == 1:
            my_action = 0
        elif 0 in opp_state[:4]:
            # Opponent cheated during the opening: mirror its last move.
            my_action = opp_state[-1]
        else:
            # Opponent never cheated during the opening: always cheat.
            my_action = 0

    elif strategy == "Copykitten":
        # Cheat only after two consecutive cheats by the opponent.
        if cur_game_round in [0, 1]:
            my_action = 1
        else:
            my_action = 0 if opp_state[-2] == 0 and opp_state[-1] == 0 else 1

    elif strategy == "Simpleton":
        if cur_game_round == 0:
            my_action = 1
        else:
            prev_action, prev_opp_action = my_state[-1], opp_state[-1]
            if prev_opp_action == 0:
                # Opponent cheated: switch to the opposite of my last move.
                my_action = 1 if prev_action == 0 else 0
            else:
                # Opponent cooperated: repeat my last move.
                my_action = prev_action

    elif strategy == "Random":
        my_action = random.randint(0, 1)

    elif strategy == "Cheat-Downing":
        # Follow the opponent's majority behaviour; cheat on a tie
        # (which includes the very first round).
        my_action = _majority_reply(opp_state, 0)

    elif strategy == "Cooperate-Downing":
        # Same as Cheat-Downing but cooperates on a tie.
        my_action = _majority_reply(opp_state, 1)

    elif strategy == "Joss":
        if cur_game_round == 0:
            # Opening move only: cheat with probability 1/10.
            my_action = 0 if random.randint(0, 9) == 0 else 1
        else:
            my_action = opp_state[-1]

    elif strategy == "Cheat-Tester":
        # Play randomly for the first half of the match, then answer the
        # opponent's majority behaviour during that half (cheat on a tie).
        probe_rounds = game_round // 2
        if cur_game_round < probe_rounds:
            my_action = random.randint(0, 1)
        else:
            my_action = _majority_reply(opp_state[:probe_rounds], 0)

    elif strategy == "Cooperate-Tester":
        # Same as Cheat-Tester but cooperates on a tie.
        probe_rounds = game_round // 2
        if cur_game_round < probe_rounds:
            my_action = random.randint(0, 1)
        else:
            my_action = _majority_reply(opp_state[:probe_rounds], 1)

    elif strategy == "Tranquilizer":
        # Cheat only while doing so keeps my own cheat ratio (counting the
        # move about to be made) below 25%.
        my_cheat = my_state.count(0)
        if (my_cheat + 1) / (len(my_state) + 1) < 0.25:
            my_action = 0
        else:
            my_action = 1

    elif strategy == "Gradual":
        if cur_game_round == 0:
            my_action = 1
        else:
            opp_cheat = opp_state.count(0)
            # 1 + 2 + ... + opp_cheat: the number of cheats owed in return.
            cheat_sum = opp_cheat * (opp_cheat + 1) // 2
            my_cheat = my_state.count(0)
            my_action = 0 if cheat_sum > my_cheat else 1

    elif strategy == "Prober":
        if cur_game_round == 0:
            my_action = 1
        elif cur_game_round in [1, 2]:
            my_action = 0
        else:
            # Rounds are grouped in windows of three (0-2, 3-5, ...); look at
            # the opponent's 2nd and 3rd moves of the previous window.
            window_start = 3 * (cur_game_round // 3 - 1)
            if opp_state[window_start + 1] == 1 and opp_state[window_start + 2] == 0:
                my_action = opp_state[-1]
            elif cur_game_round % 3 == 0:
                my_action = 0
            else:
                my_action = 1

    elif strategy == "Pavlov":
        # Cooperate when both players made the same move last round.
        if cur_game_round == 0:
            my_action = 1
        else:
            my_action = 1 if my_state[-1] == opp_state[-1] else 0

    elif strategy == "Mistrust":
        # Copycat that opens with a cheat.
        my_action = 0 if cur_game_round == 0 else opp_state[-1]

    elif strategy == "Per-Kind":
        # Periodic: cooperate, cooperate, cheat.
        my_action = 1 if cur_game_round % 3 in [0, 1] else 0

    elif strategy == "Per-Nasty":
        # Periodic: cooperate, cheat, cheat.
        my_action = 1 if cur_game_round % 3 == 0 else 0

    else:
        raise ValueError(f"unknown strategy: {strategy!r}")

    return my_action

# 1. 게임

In [None]:
from itertools import combinations
from collections import defaultdict
import random
import copy

# Evolutionary tournament between the scripted strategies only.
# Every tournament: all pairs play one match of game_round rounds, entrants
# are ranked by total score, the top `limit` are entered twice in the next
# tournament, the bottom `limit` are dropped, and the rest carry over once.
tournaments = 100
candidates = copy.deepcopy(strategies)
limit = 5
for t in range(tournaments):
    random.shuffle(candidates)
    score_dict = defaultdict(int)
    # Tag each entrant with its seat index so that duplicates of the same
    # strategy get distinct score keys.
    candidates = [name + "_" + str(seat) for seat, name in enumerate(candidates)]

    for cand1, cand2 in combinations(candidates, 2):
        strategy1 = cand1.split("_")[0]
        strategy2 = cand2.split("_")[0]
        # -1 marks a slot that has not been played yet.
        state = [-1] * (game_round * 2)
        for round_idx in range(game_round):
            cand1_action = get_action_of_strategy(state, strategy1, 1)
            cand2_action = get_action_of_strategy(state, strategy2, 2)

            score_dict[cand1] += reward(cand1_action, cand2_action)
            score_dict[cand2] += reward(cand2_action, cand1_action)

            state[round_idx * 2] = cand1_action
            state[round_idx * 2 + 1] = cand2_action

    print('----Tournament <' + str(t+1) + '>----')
    print('Rank : ')
    rank = sorted(score_dict.items(), key=lambda entry: -entry[1])
    print(rank)

    # Strategy names in ranking order, with the seat suffix removed.
    ranked_names = [tagged.split("_")[0] for tagged, _score in rank]
    cutoff = len(candidates) - limit

    print('\nReproduce : ')
    candidates_ = []
    for name in ranked_names[:limit]:
        print('\t' + name)
        candidates_ += [name, name]

    # Middle of the ranking survives unchanged.
    candidates_ += ranked_names[limit:cutoff]

    print('\nEliminate')
    for name in ranked_names[cutoff:]:
        print('\t' + name)

    candidates = candidates_
    print('\n')

print('\nResult : ')
print(candidates)

----Tournament <1>----
Rank : 
[('Gradual_3', 269), ('Copycat_0', 265), ('Cooperate-Downing_17', 265), ('Joss_6', 262), ('All Cooperate_14', 254), ('Pavlov_4', 253), ('Simpleton_16', 252), ('Per-Kind_15', 248), ('Copykitten_11', 247), ('Grudger_10', 244), ('Detective_12', 240), ('Random_18', 227), ('Tranquilizer_19', 226), ('Mistrust_5', 208), ('Cheat-Tester_9', 207), ('Cooperate-Tester_1', 197), ('All Cheat_13', 189), ('Per-Nasty_8', 185), ('Cheat-Downing_7', 181), ('Prober_2', 173)]

Reproduce : 
	Gradual
	Copycat
	Cooperate-Downing
	Joss
	All Cooperate

Eliminate
	Cooperate-Tester
	All Cheat
	Per-Nasty
	Cheat-Downing
	Prober


----Tournament <2>----
Rank : 
[('Copycat_7', 348), ('Copycat_13', 341), ('Gradual_19', 334), ('Simpleton_14', 331), ('Joss_4', 330), ('Pavlov_1', 326), ('Gradual_10', 326), ('Cooperate-Downing_5', 324), ('Copykitten_15', 318), ('All Cooperate_0', 317), ('Cooperate-Downing_2', 315), ('All Cooperate_3', 314), ('Detective_11', 302), ('Joss_12', 301), ('Grudger_9

In [None]:
# Evolutionary tournament in which the trained REINFORCE agent competes with
# the scripted strategies.
agent = REINFORCE()
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only runtime; the freshly built agent lives on the CPU anyway.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
agent.load_state_dict(
    torch.load('/content/drive/My Drive/Colab Notebooks/model.pth',
               map_location='cpu')
)

tournaments = 10
# The concatenation already builds a new list, so `strategies` is not aliased.
candidates = strategies + ["Agent"]
limit = 5
for t in range(tournaments):
    score_dict = defaultdict(int)
    random.shuffle(candidates)
    # Tag each entrant with its seat index so that duplicates of the same
    # strategy get distinct score keys.
    candidates = [name + "_" + str(seat) for seat, name in enumerate(candidates)]

    for cand1, cand2 in combinations(candidates, 2):
        strategy1 = cand1.split("_")[0]
        strategy2 = cand2.split("_")[0]
        # -1 marks a slot that has not been played yet.
        state = [-1] * (game_round * 2)
        for round_idx in range(game_round):
            if strategy1 == "Agent":
                cand1_action = get_action_of_agent(agent, state, 1)
            else:
                cand1_action = get_action_of_strategy(state, strategy1, 1)

            if strategy2 == "Agent":
                cand2_action = get_action_of_agent(agent, state, 2)
            else:
                cand2_action = get_action_of_strategy(state, strategy2, 2)

            score_dict[cand1] += reward(cand1_action, cand2_action)
            score_dict[cand2] += reward(cand2_action, cand1_action)

            state[round_idx * 2] = cand1_action
            state[round_idx * 2 + 1] = cand2_action

    print('----Tournament <' + str(t+1) + '>----')
    print('Rank : ')
    rank = sorted(score_dict.items(), key=lambda entry: -entry[1])
    print(rank)

    # Strategy names in ranking order, with the seat suffix removed.
    ranked_names = [tagged.split("_")[0] for tagged, _score in rank]
    cutoff = len(candidates) - limit

    # Top `limit` entrants are entered twice in the next tournament.
    print('\nReproduce : ')
    candidates_ = []
    for name in ranked_names[:limit]:
        print('\t' + name)
        candidates_ += [name, name]

    # Middle of the ranking survives unchanged.
    candidates_ += ranked_names[limit:cutoff]

    # Bottom `limit` entrants are dropped.
    print('\nEliminate')
    for name in ranked_names[cutoff:]:
        print('\t' + name)

    candidates = candidates_
    print('\n')

print('\nResult : ')
print(candidates)

----Tournament <1>----
Rank : 
[('Agent_2', 338), ('Copycat_5', 300), ('Gradual_4', 292), ('Joss_1', 285), ('Copykitten_17', 285), ('Cooperate-Downing_11', 282), ('Grudger_8', 281), ('Detective_10', 271), ('Simpleton_3', 266), ('Pavlov_20', 254), ('All Cooperate_14', 253), ('Tranquilizer_12', 248), ('Per-Kind_6', 244), ('Cheat-Tester_15', 220), ('Cooperate-Tester_19', 217), ('Mistrust_13', 207), ('All Cheat_16', 198), ('Cheat-Downing_9', 197), ('Prober_0', 187), ('Per-Nasty_7', 184), ('Random_18', 183)]

Reproduce : 
	Agent
	Copycat
	Gradual
	Joss
	Copykitten

Eliminate
	All Cheat
	Cheat-Downing
	Prober
	Per-Nasty
	Random


----Tournament <2>----
Rank : 
[('Agent_20', 393), ('Agent_1', 391), ('Copycat_13', 366), ('Joss_17', 366), ('Copycat_7', 364), ('Grudger_3', 353), ('Joss_11', 351), ('All Cooperate_16', 349), ('Gradual_10', 347), ('Copykitten_12', 347), ('Copykitten_4', 344), ('Pavlov_8', 343), ('Gradual_15', 329), ('Simpleton_2', 327), ('Cooperate-Downing_9', 315), ('Detective_14'