<a href="https://colab.research.google.com/github/kazuma-kosuge/clone/blob/master/%E7%A4%BE%E4%BC%9A%E3%82%B7%E3%83%9F%E3%83%A5%E3%83%AC%E3%83%BC%E3%82%B7%E3%83%A7%E3%83%B3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import random as rnd
import numpy as np
import networkx as nx
import pandas as pd
import gym
import math

In [0]:
class Agent:
    def __init__(self,alpha_normal,gamma_normal):
        self.point = 0.0
        #pre_の初期値設定が以下二つ
        self.pre_state = 2#initializeで半分半分に定義しているため
        self.pre_action = None#Simulationのinitializeで定義している
        #self.next_strategy = None 
        #self.neighbors_id = []
        #学習率と割引率をエージェントごとに特有とする
        self.alpha = alpha_normal#0.2# #学習率
        self.gamma = 0.99 if gamma_normal > 0.99 else gamma_normal#0.99 #割引率
        #↑コメントアウトしてあるものはalphaとgammaを個体ごとで帰る場合
        self.q_table = np.random.uniform(low=-1, high=1, size=(5,2))

    def decide_next_strategy(self, now_state):
      #Q_tableからCorDを導出
      self.now_action = np.argmax(self.q_table[now_state])#CorD
      self.now_state = now_state #現状の状態を各個人が記憶する
      
    def update_strategy(self):#Qの更新を行う関数
      #now_action時の「即時報酬」
      Reward = self.point

      #Qtable更新
      self.q_table[self.pre_state,self.pre_action] = (1- self.alpha) * q_table[self.pre_state,self.pre_action] \
      + self.alpha * (Reward + self.gamma * self.q_table[self.now_state,self.now_action])

      #一回前の状態と行動を保持しておく
      self.pre_state = self.now_state
      self.pre_action = self.now_action

In [0]:
class Simulation:
  def __init__(self,population,average_degree):
    self.agents = self.__generate_agents(population, average_degree)
    self.initialize = self.__initialize()

  def __generate_agents(self,population,average_degree):
    rearange_edges = average_degree//2
    #network = nx.barabasi_albert_graph(population, rearange_edges)

    alpha_normal = np.random.normal(0.2,0.05,population)
    gamma_normal = np.random.normal(0.94,0.05,population)
    agents = [Agent(alpha_normal[id],gamma_normal[id]) for id in range(population)]
    #for index, focal in enumerate(agents):
      #neighbors_id = list(network[index])  # list()無しだとgeneratorになってしまうので注意
      #for agent_id in neighbors_id:
        #focal.neighbors_id.append(agent_id)
    return agents

  def __initialize(self):
    population = len(self.agents)
    initialize = rnd.sample(range(population), k = int(population/2))

    return initialize

  def __initialize_strategy(self):
      for index, focal in enumerate(self.agents):
        if index in self.initialize:#配列の中にindexがいたら
          focal.now_action = 0 #"C"
        else:
          focal.now_action = 1 #"D"

  def __count_payoff(self):
        #特定の回の全エージェントが獲得する利得を計算

        for focal in self.agents:
            focal.point = 0.0
            if focal.now_action == 0: #"C":
              focal.point = self.__count_fc()#self.sumC()/len(self.agents)
            else:
              focal.point = self.__count_fc() + 0.5 #(self.sumC()/len(self.agents)) + 0.5

  def __update_strategy(self):
        """全エージェントに戦略を更新させる"""
        now_state = self.__count_fc()
        #小数点切り捨て
        #state = int(math.floor(state))

        #stateを5段階にする
        if now_state <= 20:
          now_state = 0
        elif now_state > 20 and now_state <=40:
          now_state = 1
        elif now_state > 40 and now_state <=60:
          now_state = 2
        elif now_state >60 and now_state <= 80:
          now_state = 3
        else:
          now_state = 4

        for focal in self.agents:
            #pre_action = focal.strategy
            focal.decide_next_strategy(now_state)

        self.__count_payoff()

        for focal in self.agents:
            focal.update_strategy()

  def __count_fc(self):
        """C戦略エージェントの割合を計算"""
        #このagent.strategy==0はの0は"C"
        fc = (len([agent for agent in self.agents if agent.now_action == 0])/len(self.agents)) * 100
        return fc

  def __play_game(self, episode,Dr,Dg):
        """一つのパラメータ設定で協調率が収束するまで計算"""
        #初期化
        self.__initialize_strategy()
        #C戦略の割合計算
        fc = self.__count_fc()
        fc_hist = [fc]
        #print(f"Episode:{episode}, Dr:{Dr:.1f}, Dg:{Dg:.1f}, Time: 0, Fc:{fc:.3f}")
        print(f"Episode:{episode}, Time: 0, Fc:{fc:.3f}")

        #↑ここまでstateは存在せず、actionはinitializeによって決められている。↑
        #time0の際の初期設定だけ回している

        tmax = 3000
        for t in range(1,tmax):
            #self.__count_payoff()
            self.__update_strategy()
            fc = self.__count_fc()
            fc_hist.append(fc)
            print(f"Episode:{episode}, Time:{t}, Fc:{fc:.3f}")

            ##### 収束判定 #####
            # 100回以上戦略更新を繰り返し、過去100回のゲームで得られた協調率の平均値と次のゲームでの協調率の差が十分小さくなったら計算を打ち切る
            if (t >= 100 and np.absolute(np.mean(fc_hist[t-100:t-1]) - fc)/fc < 0.001) or t == tmax-1:
                fc_converged = np.mean(fc_hist[t-99:t])  # 過去100回分のゲームで得られた協調率の平均値を取る
                break
            
            #振動したら終わり
            elif t>= 50 and fc_hist[t] == fc_hist[t-2] and fc_hist[t] == fc_hist[t-4] and fc_hist[t] == fc_hist[t-6]:
              fc_converged = fc
              print("振動")
              break

            # 囚人のジレンマゲームでは全員C戦略 or 全員D戦略の状態に収束しやすいため、そうなったらすぐに計算を打ち切る
            elif fc in [0, 100]:
                fc_converged = fc
                break

        #print(f"Dr:{Dr:.1f}, Dg:{Dg:.1f}, Time:{t}, Fc:{fc_converged:.3f}")
        print(f" Time:{t}, Fc:{fc_converged:.3f}")

        return fc_converged
        
  def run_one_episode(self, episode):
    result = pd.DataFrame({'Dg': [], 'Dr': [], 'Fc': []})
    #self.__choose_initial_cooperators()

    for Dr in range(1):#np.arange(0, 1.1, 0.1)
      for Dg in range(1):#np.arange(0.1, 1.1, 0.1)
        fc_converged = self.__play_game(episode,Dr,Dg)
        new_result = pd.DataFrame([[format(Dg, '.1f'), format(Dr, '.1f'), fc_converged]], columns = ['Dg', 'Dr', 'Fc'])
        result = result.append(new_result)

    result.to_csv(f"episode{episode}.csv")

In [0]:
def run():
    population = 1000          # エージェント数
    average_degree = 8          # 社会ネットワークの平均次数
    num_episode = 7            # シミュレーションの試行回数
    simulation = Simulation(population, average_degree)

    for episode in range(num_episode):
        rnd.seed()
        simulation.run_one_episode(episode)

#下のq_tableだと全体で学習が共有される。alphaとgammaを大きくずらさない限り一瞬で収束する。
#現実の状態としては知の共有とかいえるかもしれないけど、今回は転職でやってるのでなし
#q_table = np.random.uniform(low=-1, high=1, size=(5,2))
if __name__ == '__main__':
    run()

Episode:0, Time: 0, Fc:50.000
Episode:0, Time:1, Fc:48.900
Episode:0, Time:2, Fc:0.000
 Time:2, Fc:0.000
Episode:1, Time: 0, Fc:50.000
Episode:1, Time:1, Fc:51.100
Episode:1, Time:2, Fc:0.000
 Time:2, Fc:0.000
Episode:2, Time: 0, Fc:50.000
Episode:2, Time:1, Fc:0.000
 Time:1, Fc:0.000
Episode:3, Time: 0, Fc:50.000
Episode:3, Time:1, Fc:17.500
Episode:3, Time:2, Fc:51.400
Episode:3, Time:3, Fc:17.500
Episode:3, Time:4, Fc:51.400
Episode:3, Time:5, Fc:17.500
Episode:3, Time:6, Fc:51.400
Episode:3, Time:7, Fc:17.500
Episode:3, Time:8, Fc:51.400
Episode:3, Time:9, Fc:17.500
Episode:3, Time:10, Fc:51.400
Episode:3, Time:11, Fc:17.500
Episode:3, Time:12, Fc:51.400
Episode:3, Time:13, Fc:17.500
Episode:3, Time:14, Fc:51.400
Episode:3, Time:15, Fc:17.500
Episode:3, Time:16, Fc:51.400
Episode:3, Time:17, Fc:17.500
Episode:3, Time:18, Fc:51.400
Episode:3, Time:19, Fc:17.500
Episode:3, Time:20, Fc:51.400
Episode:3, Time:21, Fc:17.500
Episode:3, Time:22, Fc:51.400
Episode:3, Time:23, Fc:17.500
Epi

KeyboardInterrupt: ignored