In [None]:
pip install pygame==1.9.6
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pygame
import pandas as pd
import random
import time
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import math
import cv2
from google.colab.patches import cv2_imshow
from google.colab import output

In [None]:
# --- Window / run configuration ---
WIDTH, HEIGHT = 530, 360  # window size in pixels
FPS = 120                 # frame-rate cap handed to the game clock (how fast the game runs)
EPISODENUM = 3            # episode count used by the training loop

# --- Colour palette as (R, G, B) tuples ---
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
YELLOW = (255, 255, 0)

In [None]:
#player
class Player(pygame.sprite.Sprite):
    """White 25x25 square that moves vertically, driven by an action id or the keyboard.

    The sprite starts near the left edge, vertically centred in the window,
    and is kept inside the WIDTH x HEIGHT window after every move.
    """

    def __init__(self):
        pygame.sprite.Sprite.__init__(self)
        self.image = pygame.Surface((25, 25))
        self.image.fill(WHITE)

        self.rect = self.image.get_rect()
        self.rect.centerx = 15
        # Integer division: Rect coordinates are integers, so avoid handing
        # pygame a float (HEIGHT / 2) and relying on implicit truncation.
        self.rect.centery = HEIGHT // 2

        self.x_speed = 0
        self.y_speed = 0

    def update(self, action):
        """Move the player one step.

        Args:
            action: 0 moves up by 20 px, 1 moves down by 20 px, 2 stays still.
                A pressed W / S / SPACE key selects the same branches and is
                checked before ``action`` within each branch.
        """
        # Controls
        key_state = pygame.key.get_pressed()

        if key_state[pygame.K_w] or action == 0:
            self.y_speed = -20
        elif key_state[pygame.K_s] or action == 1:
            self.y_speed = 20
        elif key_state[pygame.K_SPACE] or action == 2:
            self.y_speed = 0
            self.x_speed = 0

        # Apply the move, then clear the speeds so each call is a single step.
        self.rect.x += self.x_speed
        self.rect.y += self.y_speed
        self.x_speed = 0
        self.y_speed = 0

        # Screen bounds: push the rect back inside the window if it left it.
        self.rect.clamp_ip(pygame.Rect(0, 0, WIDTH, HEIGHT))

    def getCordinates(self):
        """Return the sprite's top-left corner as an ``(x, y)`` tuple."""
        return (self.rect.x, self.rect.y)

**ENEMY**

In [None]:
class Enemy(pygame.sprite.Sprite):
    """Red 20x20 square that drifts left and respawns on the right once it leaves the screen."""

    def __init__(self):
        super().__init__()
        surface = pygame.Surface((20, 20))
        surface.fill(RED)
        self.image = surface

        self.rect = surface.get_rect()
        self._place_at_random_spawn()

        self.x_speed = -10
        self.y_speed = 0

    def _place_at_random_spawn(self):
        """Pick a random centre in the right-hand spawn band and move the rect there."""
        # y is drawn before x, keeping the order of random draws unchanged.
        self.y_center = random.randint(10, HEIGHT - 10)
        self.x_center = random.randint(WIDTH - 265, WIDTH - 10)
        self.rect.center = (self.x_center, self.y_center)

    def update(self):
        """Advance one step to the left; respawn when the left edge passes x = 0."""
        self.rect.x += self.x_speed
        if self.rect.left >= 0:
            return
        self._place_at_random_spawn()

    def getCordinates(self):
        """Return the sprite's top-left corner as an ``(x, y)`` tuple."""
        return self.rect.topleft

**FRIEND**

In [None]:
class Friend(pygame.sprite.Sprite):
    """Yellow 20x20 square that drifts left and respawns on the right once it leaves the screen."""

    def __init__(self):
        super().__init__()
        self.image = pygame.Surface((20, 20))
        self.image.fill(YELLOW)
        self.rect = self.image.get_rect()

        # Random start position inside the right-hand spawn band (y drawn first, then x).
        start_y = random.randint(10, HEIGHT - 10)
        start_x = random.randint(WIDTH - 265, WIDTH - 10)
        self.y_center, self.x_center = start_y, start_x
        self.rect.center = (start_x, start_y)

        self.x_speed = -10

    def update(self):
        """Advance one step to the left; respawn when the left edge passes x = 0."""
        self.rect.x += self.x_speed

        off_screen = self.rect.left < 0
        if off_screen:
            new_y = random.randint(10, HEIGHT - 10)
            new_x = random.randint(WIDTH - 265, WIDTH - 10)
            self.y_center, self.x_center = new_y, new_x
            self.rect.center = (new_x, new_y)

    def getCordinates(self):
        """Return the sprite's top-left corner as an ``(x, y)`` tuple."""
        return self.rect.topleft

In [None]:
class DQLAgent:
    """Deep Q-learning agent with an online network and a separate target network.

    Transitions are kept in a bounded replay memory; ``replay`` trains the
    online network on a random minibatch, using the online network to choose
    the next action and the target network to evaluate it.
    """

    def __init__(self):
        # parameter / hyperparameter
        self.state_size = 10   # [distances]
        self.action_size = 3   # move top, move bottom, stop

        self.gamma = 0.99            # discount factor
        self.learning_rate = 0.0001

        self.epsilon = 1             # exploration probability (starts fully random)
        self.epsilon_decay = 0.9993  # multiplicative decay applied by adaptiveEGreedy
        self.epsilon_min = 0.01      # decay stops once epsilon is at or below this

        self.memory = deque(maxlen=4000)  # replay memory; oldest entries are dropped

        self.model = self.build_model()
        self.target_model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: state_size -> 64 -> 64 -> action_size."""
        model = Sequential()
        model.add(Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # `learning_rate` is the current keyword; `lr` is a deprecated alias.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition ``(state, action, reward, next_state, done)`` to memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: a single observation wrapped in a batch dimension
                (the environment passes ``[state_list]``).

        Returns:
            int: a random action with probability ``epsilon``, otherwise the
            action with the highest predicted Q-value.
        """
        state = np.array(state)
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the online network on one random minibatch from memory.

        Does nothing until at least ``batch_size`` transitions are stored.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        # Build one array per field. Packing the raw tuples into a single
        # np.array would mix nested-list states with scalars (a ragged array).
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=float)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        # Terminal transitions keep the raw reward as their target.
        targets = rewards.copy()
        alive = np.flatnonzero(~dones)
        if alive.size > 0:
            q_next_online = self.model.predict(next_states, verbose=0)
            q_next_target = self.target_model.predict(next_states, verbose=0)
            # Online network picks the next action, target network scores it.
            best_next = np.argmax(q_next_online[alive], axis=1)
            targets[alive] += self.gamma * q_next_target[alive, best_next]

        # Only the Q-value of the action actually taken is moved toward its target.
        q_values = self.model.predict(states, verbose=0)
        q_values[np.arange(batch_size), actions] = targets
        self.model.fit(states, q_values, epochs=1, verbose=0)

    def targetModelUpdate(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def adaptiveEGreedy(self):
        """Decay epsilon by ``epsilon_decay`` while it is above ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

**ENV**

In [None]:
class Env:
    """Game environment: one player, four enemies, one friend, and the learning agent.

    The observation is a list of ten signed offsets: (player - other) in x and
    y for enemy 1..4 and then the friend, wrapped in an outer list so it can be
    fed to the network as a batch of one.
    """

    # Reward scheme applied once per step.
    SURVIVE_REWARD = 1     # nothing touched
    FRIEND_REWARD = 30     # player touched the friend
    CRASH_PENALTY = -150   # player touched an enemy (ends the episode)

    def __init__(self):
        self._spawn_sprites()

        self.reward = 0
        self.total_reward = 0
        self.done = False
        self.agent = DQLAgent()

    def _spawn_sprites(self):
        """Create fresh sprite groups holding a new player, friend and four enemies."""
        self.player_group = pygame.sprite.Group()
        self.enemy_group = pygame.sprite.Group()
        self.friend_group = pygame.sprite.Group()

        self.player = Player()
        self.player_group.add(self.player)

        self.friend = Friend()
        self.friend_group.add(self.friend)

        #enemies
        self.enemy_1 = Enemy()
        self.enemy_group.add(self.enemy_1)
        self.enemy_2 = Enemy()
        self.enemy_group.add(self.enemy_2)
        self.enemy_3 = Enemy()
        self.enemy_group.add(self.enemy_3)
        self.enemy_4 = Enemy()
        self.enemy_group.add(self.enemy_4)

    def _observe(self):
        """Return the current observation as ``[[dx_e1, dy_e1, ..., dx_friend, dy_friend]]``."""
        player_x, player_y = self.player.getCordinates()
        others = (self.enemy_1, self.enemy_2, self.enemy_3, self.enemy_4, self.friend)

        state_list = []
        for other in others:
            other_x, other_y = other.getCordinates()
            state_list.append(self.findDistance(player_x, other_x))
            state_list.append(self.findDistance(player_y, other_y))
        return [state_list]  # len(state_list) = 10

    @classmethod
    def _score_step(cls, enemy_hit, friend_hit):
        """Return ``(reward, done)`` for one step; an enemy hit outranks a friend hit."""
        if enemy_hit:
            return cls.CRASH_PENALTY, True
        if friend_hit:
            return cls.FRIEND_REWARD, False
        return cls.SURVIVE_REWARD, False

    def findDistance(self, a, b):
        """Return the signed difference ``a - b``."""
        return a - b

    def step(self, action):
        """Apply ``action`` to the player, advance enemies and friend, return the new observation."""
        self.player.update(action)
        self.enemy_group.update()
        self.friend_group.update()
        return self._observe()

    def initialStates(self): #reset()
        """Reset sprites and episode counters; return the initial observation."""
        self._spawn_sprites()

        self.reward = 0
        self.total_reward = 0
        self.done = False

        return self._observe()

    def run(self):
        """Play one episode, training the agent after every step.

        Reads the module-level ``screen`` and ``clock`` objects, which must be
        created (after ``pygame.init()``) before this is called. The episode
        ends when the player touches an enemy or a QUIT event is received;
        ``pygame.quit()`` is called before returning.
        """
        # Game Loop
        state = self.initialStates()
        batch_size = 32
        running = True
        while running:
            # keep loop running at the right speed
            clock.tick(FPS)

            # process input
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

            #update
            action = self.agent.act(state)
            next_state = self.step(action)

            # Collisions are checked on the post-step positions.
            enemy_hit = bool(pygame.sprite.spritecollide(self.player, self.enemy_group, False))
            friend_hit = bool(pygame.sprite.spritecollide(self.player, self.friend_group, False))

            # One reward per step, added to the total exactly once. This keeps
            # total_reward equal to the sum of the rewards stored in memory,
            # and a crash is never overwritten by a simultaneous friend bonus.
            self.reward, crashed = self._score_step(enemy_hit, friend_hit)
            self.total_reward += self.reward

            if friend_hit:
                # Move the collected friend back into the right-hand spawn band.
                new_y = random.randint(10, HEIGHT - 10)
                new_x = random.randint(WIDTH - 265, WIDTH - 10)
                self.friend.rect.center = (new_x, new_y)

            if crashed:
                self.agent.targetModelUpdate()
                self.done = True
                running = False
                print("Total Reward: ", self.total_reward)

            # remember (storage)
            self.agent.remember(state, action, self.reward, next_state, self.done)

            # update state
            state = next_state

            # training
            self.agent.replay(batch_size)

            # epsilon greedy
            self.agent.adaptiveEGreedy()

            #draw / render(show)
            screen.fill(BLACK)
            self.player_group.draw(screen)
            self.enemy_group.draw(screen)
            self.friend_group.draw(screen)

            #after drawing flip display
            pygame.display.flip()

        pygame.quit()

In [None]:
import os
# Select SDL's "dummy" video driver so pygame can open a display surface
# without a physical screen. This cell must run before the training cell,
# which is where pygame.init() and pygame.display.set_mode() are called.
os.environ['SDL_VIDEODRIVER']='dummy'

In [None]:
if __name__ == '__main__':
    env = Env()
    liste = []  # total reward of each finished episode, in order

    # Run exactly EPISODENUM episodes. Each reward is recorded after its
    # episode finishes, so there is no leading 0 entry and the last announced
    # episode is actually played.
    for t in range(1, EPISODENUM + 1):
        print("Episode:", t)

        # initialize pygame and create window (Env.run calls pygame.quit()
        # at the end of every episode, so this is redone each time).
        # `screen` and `clock` are module-level names read by Env.run.
        pygame.init()
        screen = pygame.display.set_mode((WIDTH, HEIGHT))
        pygame.display.set_caption("kacma oyunu")
        clock = pygame.time.Clock()

        env.run()
        liste.append(env.total_reward)

    # Collect the per-layer weights of the trained online network.
    weights = [layer.get_weights() for layer in env.agent.model.layers]

    # NOTE(review): the file name has no path separators, so this writes
    # "contentweights2.csv" into the current working directory - confirm the
    # intended destination. The arrays are written as text cells; verify the
    # CSV is really usable for reloading weights.
    pd.DataFrame(weights).to_csv(r"contentweights2.csv")