# Space Invaders

# Weights & Biases x Qualcomm - SpaceInvaders Challenge

This notebook contains code for loading models from a file saved in a wandb run, and evaluating the model.

For more details on the SpaceInvaders challenge, please visit the [competition website](https://app.wandb.ai/wandb/spaceinvaders-challenge/benchmark/).

![](https://thumbs.gfycat.com/CookedFriendlyAntarcticfurseal-size_restricted.gif)

## Running this notebook
1. Click "Open in playground" to create a copy of this notebook for yourself.
2. Save a copy in Google Drive for yourself.
3. To enable a GPU, please click Edit > Notebook Settings. Change the "hardware accelerator" to GPU.
4. Step through each section, pressing play on the code blocks to run the cells.
5. Add your own model code.

## Load the model

Please replace the model file (`model.h5`) and run_path (`username/project_name/run_name`) with your submission's model file and run_path, respectively.

In [0]:
# Restore a model file from a specific wandb run:
# user "lavanyashukla", project "qualcomm", run "b0qh2jcw".
# fname    - name of the model file stored in that run
# run_path - "username/project_name/run_name" of the run to fetch the file from
fname = "model.h5"
run_path="lavanyashukla/qualcomm/b0qh2jcw"

In [0]:
!pip install --upgrade wandb -qq
# import wandb
import wandb

In [0]:
from keras.models import load_model

# Restore the saved model from wandb and load it as `agent`.
# Relies on `wandb`, `run_path` and `fname` defined in the cells above.
api = wandb.Api()
# look up the run identified by run_path
run = api.run(run_path)
local_path = None
# fetch the file named `fname` from the run and remember where it was saved;
# presumably replace=True overwrites a previously downloaded copy - confirm
# against the wandb API docs
with run.file(fname).download(replace=True) as f:
  local_path = f.name
# load the downloaded file as a Keras model; this is the agent used for evaluation
agent = load_model(local_path)
# show the model summary so the architecture can be checked by eye
agent.summary()

Using TensorFlow backend.


## Setup and Preprocessing

In [0]:
# Python packages: the gym environments and the pyvirtualdisplay wrapper
!pip install gym pyvirtualdisplay -qq
# System packages: xvfb, python-opengl and ffmpeg
# NOTE(review): this apt-get install runs before the `apt-get update` below - confirm the order is intended
!apt-get install -y xvfb python-opengl ffmpeg -qq
# NOTE(review): xdpyinfo is usually a system X11 utility rather than a PyPI package - confirm this pip install is valid/needed
!pip install xdpyinfo -qq

# Refresh the apt package index, then install build tooling
!apt-get update -qq
!apt-get install cmake -qq
!pip install --upgrade setuptools -qq
!pip install ez_setup -qq

In [0]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(30)

import numpy as np
import random
import math
import glob
import io
import os
import cv2
import base64
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from collections import deque
from datetime import datetime
import keras

from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf

In [0]:
# Grey level (mean of the three channels) of the RGB colour (210, 164, 74).
# Pixels whose grey level equals this value are blanked in preprocess_frame.
color = np.array([210, 164, 74]).mean()

def preprocess_frame(obs):
    """Turn a raw colour frame into the 88x80 array handed to the model.

    Steps, in order:
      1. keep rows 25..200 and take every second row and every second column;
      2. average over axis 2 (the colour channels) to get a grey image;
      3. set pixels whose grey level equals `color` to 0;
      4. map each grey level g to (g - 128) / 128 - 1;
      5. reshape to (88, 80).

    Arguments:
      obs - 3-D frame array; the crop in step 1 must yield exactly 88x80
            pixels (e.g. a 210x160x3 frame), otherwise the reshape raises.

    Returns:
      A new 2-D array of shape (88, 80); `obs` itself is not modified.

    NOTE(review): for 8-bit input step 4 produces values in [-2, 0) rather
    than [-1, 1]; left as is because the saved model was presumably trained
    on this scaling - confirm before changing.
    """
    # Crop and downsample by a factor of two in both directions
    cropped = obs[25:201:2, ::2]

    # Collapse the colour channels into a single grey channel
    grey = cropped.mean(axis=2)

    # Blank out the reference colour to improve contrast
    grey[grey == color] = 0

    # Rescale the grey levels
    scaled = (grey - 128) / 128 - 1

    # The model input is 88 rows by 80 columns
    return scaled.reshape(88, 80)

## Evaluation


In [0]:
# **** Caution: Do not modify this cell ****
# initialize total reward across episodes
cumulative_reward = 0
# number of episodes scored so far; evaluate() increments it on every call
episode = 0

def evaluate(episodic_reward, reset=False):
  '''
  Takes in the reward for an episode, calculates the cumulative_avg_reward
    and logs it in wandb. If episode > 100, stops logging scores to wandb.
    Called after playing each episode. See example below.

  Uses and updates the module-level counters `episode` and
    `cumulative_reward`, and prints the current episode number.

  Arguments:
    episodic_reward - reward received after playing current episode
    reset - if True, zero `cumulative_reward` and `episode` before this
      episode is counted (pass True for the first episode of a new run)

  Returns:
    cumulative_avg_reward - cumulative_reward / episode over the episodes
      scored so far, or None once more than 100 episodes have been played
      (nothing is logged or accumulated in that case)
  '''
  global episode
  global cumulative_reward
  if reset:
    cumulative_reward = 0
    episode = 0
    
  # count the current episode (so the first episode after a reset is episode 1)
  episode += 1
  print("Episode: %d"%(episode))

  # your models will be evaluated on 100-episode average reward
  # therefore, we stop logging after 100 episodes
  if (episode > 100):
    print("Scores from episodes > 100 won't be logged in wandb.")
    return

  # log total reward received in this episode to wandb
  wandb.log({'episodic_reward': episodic_reward})

  # add reward from this episode to cumulative_reward
  cumulative_reward += episodic_reward

  # calculate the cumulative_avg_reward
  # this is the metric your models will be evaluated on
  cumulative_avg_reward = cumulative_reward/episode

  # log cumulative_avg_reward over all episodes played so far
  # (a second, separate wandb.log call from the one for episodic_reward)
  wandb.log({'cumulative_avg_reward': cumulative_avg_reward})

  return cumulative_avg_reward

## Play the game for 100 episodes, log cumulative average reward, for 5 different values of seed

Please adjust this as needed to work with your model architecture.

In [0]:
from numpy.random import seed
import tensorflow as tf

# TensorFlow 1.x exposes `tf.set_random_seed`; TensorFlow 2.x only has
# `tf.random.set_seed`. Use whichever one is available.
set_random_seed = getattr(tf, "set_random_seed", None) or tf.random.set_seed

# Maximum number of actions the agent takes in one episode.
# NOTE(review): episodes are cut off after this many actions even if the game
# is not over - confirm the cap is intended for scoring and not a leftover
# speed-up.
MAX_ACTIONS_PER_EPISODE = 50

# one entry per seed: the cumulative average reward after the 100th episode
cumulative_avg_rewards = []
for seed_ in [10, 50, 100, 200, 500]:
  seed(seed_)
  set_random_seed(seed_)
  print("Seed: ",seed_)
  episode = 0

  # initialize environment
  env = gym.make('SpaceInvaders-v0')
  state_size = env.observation_space.shape[0]
  action_size = env.action_space.n
  print("Actions available(%d): %r"%(env.action_space.n, env.env.get_action_meanings()))

  # initialize a new wandb run
  wandb.init(project="qualcomm-evaluation")

  # define hyperparameters
  wandb.config.episodes = 100
  wandb.config.runpath = run_path

  # start a virtual display so gameplay can be recorded
  display = Display(visible=0, size=(1400, 900))
  display.start()

  try:
    # run for 100 episodes
    # Note: Please adjust this as needed to work with your model architecture.
    # Make sure you still call evaluate() with the reward received in each episode
    for i in range(wandb.config.episodes):
      # Set reward received in this episode = 0 at the start of the episode
      episodic_reward = 0

      # evaluate() must zero its running totals on the first episode of each seed
      reset = (i == 0)

      # record a video of the game using wrapper
      # NOTE(review): a new Monitor is layered on top of the previous one every
      # episode and closed again below - confirm against the gym Monitor docs
      # before restructuring this.
      env = gym.wrappers.Monitor(env, './video', force=True)

      # start a new game
      state = env.reset()
      state = preprocess_frame(state)

      done = False
      action_count = 0
      while not done:
        # get prediction for next action from model.
        # Keras predicts on batches, so the single frame gets a leading batch
        # axis; adjust the shape here if your model expects e.g. a channel axis.
        actions = agent.predict(state[np.newaxis, ...])
        # index of the highest-scoring action for the one frame in the batch
        action = int(np.argmax(actions[0]))

        # perform the action and fetch next state, reward
        next_state, reward, done, _ = env.step(action)
        # count the reward before any cut-off so the last step is not lost
        episodic_reward += reward
        state = preprocess_frame(next_state)

        action_count += 1
        if action_count == MAX_ACTIONS_PER_EPISODE:
          break

      # call evaluation function - takes in reward received after playing an episode
      # calculates the cumulative_avg_reward over 100 episodes & logs it in wandb
      cumulative_avg_reward = evaluate(episodic_reward, reset)

      # close the environment after every episode (including the last one);
      # the video rendering below runs only after this
      env.close()

      # your models will be evaluated on 100-episode average reward
      # therefore, we stop logging after 100 episodes
      if (i >= 99):
        cumulative_avg_rewards.append(cumulative_avg_reward)
        break

      # render gameplay video
      if (i %50 == 0):
        mp4list = glob.glob('video/*.mp4')
        if len(mp4list) > 0:
          print(len(mp4list))
          # glob returns files in arbitrary order: take the most recently written one
          mp4 = max(mp4list, key=os.path.getmtime)
          with io.open(mp4, 'rb') as video_file:
            video = video_file.read()
          encoded = base64.b64encode(video)

          # log gameplay video in wandb
          wandb.log({"gameplays": wandb.Video(mp4, fps=4, format="gif")})

          # display gameplay video
          ipythondisplay.display(HTML(data='''<video alt="" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                </video>'''.format(encoded.decode('ascii'))))
  finally:
    # one virtual display is started per seed; stop it so they do not pile up
    display.stop()

# Final score
The final score is evaluated as the cumulative_avg_reward, averaged across 5 seeds.

In [0]:
# Average the entries of cumulative_avg_rewards (filled in by the evaluation cell above)
print("Final score: ", np.mean(cumulative_avg_rewards))