# Space Invaders

# Weights & Biases x Qualcomm - SpaceInvaders Challenge

This notebook contains code for loading models from a file saved in a wandb run, and evaluating the model.

For more details on the SpaceInvaders challenge, please visit the [competition website](https://app.wandb.ai/wandb/spaceinvaders-challenge/benchmark/).

![](https://thumbs.gfycat.com/CookedFriendlyAntarcticfurseal-size_restricted.gif)

## Running this notebook
1. Click "Open in playground" to create a copy of this notebook for yourself.
2. Save a copy in Google Drive for yourself.
3. To enable a GPU, please click Edit > Notebook Settings. Change the "hardware accelerator" to GPU.
4. Step through each section, pressing play on the code blocks to run the cells.
5. Add your own model code.

## Load the model

Please replace the model file (`model.h5`) and run_path (`username/project_name/run_name`) with your submission's model file and run_path, respectively.

In [1]:
from __future__ import division
import os
import io
os.environ["OMP_NUM_THREADS"] = "1"
import argparse
import torch
from environment import atari_env
from utils import read_config, setup_logger
from model import A3Clstm
from player_util import Agent
import gym
import glob
import base64
import logging
import time
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from gym.wrappers import Monitor
from torchsummary import summary 

import numpy as np
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

import wandb

In [2]:
# Download a model file that was saved with a wandb run.
# NOTE(review): run_path below is not in the `username/project_name/run_name`
# form this notebook asks for — confirm it before running this cell.
fname = "model.h5"
run_path="trained_model"
local_path = None

# Look the run up through the public API, then fetch the requested file from it.
api = wandb.Api()
run = api.run(run_path)
remote_file = run.file(fname)
with remote_file.download(replace=True) as f:
  # keep the name of the file object returned by download()
  local_path = f.name

local_path_model = local_path

## Setup and Preprocessing

!apt-get update -qq  # refresh the package index before any apt-get install
!pip install gym pyvirtualdisplay -qq
!apt-get install -y xvfb python-opengl ffmpeg -qq
!pip install xdpyinfo -qq
!apt-get install -y x11-utils  # -y so the install does not stop at a confirmation prompt
!apt-get install -y cmake -qq  # -y so the install does not stop at a confirmation prompt
!pip install --upgrade setuptools -qq
!pip install ez_setup -qq
In [3]:
# Prefer the first CUDA GPU when PyTorch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')
print(device)

cuda:0


In [4]:
# Point the evaluation at a local checkpoint file. This reassigns
# local_path_model, replacing the path taken from the wandb download cell.
local_path_model = "trained_models/model.h5"

## Evaluation


In [5]:
# **** Caution: Do not modify this cell ****
# Module-level running totals; evaluate() updates both through `global`.
# initialize total reward across episodes
cumulative_reward = 0
# number of episodes passed to evaluate() since the last reset
episode = 0

def evaluate(episodic_reward, reset=False):
  '''
  Takes in the reward for an episode, calculates the cumulative_avg_reward
    and logs it in wandb. If episode > 100, stops logging scores to wandb.
    Called after playing each episode. See example below.

  Arguments:
    episodic_reward - reward received after playing current episode
    reset - when True, zero the running reward total and the episode counter
      before counting this episode (the evaluation loop passes True for the
      first episode of each seed)

  Returns:
    The average reward over the episodes counted since the last reset, or
    None once more than 100 episodes have been counted.
  '''
  global episode
  global cumulative_reward
  # start a new evaluation pass: forget everything accumulated so far
  if reset:
    cumulative_reward = 0
    episode = 0
    
  episode += 1
  print("Episode: %d"%(episode))

  # your models will be evaluated on 100-episode average reward
  # therefore, we stop logging after 100 episodes
  # (this early return yields None and leaves cumulative_reward untouched)
  if (episode > 100):
    print("Scores from episodes > 100 won't be logged in wandb.")
    return

  # log total reward received in this episode to wandb
  wandb.log({'episodic_reward': episodic_reward})

  # add reward from this episode to cumulative_reward
  cumulative_reward += episodic_reward

  # calculate the cumulative_avg_reward
  # this is the metric your models will be evaluated on
  cumulative_avg_reward = cumulative_reward/episode

  # log cumulative_avg_reward over all episodes played so far
  wandb.log({'cumulative_avg_reward': cumulative_avg_reward})

  return cumulative_avg_reward

## Play the game for 100 episodes, log cumulative average reward, for 5 different values of seed

Please adjust this as needed to work with your model architecture.

In [None]:
from numpy.random import seed

class ARGS():
    """Plain attribute container handed to `atari_env` and `Agent` in the evaluation cell."""

    def __init__(self):
        # Evaluation settings, applied one by one as instance attributes.
        settings = {
            "max_episode_length": 10000,
            "skip_rate": 4,
            "env_config": "config.json",
        }
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)


args = ARGS()

# Episodes played per seed. evaluate() stops logging after 100 episodes, so the
# per-seed score is taken after episode number NUM_EPISODES.
NUM_EPISODES = 100

# Read the environment settings passed to atari_env below.
setup_json = read_config(args.env_config)
env_conf = setup_json["Default"]

# The checkpoint does not depend on the seed: read it from disk once and reuse
# it for every seed (load_state_dict copies the values into each new model).
saved_state = torch.load(local_path_model,map_location=lambda storage, loc: storage)

# One cumulative_avg_reward per seed, appended after that seed's last episode.
cumulative_avg_rewards = []

for seed_ in [10, 50, 100, 200, 500]:
  # Seed NumPy and PyTorch (CPU and, when available, CUDA) for this pass.
  # NOTE(review): no explicit seeding of the environment is visible here —
  # confirm whether atari_env takes care of that.
  seed(seed_)
  torch.manual_seed(seed_)
  if torch.cuda.is_available():
    torch.cuda.manual_seed(seed_)
  print("Seed: ",seed_)
  episode = 0

  # initialize environment and agent, then load the trained weights
  env = atari_env("SpaceInvaders-v0", env_conf, args)
  player = Agent(None, env, args, None)
  player.model = A3Clstm(player.env.observation_space.shape[0],
                         player.env.action_space)
  player.model = player.model.to(device)
  player.model.load_state_dict(saved_state)
  player.model.eval()
  if torch.cuda.is_available():
    player.gpu_id = 0

  # Remember the agent's environment before any Monitor is attached, so every
  # episode wraps this same object instead of wrapping the previous Monitor.
  base_env = player.env

  # initialize a new wandb run
  wandb.init(project="qualcomm-evaluation")

  # define hyperparameters
  wandb.config.episodes = NUM_EPISODES

  # virtual display used while recording gameplay video
  display = Display(visible=0, size=(1400, 900))
  display.start()

  try:
    # Note: Please adjust this as needed to work with your model architecture.
    # Make sure you still call evaluate() with the reward received in each episode
    for i in range(NUM_EPISODES):
      # Set reward received in this episode = 0 at the start of the episode
      episodic_reward = 0

      # record a video of the game using a fresh wrapper around the base env
      player.env = gym.wrappers.Monitor(base_env, './video', force=True)

      # Start a random game
      player.state = player.env.reset()
      player.state = torch.from_numpy(player.state).float().to(device)

      while True:
        player.action_test()
        episodic_reward += player.reward

        if player.done and not player.info:
          # `done` without `info`: reset the environment and keep playing
          # within the same episode
          player.state = player.env.reset()
          player.state = torch.from_numpy(player.state).float().to(device)
        elif player.info:
          # `info` set: the episode is over
          break

      # call evaluation function - takes in reward received after playing an episode
      # calculates the cumulative_avg_reward over 100 episodes & logs it in wandb;
      # the running totals are reset on the first episode of every seed
      cumulative_avg_reward = evaluate(episodic_reward, i == 0)

      # Close the monitored env after every episode, including the last one.
      player.env.close()

      # keep the average after the last episode as this seed's score
      if i == NUM_EPISODES - 1:
        cumulative_avg_rewards.append(cumulative_avg_reward)
        break

      # render gameplay video
      if (i % 50 == 0):
        mp4list = glob.glob('video/*.mp4')
        if len(mp4list) > 0:
          print(len(mp4list))
          mp4 = mp4list[-1]
          # read-only access is enough; the context manager closes the handle
          with io.open(mp4, 'rb') as video_file:
            encoded = base64.b64encode(video_file.read())

          # log gameplay video in wandb
          wandb.log({"gameplays": wandb.Video(mp4, fps=4, format="gif")})

          # display gameplay video
          ipythondisplay.display(HTML(data='''<video alt="" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                </video>'''.format(encoded.decode('ascii'))))
  finally:
    # Stop the virtual display started for this seed, even if an episode fails.
    display.stop()

Seed:  10


  result = entry_point.load(False)


wandb: Wandb version 0.8.29 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


Episode: 1
2


Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
1


Episode: 52
Episode: 53
Episode: 54
Episode: 55
Episode: 56
Episode: 57
Episode: 58
Episode: 59
Episode: 60
Episode: 61
Episode: 62
Episode: 63
Episode: 64
Episode: 65
Episode: 66
Episode: 67
Episode: 68
Episode: 69
Episode: 70
Episode: 71
Episode: 72
Episode: 73
Episode: 74
Episode: 75
Episode: 76
Episode: 77
Episode: 78
Episode: 79
Episode: 80
Episode: 81
Episode: 82
Episode: 83
Episode: 84
Episode: 85
Episode: 86
Episode: 87
Episode: 88
Episode: 89
Episode: 90
Episode: 91
Episode: 92
Episode: 93
Episode: 94
Episode: 95
Episode: 96
Episode: 97
Episode: 98
Episode: 99
Episode: 100
Seed:  50


  result = entry_point.load(False)
wandb: W&B is disabled in this directory.  Run `wandb on` to enable cloud syncing.


wandb: Wandb version 0.8.29 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


Episode: 1
1


Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39


# Final score
The final score is evaluated as the cumulative_avg_reward, averaged across 5 seeds.

In [None]:
# Average the per-seed cumulative_avg_reward values collected by the evaluation cell.
final_score = np.mean(cumulative_avg_rewards)
print("Final score: ", final_score)