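Step 1: Make sure the following libraries are installed before running the code:
1. glob (part of the Python standard library, so no installation is needed)
2. PIL (install the `Pillow` package)
3. sklearn (install the `scikit-learn` package)
4. moviepy
5. numpy and matplotlib (required by `%pylab inline`)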

Step 2: Go to the last cell and set the parameters as instructed.
Step 3: Run all cells below.

In [26]:
%pylab inline
import math
import json
import os
from PIL import Image
from glob import glob
import sklearn.manifold

from moviepy.editor import *

def resize_to_power_of_two(nSize):
    """Return the smallest power of two that is greater than or equal to ``nSize``.

    Parameters
    ----------
    nSize : int or float
        Requested size (for example a texture width or height in pixels).

    Returns
    -------
    int
        The smallest ``2**k`` (``k >= 0``) with ``2**k >= nSize``. Sizes of 1
        or less, including 0 and negative values, yield 1 instead of raising a
        math domain error.
    """
    if nSize <= 1:
        # 2**0 is the smallest result; this also covers 0, for which log2 is undefined.
        return 1
    # Integer arithmetic avoids the float rounding of log2() for very large values:
    # (n - 1).bit_length() is the exponent of the next power of two >= n.
    return 1 << (math.ceil(nSize) - 1).bit_length()

def generate_spritesheet(photoSize, imageDir, outDir, game, corpus, jsonFile):
    """Pack the PNG screenshots of one corpus into a single spritesheet image.

    Every ``<integer>.png`` file in ``imageDir`` is resized to ``photoSize`` and
    pasted, in numeric file-name order, into a grid that is 64 tiles wide. The
    sheet is padded to power-of-two dimensions and saved as
    ``outDir + game + '/' + corpus + '.png'``. The grid layout and the UV
    rectangle of every tile are stored under the key ``corpus`` in ``jsonFile``.

    Parameters
    ----------
    photoSize : sequence
        ``(width, height)`` of one tile in pixels.
    imageDir : str
        Directory containing the screenshots, named ``<integer>.png``.
    outDir : str
        Output root; it is concatenated directly with ``game``.
    game : str
        Name of the game (sub-folder of ``outDir``).
    corpus : str
        Name of the corpus; used as file name and as JSON key.
    jsonFile : str
        Path of an existing JSON file that is read, updated and rewritten.

    Returns
    -------
    tuple
        ``(sheet, final_filename)``: the PIL image and the path it was saved to.
    """
    images = glob(imageDir + '/*.png')
    images = sorted(images, key=lambda x: int(os.path.basename(x)[:-4]))

    spritesheet_name = outDir + game + '/' + corpus
    print(spritesheet_name)

    # Read the current json up front so a missing/invalid file fails before any image work.
    with open(jsonFile, 'r') as f:
        corporaDict = json.load(f)

    # Calculate the size of the output image, based on the photo thumb sizes
    totalNum = len(images)
    print(totalNum)

    ncols = 64
    nrows = math.ceil(totalNum / ncols)

    photow = photoSize[0]
    photoh = photoSize[1]

    imgWidth = ncols * photow
    # max(..., 1) keeps an empty image directory from requesting a zero-height sheet.
    imgHeight = max(nrows * photoh, 1)

    imgWidth_resized = resize_to_power_of_two(imgWidth)
    imgHeight_resized = resize_to_power_of_two(imgHeight)

    isize = (imgWidth_resized, imgHeight_resized)  # adapt to THREE
    print('imageSize=',isize, "photow", photow, "photoh", photoh, "")

    # Create the new image; unused area stays black.
    background = (0, 0, 0)
    inew = Image.new('RGB', isize, background)

    # Dictionary that stores the layout and the UV position of each image
    spriteDict = {}
    spriteDict['rows'] = nrows
    spriteDict['columns'] = ncols
    spriteDict['totalCount'] = totalNum
    spriteDict['spriteWidth'] = photow
    spriteDict['spriteHeight'] = photoh
    spriteDict['spritesheet'] = {}

    # Insert each thumb, row by row:
    for count, path in enumerate(images):
        irow, icol = divmod(count, ncols)
        left = icol * photow
        right = left + photow
        upper = irow * photoh
        lower = upper + photoh
        try:
            # Read in an image and resize appropriately; the source file is closed right away.
            with Image.open(path) as src:
                img = src.resize((photow, photoh))
        except OSError as err:
            # Leave the tile blank but keep its entry, so sprite indices still
            # follow the sorted file order.
            print('could not read image, leaving tile blank:', path, err)
        else:
            inew.paste(img, (left, upper, right, lower))

        # All UV values are fractions of the padded (power-of-two) sheet size.
        spriteDict['spritesheet'][count] = {
            'filename': os.path.basename(path),
            'uvOffset_u': left / imgWidth_resized,
            'uvOffset_v': 1.0 - lower / imgHeight_resized,
            'uvRepeat_u': photow / imgWidth_resized,
            'uvRepeat_v': photoh / imgHeight_resized,
        }

    final_filename = spritesheet_name + '.png'
    inew.save(final_filename)

    # Add new content; mode 'w' truncates, so no stale bytes of the old file survive.
    corporaDict[corpus] = spriteDict
    with open(jsonFile, 'w') as f:
        f.write(json.dumps(corporaDict, indent=2))

    return inew, final_filename

Populating the interactive namespace from numpy and matplotlib


In [27]:
def get_embeddings(embeddingDir, game, corpus):
    """Load all ``.npy`` embeddings of a directory into one matrix.

    Files are ordered by the integer value of their base name (the last four
    characters, i.e. the extension, are stripped before conversion). Each file
    fills one row of a ``(count, 256)`` array.

    ``game`` and ``corpus`` are accepted but not used.

    Returns
    -------
    tuple
        ``(count, matrix)``: the number of files found and the filled array.
    """
    def numeric_stem(path):
        # "<integer>.npy" -> integer
        return int(os.path.basename(path)[:-4])

    npy_paths = sorted(glob(embeddingDir + '/*.npy'), key=numeric_stem)
    count = len(npy_paths)
    matrix = zeros((count, 256))
    for row, path in enumerate(npy_paths):
        matrix[row] = load(path)
    return count, matrix

#Call this if several corpora are expected
def get_positions(gameDir, outputDir, dimensions, perplexity, game, corpora, jsonFile):
    """Run one joint t-SNE over the embeddings of all corpora and store the result.

    The embeddings of every corpus are loaded with ``get_embeddings``,
    concatenated in the iteration order of ``corpora`` and projected with
    ``sklearn.manifold.TSNE``. The joint result is dumped as text, then split
    back into one binary ``.bin`` file per corpus. Finally ``jsonFile`` is
    updated with a ``positions`` entry and the ``spriteSheetPath`` per corpus.

    Parameters
    ----------
    gameDir : str
        Root folder of the game data; concatenated directly with the corpus name.
    outputDir : str
        Output root; concatenated directly with ``game``.
    dimensions : int
        Number of t-SNE output dimensions (``n_components``).
    perplexity : float
        t-SNE perplexity.
    game : str
        Name of the game (sub-folder of ``outputDir``).
    corpora : dict
        Mapping whose keys are the corpus names; the values are not used here.
    jsonFile : str
        Path of an existing JSON file that is read, updated and rewritten.

    Notes
    -----
    Relies on the notebook-level names ``dir_embeddings`` and
    ``spritesheet_list`` being defined before the call.
    """
    state_lst = []
    position_dict = {}
    for corpus in corpora:
        # NOTE: dir_embeddings is a notebook-level name, not a parameter.
        embeddingDir = gameDir + corpus + '/' + dir_embeddings
        length, states = get_embeddings(embeddingDir, game, corpus)
        state_lst.append(states)
        position_dict[corpus] = length

    all_states = numpy.concatenate(state_lst, axis=0)

    tsne = sklearn.manifold.TSNE(n_components = dimensions, perplexity = perplexity)
    all_positions = tsne.fit_transform(all_states)

    # Whitespace-separated text dump of the joint result. As before, the file is
    # named after the last corpus that was iterated.
    tempName = outputDir + game + '/' + corpus + '_' + str(dimensions) + 'D_temp.json'
    print('position file:', tempName)
    all_positions.tofile(tempName, sep=" ", format="%s")

    # Split the joint result back into one binary file per corpus.
    binfile_dict = {}
    file_idx = 0
    for corpus in corpora:
        length = position_dict[corpus]
        positionbuffer_name = outputDir + game + '/' + corpus + '_' + str(dimensions) + 'D.bin'
        all_positions[file_idx : file_idx + length].tofile(positionbuffer_name) #default is binary write
        binfile_dict[corpus] = positionbuffer_name
        # Advance by the rows just written so the next corpus starts where this one ended.
        file_idx += length

    # Read current json, update it and write it back in place
    with open(jsonFile, 'r+') as f:
        corporaDict = json.load(f)

        for corpus in corpora:
            if corpus not in corporaDict:
                corporaDict[corpus] = {}
            posDict = {}
            posDict['dimensions'] = dimensions
            posDict['perplexity'] = perplexity
            posDict['bin'] = binfile_dict[corpus]

            corporaDict[corpus]['positions'] = posDict
            # NOTE: spritesheet_list is a notebook-level dict, not a parameter.
            corporaDict[corpus]['spriteSheetPath'] = spritesheet_list[corpus]

        f.seek(0)
        f.write(json.dumps(corporaDict, indent=2))
        # Drop any leftover bytes in case the new content is shorter than the old.
        f.truncate()
    
#this is for a single corpus
def generate_positions(embeddingDir, outputDir, dimensions, perplexity, game, corpus, jsonFile):
    """Run t-SNE over the embeddings of one corpus and store the result.

    The embeddings are loaded with ``get_embeddings`` and projected with
    ``sklearn.manifold.TSNE``. The positions are written once as
    whitespace-separated text and once as a binary ``.bin`` file. ``jsonFile``
    is then updated with a ``positions`` entry and the ``spriteSheetPath`` of
    the corpus.

    Parameters
    ----------
    embeddingDir : str
        Directory containing the ``<integer>.npy`` embeddings.
    outputDir : str
        Output root; concatenated directly with ``game``.
    dimensions : int
        Number of t-SNE output dimensions (``n_components``).
    perplexity : float
        t-SNE perplexity.
    game : str
        Name of the game (sub-folder of ``outputDir``).
    corpus : str
        Name of the corpus; used in file names and as JSON key.
    jsonFile : str
        Path of an existing JSON file that is read, updated and rewritten.

    Notes
    -----
    Relies on the notebook-level dict ``spritesheet_list`` containing an entry
    for ``corpus``.
    """
    # Same loading logic as the multi-corpus path.
    length, states = get_embeddings(embeddingDir, game, corpus)

    tsne = sklearn.manifold.TSNE(n_components = dimensions, perplexity = perplexity)
    positions = tsne.fit_transform(states)

    # Whitespace-separated text dump of the positions.
    tempName = outputDir + game + '/' + corpus + '_' + str(dimensions) + 'D_temp.json'
    print('positions:', tempName)
    positions.tofile(tempName, sep=" ", format="%s")

    # Record the dimensionality actually present in the result.
    dimensions = positions.shape[1]
    positionbuffer_name = outputDir + game + '/' + corpus + '_' + str(dimensions) + 'D.bin'
    positions.tofile(positionbuffer_name) #default is binary write

    # Read current json, update it and write it back in place
    with open(jsonFile, 'r+') as f:
        corporaDict = json.load(f)
        if corpus not in corporaDict:
            corporaDict[corpus] = {}
        posDict = {}
        posDict['dimensions'] = dimensions
        posDict['perplexity'] = perplexity
        posDict['bin'] = positionbuffer_name

        corporaDict[corpus]['positions'] = posDict
        # NOTE: spritesheet_list is a notebook-level dict, not a parameter.
        corporaDict[corpus]['spriteSheetPath'] = spritesheet_list[corpus]

        f.seek(0)
        f.write(json.dumps(corporaDict, indent=2))
        # Drop any leftover bytes in case the new content is shorter than the old.
        f.truncate()
    

In [30]:
def get_screenshots(screenshotDir, game, corpus):
    """Return the ``.png`` paths found in ``screenshotDir`` in numeric order.

    The ordering key is the integer value of each file's base name with its
    last four characters (the extension) removed. ``game`` and ``corpus`` are
    accepted but not used.
    """
    png_paths = list(glob(screenshotDir + '/*.png'))
    png_paths.sort(key=lambda path: int(os.path.basename(path)[:-4]))
    return png_paths

def generate_video(fps, gamedir, outDir, dir_screenshots, game, corpora, jsonFile):
    """Render all screenshots of all corpora into one video and register it.

    The screenshots of every corpus are collected with ``get_screenshots`` in
    the iteration order of ``corpora``. Each image is shown for ``1/fps``
    seconds. The result is written to ``outDir + game + '/movie.mp4'``, and its
    path, frame rate and frame count are stored under the ``video`` key of
    ``jsonFile``.

    Parameters
    ----------
    fps : int or float
        Frames per second of the generated video.
    gamedir : str
        Root folder of the game data; concatenated directly with the corpus name.
    outDir : str
        Output root; concatenated directly with ``game``.
    dir_screenshots : str
        Name of the screenshot folder inside each corpus folder.
    game : str
        Name of the game (sub-folder of ``outDir``).
    corpora : dict
        Mapping whose keys are the corpus names; the values are not used here.
    jsonFile : str
        Path of an existing JSON file that is read, updated and rewritten.

    Raises
    ------
    ValueError
        If no screenshot was found in any corpus.
    """
    # A flat Python list of paths is enough; no array conversion is needed.
    all_images = []
    for corpus in corpora:
        screenshotDir = gamedir + corpus + '/' + dir_screenshots
        print(screenshotDir)
        all_images.extend(get_screenshots(screenshotDir, game, corpus))

    if not all_images:
        raise ValueError('no screenshots found under ' + gamedir + '; cannot generate a video')

    #Save video
    videoFile = outDir + game + '/movie.mp4'
    clips = [ImageClip(m).set_duration(1/fps)
             for m in all_images]
    concat_clip = concatenate_videoclips(clips, method="compose")
    concat_clip.write_videofile(videoFile, fps=fps)

    # Read current json, update it and write it back in place
    with open(jsonFile, 'r+') as f:
        corporaDict = json.load(f)
        if 'video' not in corporaDict:
            corporaDict['video'] = {}
        corporaDict['video']['source'] = videoFile
        corporaDict['video']['fps'] = fps
        corporaDict['video']['total'] = len(all_images)

        f.seek(0)
        f.write(json.dumps(corporaDict, indent=2))
        # Drop any leftover bytes in case the new content is shorter than the old.
        f.truncate()
    
        
       

In [None]:
%%time
#Note:
#    1. All directories should be relative to the root path of your web server, where the index.html file is located.
#       If you are not running Jupyter Notebook from that root path, restart your Notebook from that path, and then continue.
#    2. Your game data folder should be laid out as:
#       - game
#         - expert 
#           - screenshots
#           - embeddings
#         - human
#           - screenshots
#           - embeddings
#    'expert' and 'human' are just example names for your corpora; you can choose whatever you want as long as you can tell
#     them apart. These corpora will be displayed as separate buttons when you toggle on the 'corpora' button on the tsnemap page.

######Parameters Begin###########
#Directory settings
game = 'Batman Returns (USA)' # Distinctive name for your game. It will be displayed in the dropdown list on the main page.
gamedir = 'Game_data/Batman Returns (USA)/' # The root path of your data (keep the trailing '/': paths are built by string concatenation)
dir_screenshots = 'screenshots' # The folder name of your screenshots
dir_embeddings = 'embeddings' # The folder name of your embeddings
outDir = './pic/' # The path to output the generated results (keep the trailing '/')
#Spritesheet settings
photow,photoh = 64,56 # Width and height to resize your high-resolution screenshots to
#t-SNE settings
#For a detailed explanation, please refer to: http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html. 
runTSNE = True # True if you want to run t-SNE
dimension = 3 # Dimensionality used for the t-SNE mapping
# NOTE(review): the scikit-learn documentation suggests perplexity values between 5 and 50 and
# requires it to be smaller than the number of samples - confirm that 500 is intended.
perplexity = 500 # Perplexity used for the t-SNE mapping
#Video settings
runVideo = True # True if you want to generate a video of all your screenshots; you can then use the animation feature on the website.
fps=24 # Frames per second.
########Parameters End###########

runSpritesheet = True
# Use the thumbnail size chosen in the parameter section instead of a hard-coded copy of it.
photoSize = (photow, photoh)

#Find corpora: every sub-directory of gamedir is treated as one corpus
corpora = {}
corpora['dir'] = gamedir
corpora['game'] = game
corpora['screenshots_folder'] = dir_screenshots
corpora['embeddings_folder'] = dir_embeddings
corpora['corpus'] = {}

# sorted() makes the corpus order (and with it the order of positions and video frames) deterministic.
for folder in sorted(glob(gamedir + '*')):
    if not os.path.isdir(folder):
        # Stray files next to the corpus folders are not corpora.
        continue
    corpus = os.path.basename(os.path.normpath(folder))
    corpora['corpus'][corpus] = folder

#Create output dir
destFolder = outDir + game
os.makedirs(destFolder, exist_ok=True)

#Create json file (any previous content is replaced)
jsonFile = outDir + game + '_' + str(dimension) + 'D.json'
with open(jsonFile, 'w') as f:
    f.write(json.dumps(corpora, indent=2))

#Add Gameinfo: register this game/dimension pair, creating the file on first use
gameinfo_file = 'gameinfo.json'
if os.path.exists(gameinfo_file):
    with open(gameinfo_file, 'r') as f:
        gameinfo = json.load(f)
else:
    gameinfo = {}
if game not in gameinfo:
    gameinfo[game] = {}
gameinfo[game][str(dimension)] = jsonFile

# Mode 'w' truncates, so no stale bytes of the old file survive.
with open(gameinfo_file, 'w') as f:
    f.write(json.dumps(gameinfo, indent=2))

# Maps corpus name -> path of its spritesheet image; read by the position functions.
spritesheet_list = {}

if runSpritesheet:
    for corpus, value in corpora['corpus'].items():
        imageDir = value + '/'+ dir_screenshots
        inew, filename = generate_spritesheet(photoSize, imageDir, outDir, game, corpus, jsonFile)
        spritesheet_list[corpus] = filename

if runTSNE:
    # One joint t-SNE over all corpora. For a single corpus,
    # generate_positions(embeddingDir, outDir, dimension, perplexity, game, corpus, jsonFile)
    # can be used instead.
    get_positions(gamedir, outDir, dimension, perplexity, game, corpora['corpus'], jsonFile)
if runVideo:
    generate_video(fps, gamedir, outDir, dir_screenshots, game, corpora['corpus'], jsonFile)

./pic/Batman Returns (USA)/human
5765
imageSize= (4096, 8192) photow 64 photoh 56 
