In [252]:
%pylab inline
from glob import glob
import math
from PIL import Image
import json
import sklearn.manifold
import os

def resize_to_power_of_two(nSize):
    """Return the smallest power of two that is >= ``nSize``.

    Integer sizes are handled with exact integer arithmetic, so the result is
    correct even for values too large to be represented exactly as a float.
    Non-integer sizes fall back to the logarithm-based computation.

    Parameters
    ----------
    nSize : int or float
        Size to round up; must be strictly positive.

    Returns
    -------
    int
        ``nSize`` rounded up to a power of two (for float inputs below 1 the
        result of ``pow(2, negative_exponent)`` is a float).

    Raises
    ------
    ValueError
        If ``nSize`` is zero or negative.
    """
    if nSize <= 0:
        raise ValueError('size must be positive, got %r' % (nSize,))
    if isinstance(nSize, int):
        # (n - 1).bit_length() is ceil(log2(n)) for every positive integer n,
        # without the rounding that float log2 introduces for large values.
        return 1 << (nSize - 1).bit_length()
    exponent = math.ceil(math.log2(nSize))  # ceiling of log base 2
    return pow(2, exponent)

def generate_spritesheet(photoSize, imageDir, outDir, game, corpus, jsonFile):
    """Tile the screenshots of one corpus into a spritesheet and record its layout.

    Every ``*.png`` file in ``imageDir`` is resized to ``photoSize`` and pasted
    left-to-right, top-to-bottom into a 64-column grid, ordered by the integer
    value of the file name (``12.png`` -> 12).  The canvas is padded up to
    power-of-two dimensions (presumably so it can be used as a THREE.js
    texture -- confirm with the consumer), saved as
    ``outDir + game + '/' + corpus + '.png'``, and the per-sprite UV data is
    written under the ``corpus`` key of ``jsonFile``.

    Parameters
    ----------
    photoSize : sequence of two ints
        ``(width, height)`` of each thumbnail in pixels.
    imageDir : str
        Directory holding the numerically named PNG screenshots.
    outDir : str
        Output root.  It is concatenated directly with ``game``, so it is
        expected to end with a path separator.
    game : str
        Name of the sub-directory of ``outDir`` that receives the spritesheet.
    corpus : str
        Corpus name; used as the image base name and as the JSON key.
    jsonFile : str
        Path of an existing JSON file.  Its ``corpus`` entry is replaced by
        the new spritesheet description; all other entries are kept.

    Returns
    -------
    tuple
        ``(image, filename)``: the PIL image of the spritesheet and the path
        it was saved to.

    Raises
    ------
    ValueError
        If ``imageDir`` contains no PNG files, or a file name is not an integer.
    """
    images = glob(imageDir + '/*.png')
    images = sorted(images, key=lambda x: int(os.path.basename(x)[:-4]))
    spritesheet_name = outDir + game + '/' + corpus
    print(spritesheet_name)

    totalNum = len(images)
    print(totalNum)
    if totalNum == 0:
        raise ValueError('no PNG images found in ' + imageDir)

    # Read the current JSON up front so a missing/invalid file fails before
    # any image work is done.
    with open(jsonFile, 'r') as f:
        corporaDict = json.load(f)

    # Grid layout and padded (power-of-two) canvas size.
    ncols = 64
    nrows = math.ceil(totalNum / ncols)
    photow = photoSize[0]
    photoh = photoSize[1]
    imgWidth_resized = resize_to_power_of_two(ncols * photow)
    imgHeight_resized = resize_to_power_of_two(nrows * photoh)

    isize = (imgWidth_resized, imgHeight_resized)
    print('imageSize=',isize, "photow", photow, "photoh", photoh, "")

    # Black background; the padding area outside the grid stays black.
    background = (0, 0, 0)
    inew = Image.new('RGB', isize, background)

    # Layout description stored in the JSON file.
    spriteDict = {}
    spriteDict['rows'] = nrows
    spriteDict['columns'] = ncols
    spriteDict['totalCount'] = totalNum
    spriteDict['spriteWidth'] = photow
    spriteDict['spriteHeight'] = photoh
    spriteDict['spritesheet'] = {}

    for count, path in enumerate(images):
        irow, icol = divmod(count, ncols)
        left = icol * photow
        right = left + photow
        upper = irow * photoh
        lower = upper + photoh
        try:
            # The context manager closes the underlying file handle once the
            # resized copy has been produced.
            with Image.open(path) as source:
                thumb = source.resize((photow, photoh))
        except OSError as err:
            # Keep the slot (left black) so later sprites keep their index;
            # assumes indices must stay aligned with the sorted file order.
            print('skipping unreadable image', path, err)
        else:
            inew.paste(thumb, (left, upper, right, lower))

        # UV coordinates are all relative to the padded canvas; v is flipped
        # so that 0 is the bottom edge of the image.
        spriteDict['spritesheet'][count] = {
            'filename': os.path.basename(path),
            'uvOffset_u': left / imgWidth_resized,
            'uvOffset_v': 1.0 - lower / imgHeight_resized,
            'uvRepeat_u': photow / imgWidth_resized,
            'uvRepeat_v': photoh / imgHeight_resized,
        }

    final_filename = spritesheet_name + '.png'
    os.makedirs(os.path.dirname(final_filename) or '.', exist_ok=True)
    inew.save(final_filename)

    # Replace this corpus' entry and rewrite the file from scratch so no
    # stale bytes of a previous, longer document can remain.
    corporaDict[corpus] = spriteDict
    with open(jsonFile, 'w') as f:
        json.dump(corporaDict, f, indent=2)

    return inew, final_filename

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [253]:
def get_embeddings(embeddingDir, game, corpus):
    """Load every ``*.npy`` embedding in ``embeddingDir`` into one 2-D array.

    Files are ordered by the integer value of their base name (``12.npy`` ->
    12).  Each file is flattened into one row; the row width is taken from the
    first file instead of being fixed at 256.

    Parameters
    ----------
    embeddingDir : str
        Directory containing numerically named ``.npy`` files.
    game, corpus : str
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(length, states)`` where ``length`` is the number of files and
        ``states`` is a float array of shape ``(length, width)``.  With no
        files the shape is ``(0, 256)``, as before.

    Raises
    ------
    ValueError
        If a file name is not an integer or the files differ in size.
    """
    # Local import: avoids depending on the `%pylab` star import for
    # `zeros` / `load`.
    import numpy as np

    paths = glob(embeddingDir + '/*.npy')
    paths.sort(key=lambda p: int(os.path.basename(p)[:-4]))
    length = len(paths)
    if length == 0:
        return length, np.zeros((0, 256))

    first = np.load(paths[0]).reshape(-1)
    states = np.zeros((length, first.size))
    states[0] = first
    for row, path in enumerate(paths[1:], start=1):
        states[row] = np.load(path).reshape(-1)
    return length, states

#Call this if several corpora are expected
def get_positions(gameDir, outputDir, dimensions, perplexity, game, corpora, jsonFile,
                  embeddings_folder=None, spritesheets=None, random_state=None):
    """Run one joint t-SNE over several corpora and store each corpus' positions.

    The embeddings of every corpus are concatenated, projected together, and
    the resulting positions are split back per corpus.  Files written under
    ``outputDir + game + '/'``:

    * ``<last corpus>_<dimensions>D_temp.json`` -- all positions as
      space-separated text (not JSON, despite the extension);
    * ``<corpus>_<dimensions>D.bin`` -- raw binary positions, one per corpus.

    ``jsonFile`` is updated with a ``positions`` entry for each corpus and,
    when known, its ``spriteSheetPath``.

    Parameters
    ----------
    gameDir : str
        Root directory of the game data; expected to end with a separator.
    outputDir : str
        Output root; expected to end with a separator.
    dimensions : int
        Number of t-SNE output dimensions.
    perplexity : float
        t-SNE perplexity.
    game : str
        Sub-directory of ``outputDir`` that receives the files.
    corpora : dict
        Maps corpus name to its folder; only the keys are used.
    jsonFile : str
        Path of an existing JSON file to update.
    embeddings_folder : str, optional
        Name of the embeddings sub-folder of each corpus.  Defaults to the
        module-level ``dir_embeddings``.
    spritesheets : dict, optional
        Maps corpus name to spritesheet path.  Defaults to the module-level
        ``spritesheet_list`` (or an empty dict if that is not defined).
    random_state : int, optional
        Seed forwarded to t-SNE for reproducible layouts.

    Raises
    ------
    ValueError
        If ``corpora`` is empty.
    """
    import numpy as np

    if not corpora:
        raise ValueError('corpora is empty; nothing to embed')
    if embeddings_folder is None:
        embeddings_folder = dir_embeddings
    if spritesheets is None:
        spritesheets = globals().get('spritesheet_list', {})

    corpus_names = list(corpora)
    state_lst = []
    lengths = {}
    for corpus in corpus_names:
        embeddingDir = gameDir + corpus + '/' + embeddings_folder
        length, states = get_embeddings(embeddingDir, game, corpus)
        state_lst.append(states)
        lengths[corpus] = length

    all_states = np.concatenate(state_lst, axis=0)

    tsne = sklearn.manifold.TSNE(n_components=dimensions, perplexity=perplexity,
                                 random_state=random_state)
    all_positions = tsne.fit_transform(all_states)

    out_prefix = outputDir + game + '/'
    os.makedirs(out_prefix, exist_ok=True)

    # Text dump of the joint layout, named after the last corpus.
    tempName = out_prefix + corpus_names[-1] + '_' + str(dimensions) + 'D_temp.json'
    print('position file:', tempName)
    all_positions.tofile(tempName, sep=" ", format="%s")

    # Split the joint layout back into one binary file per corpus.  The start
    # offset accumulates so each corpus gets its own contiguous slice.
    binfile_dict = {}
    start = 0
    for corpus in corpus_names:
        stop = start + lengths[corpus]
        positionbuffer_name = out_prefix + corpus + '_' + str(dimensions) + 'D.bin'
        all_positions[start:stop].tofile(positionbuffer_name)  # default is binary write
        binfile_dict[corpus] = positionbuffer_name
        start = stop

    with open(jsonFile, 'r') as f:
        corporaDict = json.load(f)

    for corpus in corpus_names:
        entry = corporaDict.setdefault(corpus, {})
        entry['positions'] = {
            'dimensions': dimensions,
            'perplexity': perplexity,
            'bin': binfile_dict[corpus],
        }
        # Only recorded when a spritesheet was generated for this corpus.
        if corpus in spritesheets:
            entry['spriteSheetPath'] = spritesheets[corpus]

    # Rewrite from scratch so no stale bytes of a longer document remain.
    with open(jsonFile, 'w') as f:
        json.dump(corporaDict, f, indent=2)
    
# This is for a single corpus
def generate_positions(embeddingDir, outputDir, dimensions, perplexity, game, corpus, jsonFile,
                       spritesheets=None, random_state=None):
    """Run t-SNE on the embeddings of a single corpus and store the positions.

    Files written under ``outputDir + game + '/'``:

    * ``<corpus>_<dimensions>D_temp.json`` -- positions as space-separated
      text (not JSON, despite the extension);
    * ``<corpus>_<dimensions>D.bin`` -- raw binary positions.

    ``jsonFile`` is updated with a ``positions`` entry for ``corpus`` and,
    when known, its ``spriteSheetPath``.

    Parameters
    ----------
    embeddingDir : str
        Directory containing the corpus' numerically named ``.npy`` files.
    outputDir : str
        Output root; expected to end with a separator.
    dimensions : int
        Number of t-SNE output dimensions.
    perplexity : float
        t-SNE perplexity.
    game : str
        Sub-directory of ``outputDir`` that receives the files.
    corpus : str
        Corpus name; used in file names and as the JSON key.
    jsonFile : str
        Path of an existing JSON file to update.
    spritesheets : dict, optional
        Maps corpus name to spritesheet path.  Defaults to the module-level
        ``spritesheet_list`` (or an empty dict if that is not defined).
    random_state : int, optional
        Seed forwarded to t-SNE for reproducible layouts.
    """
    if spritesheets is None:
        spritesheets = globals().get('spritesheet_list', {})

    # Same loader as the multi-corpus path, so both agree on file ordering.
    _, states = get_embeddings(embeddingDir, game, corpus)

    tsne = sklearn.manifold.TSNE(n_components=dimensions, perplexity=perplexity,
                                 random_state=random_state)
    positions = tsne.fit_transform(states)

    out_prefix = outputDir + game + '/'
    os.makedirs(out_prefix, exist_ok=True)

    tempName = out_prefix + corpus + '_' + str(dimensions) + 'D_temp.json'
    print('positions:', tempName)
    positions.tofile(tempName, sep=" ", format="%s")

    # Report the dimensionality actually produced by the projection.
    dimensions = positions.shape[1]
    positionbuffer_name = out_prefix + corpus + '_' + str(dimensions) + 'D.bin'
    positions.tofile(positionbuffer_name)  # default is binary write

    with open(jsonFile, 'r') as f:
        corporaDict = json.load(f)

    entry = corporaDict.setdefault(corpus, {})
    entry['positions'] = {
        'dimensions': dimensions,
        'perplexity': perplexity,
        'bin': positionbuffer_name,
    }
    # Only recorded when a spritesheet was generated for this corpus.
    if corpus in spritesheets:
        entry['spriteSheetPath'] = spritesheets[corpus]

    # Rewrite from scratch so no stale bytes of a longer document remain.
    with open(jsonFile, 'w') as f:
        json.dump(corporaDict, f, indent=2)
    

In [254]:
%%time
######Parameters Begin#########
dimensions = 2
perplexity = 500
game = 'The 7th Saga (USA)'
gamedir = 'Game_data/7th Saga, The (USA)/'
runTSNE = True
runSpritesheet = True
dir_screenshots = 'screenshots'
dir_embeddings = 'states'
photow,photoh = 64,56
photoSize = (photow, photoh)
outputDir = './pic/'
# Alias: some helper cells refer to the output directory by the global name
# `outDir`; defining it here keeps a fresh-kernel run working.
outDir = outputDir
########Parameters End###########

# Find corpora: every sub-directory of `gamedir` is one corpus.
corpora = {}
corpora['dir'] = gamedir
corpora['game'] = game
corpora['screenshots_folder'] = dir_screenshots
corpora['embeddings_folder'] = dir_embeddings
corpora['corpus'] = {}

# Sorted for a deterministic corpus order; plain files are skipped because a
# corpus must be a directory containing screenshots and embeddings.
folders = sorted(glob(gamedir + '*'))
for folder in folders:
    if not os.path.isdir(folder):
        continue
    corpus = os.path.basename(folder)
    corpora['corpus'][corpus] = folder

# Create output dir
destFolder = outputDir + game
os.makedirs(destFolder, exist_ok=True)

# Create json file (overwrites any previous run's file)
jsonFile = outputDir + game + '_' + str(dimensions) + 'D.json'
with open(jsonFile, 'w') as f:
    json.dump(corpora, f, indent=2)

# Maps corpus name -> path of its generated spritesheet.
spritesheet_list = {}

if runSpritesheet:
    for corpus, value in corpora['corpus'].items():
        imageDir = value + '/'+ dir_screenshots
        inew, filename = generate_spritesheet(photoSize, imageDir, outputDir, game, corpus, jsonFile)
        spritesheet_list[corpus] = filename
    #inew.show()

if runTSNE:
    # One joint t-SNE over all corpora.  To project each corpus on its own
    # instead, call generate_positions(value + '/' + dir_embeddings, outputDir,
    # dimensions, perplexity, game, corpus, jsonFile) for every
    # (corpus, value) pair in corpora['corpus'].
    get_positions(gamedir, outputDir, dimensions, perplexity, game, corpora['corpus'], jsonFile)
          

./pic/The 7th Saga (USA)/human
4953
imageSize= (4096, 8192) photow 64 photoh 56 
position file: ./pic/The 7th Saga (USA)/human_2D_temp.json
CPU times: user 13min 18s, sys: 34 s, total: 13min 52s
Wall time: 14min 4s
