# TODO  
## Card Similarity Search  
### Data Prep (raw card data > clean card data) 
  - ~~merge card, set, and legality data~~  
  - ~~concat token cards (later - may not want to)~~  
  - ensure clean  
    - fillna, non-English text issues, maybe replace symbols ({T}...) w/ text  
  - **AWS**  
    - ~~Load raw MTGJson from S3~~  
    - ~~Lambda (or Glue): Prep > to S3~~  
### Similarity (clean card data > embeddings, similarity matrix)
  - Universal Sentence Encoder (USE) embeddings from card text  
  - explore including other card props as text (color, type, mana cost...)  
  - similarity matrix  
  - pre-sort and save each card (50K+ cards)
  - **AWS**  
    - Load clean card data from S3  
    - SM Batch Job: USE embeddings & similarity matrix > to S3  
    - Lambda: embeddings/sim_matrix from S3 > EFS (?)  
    - Lambda: sort each card by similarity EFS > EFS (?)
    - Data Pipeline: cards data from S3 to Dynamo (?)  
### App Backend  
  - API accepts card name and returns topK similar cards (w/ some metadata for filtering)  
  - **AWS**  
    - Lambda: API queries EFS or Dynamo  
    - StepFunctions or Lambda destinations: refresh data pipeline as needed
### App Frontend  
  - Home page  
    - Search box and results  
    - A few filters (color, type, mana cost...)  
    - ad placement  
    - sign in (eventually)  
### Deploy  
  - Backend
    - Serverless  
    - Seed  
  - Frontend  
    - React  
    - Amplify

# Data Prep

In [1]:
import os
import json
import numpy as np
import pandas as pd

#import tensorflow as tf
#import tensorflow_hub as hub
#import matplotlib.pyplot as plt

In [75]:
# Load the raw MTGJSON card table, discarding the exported `index` column.
cards_df = (
    pd.read_csv('../data/mtgjson/cards.csv')
    .drop(columns=['index'])
)

# Quick size check: (rows, columns) and in-memory footprint in MB.
print(cards_df.shape)
print(f'{round(cards_df.memory_usage().sum() / 1000000, 2)} MB')
cards_df.head(1)

(55943, 74)
33.12 MB


Unnamed: 0,id,artist,asciiName,availability,borderColor,cardKingdomFoilId,cardKingdomId,colorIdentity,colorIndicator,colors,...,subtypes,supertypes,tcgplayerProductId,text,toughness,type,types,uuid,variations,watermark
0,1,Rebecca Guay,,"mtgo,paper",black,123335.0,122967.0,G,,G,...,,,15023.0,"If you would draw a card, you may instead choo...",,Enchantment,Enchantment,38513fa0-ea83-5642-8ecd-4f0b3daa6768,,


In [76]:
# Keep only the set code/name pair, renamed so it can be joined onto the
# cards via `setCode` without clashing with the card `name` column.
sets_df = (
    pd.read_csv('../data/mtgjson/sets.csv')
    .loc[:, ['code', 'name']]
    .rename(columns={'code': 'setCode', 'name': 'setName'})
)

# Quick size check: (rows, columns) and in-memory footprint in MB.
print(sets_df.shape)
print(f'{round(sets_df.memory_usage().sum() / 1000000, 2)} MB')
sets_df.head(1)

(545, 2)
0.01 MB


Unnamed: 0,setCode,setName
0,10E,Tenth Edition


In [77]:
# Merge set names into cards.
# `validate='many_to_one'` makes pandas raise a MergeError if `setCode` is not
# unique in sets_df; without it a duplicated set code would silently duplicate
# card rows in the left join.
# NOTE: this cell reassigns cards_df, so re-running it on an already-merged
# frame would add suffixed `setName_x` / `setName_y` columns.
cards_df = cards_df.merge(
    sets_df,
    how='left',
    on='setCode',
    validate='many_to_one',
)

# Row count should be unchanged by the left join; one column (setName) added.
print(cards_df.shape)
print('{} MB'.format(round(cards_df.memory_usage().sum()/1000000, 2)))
cards_df.head(1)

(55943, 75)
34.01 MB


Unnamed: 0,id,artist,asciiName,availability,borderColor,cardKingdomFoilId,cardKingdomId,colorIdentity,colorIndicator,colors,...,supertypes,tcgplayerProductId,text,toughness,type,types,uuid,variations,watermark,setName
0,1,Rebecca Guay,,"mtgo,paper",black,123335.0,122967.0,G,,G,...,,15023.0,"If you would draw a card, you may instead choo...",,Enchantment,Enchantment,38513fa0-ea83-5642-8ecd-4f0b3daa6768,,,Tenth Edition


In [78]:
# Reshape the long legalities table (one row per uuid/format) into one row per
# card uuid with one column per format; formats with no entry become 'Blank'.
legs_df = (
    pd.read_csv('../data/mtgjson/legalities.csv')
    .pivot(index='uuid', columns='format', values='status')
    .reset_index()
    .fillna('Blank')
)

# Quick size check: (rows, columns) and in-memory footprint in MB.
print(legs_df.shape)
print(f'{round(legs_df.memory_usage().sum() / 1000000, 2)} MB')
legs_df.head(1)

(54718, 14)
6.13 MB


format,uuid,brawl,commander,duel,future,historic,legacy,modern,oldschool,pauper,penny,pioneer,standard,vintage
0,00010d56-fe38-5e35-8aed-518019aa36a5,Blank,Legal,Legal,Blank,Blank,Legal,Legal,Blank,Blank,Blank,Legal,Blank,Legal


In [79]:
# Attach the per-format legality columns to every card, joining on `uuid`.
# Cards without a legality row keep NaN in those columns (left join).
cards_df = cards_df.merge(legs_df, on='uuid', how='left')

# Quick size check: (rows, columns) and in-memory footprint in MB.
print(cards_df.shape)
print(f'{round(cards_df.memory_usage().sum() / 1000000, 2)} MB')
cards_df.head(1)

(55943, 88)
39.83 MB


Unnamed: 0,id,artist,asciiName,availability,borderColor,cardKingdomFoilId,cardKingdomId,colorIdentity,colorIndicator,colors,...,future,historic,legacy,modern,oldschool,pauper,penny,pioneer,standard,vintage
0,1,Rebecca Guay,,"mtgo,paper",black,123335.0,122967.0,G,,G,...,Blank,Blank,Legal,Legal,Blank,Blank,Legal,Blank,Blank,Legal


In [82]:
# Persist the merged card table to ./cards.csv (row index not written);
# the "Get USE Embeddings" section reads this file back.
cards_df.to_csv('cards.csv', index=False)

### Handle token cards later

In [53]:
# Token cards are loaded for inspection only; they are not merged into cards_df here.
tokens_df = pd.read_csv('../data/mtgjson/tokens.csv')

# Quick size check: (rows, columns) and in-memory footprint in MB.
print(tokens_df.shape)
print(f'{round(tokens_df.memory_usage().sum() / 1000000, 2)} MB')
tokens_df.head(1)

(1704, 45)
0.61 MB


Unnamed: 0,index,id,artist,asciiName,availability,borderColor,colorIdentity,colors,edhrecRank,faceName,...,side,subtypes,supertypes,tcgplayerProductId,text,toughness,type,types,uuid,watermark
0,0,1,Jim Pavelec,,paper,black,R,R,,,...,,Dragon,,78608.0,Flying,5,Token Creature — Dragon,"Token,Creature",7decf258-eb10-50da-83f7-c7eba74adbfb,


In [3]:
# Shape and first two rows of the card table.
# NOTE(review): the recorded output below shows 74 columns, i.e. it appears to
# predate the set/legality merges above (out-of-order execution) — re-run to confirm.
print(cards_df.shape)
cards_df.head(2)

(55943, 74)


Unnamed: 0,id,artist,asciiName,availability,borderColor,cardKingdomFoilId,cardKingdomId,colorIdentity,colorIndicator,colors,...,subtypes,supertypes,tcgplayerProductId,text,toughness,type,types,uuid,variations,watermark
0,1,Rebecca Guay,,"mtgo,paper",black,123335.0,122967.0,G,,G,...,,,15023.0,"If you would draw a card, you may instead choo...",,Enchantment,Enchantment,38513fa0-ea83-5642-8ecd-4f0b3daa6768,,
1,2,Stephen Daniele,,"mtgo,paper",black,123149.0,122781.0,U,,U,...,"Human,Wizard",,15024.0,When Academy Researchers enters the battlefiel...,2.0,Creature — Human Wizard,Creature,b8a68840-4044-52c0-a14e-0a1c630ba42c,,


In [55]:
# List every column name available on the card table.
cards_df.columns

Index(['id', 'artist', 'asciiName', 'availability', 'borderColor',
       'cardKingdomFoilId', 'cardKingdomId', 'colorIdentity', 'colorIndicator',
       'colors', 'convertedManaCost', 'duelDeck', 'edhrecRank',
       'faceConvertedManaCost', 'faceName', 'flavorName', 'flavorText',
       'frameEffects', 'frameVersion', 'hand', 'hasAlternativeDeckLimit',
       'isFullArt', 'isOnlineOnly', 'isOversized', 'isPromo', 'isReprint',
       'isReserved', 'isStarter', 'isStorySpotlight', 'isTextless',
       'isTimeshifted', 'keywords', 'layout', 'leadershipSkills', 'life',
       'loyalty', 'manaCost', 'mcmId', 'mcmMetaId', 'mtgArenaId',
       'mtgjsonV4Id', 'mtgoFoilId', 'mtgoId', 'multiverseId', 'name', 'number',
       'originalReleaseDate', 'originalText', 'originalType', 'otherFaceIds',
       'power', 'printings', 'promoTypes', 'purchaseUrls', 'rarity',
       'scryfallId', 'scryfallIllustrationId', 'scryfallOracleId', 'setCode',
       'side', 'subtypes', 'supertypes', 'tcgplayerProd

In [7]:
# Number of distinct full type lines (the `type` column) across all cards.
cards_df.type.nunique()

1971

In [8]:
# Number of distinct set codes that actually occur in the card table.
cards_df.setCode.nunique()

531

In [9]:
# Total memory reported by DataFrame.memory_usage(), expressed in MB (bytes / 1e6).
cards_df.memory_usage().sum()/1000000

33.118384

In [56]:
# Print every field of the row labelled 0, one "column: value" pair per
# paragraph, to eyeball what each column contains.
for col in cards_df.columns:
    print(f'{col}: {cards_df.loc[0, col]}\n')

id: 1

artist: Rebecca Guay

asciiName: nan

availability: mtgo,paper

borderColor: black

cardKingdomFoilId: 123335.0

cardKingdomId: 122967.0

colorIdentity: G

colorIndicator: nan

colors: G

convertedManaCost: 4.0

duelDeck: nan

edhrecRank: 1111.0

faceConvertedManaCost: nan

faceName: nan

flavorName: nan

flavorText: nan

frameEffects: nan

frameVersion: 2003

hand: nan

hasAlternativeDeckLimit: 0


hasFoil: 1

hasNonFoil: 1

isAlternative: 0

isFullArt: 0

isOnlineOnly: 0

isOversized: 0

isPromo: 0

isReprint: 1

isReserved: 0

isStarter: 0

isStorySpotlight: 0

isTextless: 0

isTimeshifted: 0

keywords: nan

layout: normal

leadershipSkills: nan

life: nan

loyalty: nan

manaCost: {2}{G}{G}

mcmId: 16413.0

mcmMetaId: 19.0

mtgArenaId: nan

mtgjsonV4Id: 1669af17-d287-5094-b005-4b143441442f

mtgoFoilId: 27283.0

mtgoId: 27282.0

multiverseId: 130483.0

name: Abundance

number: 249

originalReleaseDate: nan

originalText: If you would draw a card, you may instead choose land or

# Download and Store USE Model from TFHub

In [5]:
# Download Universal Sentence Encoder (large, v5) from TF Hub.
# Requires `tensorflow_hub` imported as `hub`; that import is commented out in
# the imports cell at the top of this notebook.
module_url = 'https://tfhub.dev/google/universal-sentence-encoder-large/5'
model = hub.load(module_url)
print(f'module {module_url} loaded')

module https://tfhub.dev/google/universal-sentence-encoder-large/5 loaded


## Save the downloaded USE-Large model

In [2]:
# Export the TF Hub model loaded above as a SavedModel under ../models/use-large.
# Needs `model` from the download cell and `tensorflow` imported as `tf`; the
# recorded output below is a NameError because `model` was not defined in that
# kernel session.
tf.saved_model.save(model, "../models/use-large")

NameError: name 'model' is not defined

## Load USE-Large from local disk

In [3]:
# Option 1: wrap the locally saved model as a Keras layer.
# NOTE: the next cell rebinds `use_embed`, so only one of the two loaders is
# in effect at a time.
use_embed = hub.KerasLayer('../models/use-large')

In [8]:
# Option 2: load the same directory as a raw SavedModel object (overwrites the
# `use_embed` bound by the KerasLayer cell above).
use_embed = tf.saved_model.load('../models/use-large')

In [10]:
# Inspect the SavedModel's 'serving_default' signature (available when
# `use_embed` comes from tf.saved_model.load).
use_embed.signatures['serving_default']

<ConcreteFunction signature_wrapper(inputs) at 0x17984330688>

***
# Get USE Embeddings

In [45]:
# Build a small {'instances': [text, ...]} payload from the first 20 cards
# that have an MTG Arena id; missing rules text is replaced with 'Blank'.
arena_txt_dict = (
    pd.read_csv('cards.csv')
    .query('mtgArenaId.notnull()')
    .reset_index(drop=True)
    .fillna(value={'text': 'Blank'})
    .head(20)
    .loc[:, ['text']]
    .rename(columns={'text': 'instances'})
    .to_dict(orient='list')
)

# Sanity check: number of texts in the payload.
len(arena_txt_dict['instances'])

20

In [48]:
# Use the payload as-is (no escape rewriting). This binds a second name to the
# same dict object, not a copy.
arena_txt_json = arena_txt_dict
arena_txt_json

{'instances': ['Flying, lifelink\nPegasus creatures you control have lifelink.\nConstellation — Whenever an enchantment enters the battlefield under your control, create a 2/2 white Pegasus creature token with flying.',
  'Whenever Audacious Thief attacks, you draw a card and you lose 1 life.',
  'When Banishing Light enters the battlefield, exile target nonland permanent an opponent controls until Banishing Light leaves the battlefield.',
  'Return target creature card from your graveyard to the battlefield. It gains haste until your next turn.',
  'Flying\nCarnifex Demon enters the battlefield with two -1/-1 counters on it.\n{B}, Remove a -1/-1 counter from Carnifex Demon: Put a -1/-1 counter on each other creature.',
  '{B}, {T}, Sacrifice Doomed Necromancer: Return target creature card from your graveyard to the battlefield.',
  "{T}: Look at the top card of your library. If it's a land card, you may reveal it and put it into your hand.",
  'When Fanatic of Mogis enters the battlef

In [31]:
# Alternative payload: round-trip through JSON text and rewrite the escape
# sequences before parsing, so `\uXXXX` escapes become the literal text
# 'utfXXXX' and `\n` escapes become ' _ ' (see the sample output below).
arena_txt_json = json.loads(
    json.dumps(arena_txt_dict)
    .replace('\\u', 'utf')
    .replace('\\n', ' _ ')
)
arena_txt_json['instances'][:2]

['Flying, lifelink _ Pegasus creatures you control have lifelink. _ Constellation utf2014 Whenever an enchantment enters the battlefield under your control, create a 2/2 white Pegasus creature token with flying.',
 'Whenever Audacious Thief attacks, you draw a card and you lose 1 life.']

In [49]:
# Serialise the payload to a local JSON file ready for upload.
with open('arena_txt20.json', 'w') as out_file:
    out_file.write(json.dumps(arena_txt_json))

In [53]:
# Upload the local payload to the clean-data bucket under cards/.
# `s3` is the client created by aws_connect in the "Sagemaker workflow"
# section further down, so that cell must have been run first.
s3.upload_file('arena_txt20.json', 'magicml-clean-data.dev', 'cards/arena_txt20.json')

In [9]:
# Plain Python list of rules texts, in arena_df row order.
# NOTE(review): `arena_df` is not defined in any visible cell — presumably the
# Arena-only subset of cards.csv with blank text filled; confirm before re-running.
arena_txt = arena_df.text.tolist()
arena_txt[0]

'Flying, lifelink\nPegasus creatures you control have lifelink.\nConstellation — Whenever an enchantment enters the battlefield under your control, create a 2/2 white Pegasus creature token with flying.'

In [10]:
# Label per card: "<name>-<setCode>" with spaces replaced by underscores,
# in the same row order as arena_txt.
arena_name = [
    '-'.join((card_name, set_code)).replace(' ', '_')
    for card_name, set_code in zip(arena_df.name, arena_df.setCode)
]
arena_name[0]

"Archon_of_Sun's_Grace-AJMP"

In [11]:
# Spot-check one entry by position; the recorded output shows a card whose
# text is the 'Blank' placeholder.
arena_txt[23]

'Blank'

In [12]:
# Embed the first five card texts with the locally loaded USE model; the
# recorded output shows one 512-dimensional vector per text.
embeddings = use_embed(arena_txt[:5])
print(embeddings.shape)

(5, 512)


In [69]:
# Embeddings taken from the SageMaker batch-transform output (pred_df):
# convert each `predictions` entry to a NumPy array and keep the result as a
# one-column DataFrame named 'embeddings' (same index as pred_df).
embeddings = (
    pred_df.predictions
    .apply(np.asarray)
    .to_frame('embeddings')
)

In [70]:
# Pairwise inner-product similarity between card embeddings.
#
# `embeddings` can be either the array-like returned by use_embed(...) or the
# one-column DataFrame built from the SageMaker predictions, where each cell
# holds a whole vector. Passing that DataFrame straight to np.inner multiplies
# the vectors cell by cell and yields an object matrix of vectors rather than
# scalar similarities, so stack the vectors into a 2-D matrix first.
if isinstance(embeddings, pd.DataFrame):
    emb_matrix = np.stack(embeddings['embeddings'].tolist())
else:
    emb_matrix = np.asarray(embeddings)

# Entry [i, j] is the inner product of embedding i with embedding j.
corr = np.inner(emb_matrix, emb_matrix)
print(corr.shape)

(5, 5)


In [72]:
import plotly.express as px

In [73]:
# Label the similarity matrix with the card names on both axes.
# Depends on `arena_name` from the cell above; the recorded output below is a
# NameError because that cell had not been run in this kernel session.
card_df = pd.DataFrame(corr, columns=arena_name, index=arena_name)
card_df.head()

NameError: name 'arena_name' is not defined

In [29]:
# Rank all cards by similarity to one reference card, most similar first.
card_df[["Archon_of_Sun's_Grace-AJMP"]].sort_values(by="Archon_of_Sun's_Grace-AJMP", ascending=False)

Unnamed: 0,Archon_of_Sun's_Grace-AJMP
Archon_of_Sun's_Grace-AJMP,1.000000
Archon_of_Sun's_Grace-THB,1.000000
"Alela,_Artful_Provocateur-ELD",0.868950
Ethereal_Absolution-RNA,0.832532
Depose_//_Deploy-RNA,0.817002
...,...
Clearwater_Pathway_//_Murkwater_Pathway-ZNR,-0.019223
Island-ANA,-0.019223
Wind_Strider-XLN,-0.023939
Living_Tempest-ZNR,-0.023939


In [107]:
# Column label of the card used for the manual similarity spot-check below.
test_card = 'Golos,_Tireless_Pilgrim'
test_card

'Golos,_Tireless_Pilgrim'

In [106]:
# Look up the exact column label(s) for cards whose name starts with 'Golos'.
list(filter(lambda label: label.startswith('Golos'), card_df.columns))

['Golos,_Tireless_Pilgrim']

In [108]:
# Rank all rows by similarity to `test_card`, most similar first. In the
# recorded output the row index is positional (integers), not card names.
card_df[[test_card]].sort_values(by=test_card, ascending=False)

Unnamed: 0,"Golos,_Tireless_Pilgrim"
3059,1.000000
1687,0.805584
1583,0.801413
2606,0.798853
835,0.796900
...,...
5170,-0.020036
3215,-0.020036
5212,-0.041830
4441,-0.041830


In [109]:
# Convert the column label back to the real card name (underscores -> spaces)
# and show that card's rules text. Note this rebinds `test_card`, so it no
# longer matches the underscore-style column label used above.
test_card = test_card.replace('_',' ')
arena_df.query('name == @test_card').text.values

array(['When Golos, Tireless Pilgrim enters the battlefield, you may search your library for a land card, put that card onto the battlefield tapped, then shuffle your library.\n{2}{W}{U}{B}{R}{G}: Exile the top three cards of your library. You may play them this turn without paying their mana costs.'],
      dtype=object)

In [112]:
# Resolve positional index 1583 (one of the top matches in the ranking above)
# to a readable card name by undoing the underscore substitution.
test_name = arena_name[1583].replace('_',' ')
test_name

'Emergent Ultimatum'

In [113]:
# Show the rules text of the matched card for a manual comparison with the query card.
arena_df.query('name == @test_name').text.values

array(['Search your library for up to three monocolored cards with different names and exile them. An opponent chooses one of those cards. Shuffle that card into your library. You may cast the other cards without paying their mana costs. Exile Emergent Ultimatum.'],
      dtype=object)

In [52]:
# Heatmap of the full card-by-card similarity matrix (plotly express).
fig = px.imshow(card_df)
fig.show()

# Sagemaker workflow

In [2]:
import os
import pathlib
import boto3
from boto3.session import Session

def aws_connect(service, profile='default', session=False):
    """Connect to an AWS service using a named credentials profile.

    A boto3 resource is tried first; if that fails (for example because the
    service has no resource interface), a plain client is created instead.

    Args:
        service: AWS service name passed to boto3, e.g. ``'s3'``.
        profile: Name of the local AWS profile used to build the session.
        session: When true, the boto3 ``Session`` is appended to the result.

    Returns:
        If a resource could be created: ``(resource, client)`` or, with
        ``session=True``, ``(resource, client, sess)``.
        Otherwise: ``client`` or, with ``session=True``, ``(client, sess)``.
        Callers must therefore know which shape to expect for their service.

    Raises:
        Whatever ``Session(...)`` or ``sess.client(...)`` raises; only errors
        from the resource attempt trigger the client fallback.
    """
    sess = Session(profile_name=profile)

    try:
        resource = sess.resource(service)
        client = resource.meta.client
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
        client = sess.client(service)
        return (client, sess) if session else client
    else:
        if session:
            return resource, client, sess
        return resource, client

In [3]:
# Connect to S3 with the 'lw2134' AWS profile. The resource object is discarded;
# `s3` is the low-level client and `boto_sess` the session reused for SageMaker.
# NOTE(review): the profile name is hard-coded — consider a config constant.
_, s3, boto_sess = aws_connect('s3', 'lw2134', session=True)

In [12]:
# S3 bucket that holds the packaged models (queried below under the 'use-large' prefix).
MODELS_BUCKET = 'magicml-models.dev'

In [13]:
# List the objects stored under the 'use-large' prefix of the models bucket.
# NOTE: a single list_objects_v2 call returns at most one page of keys.
res = s3.list_objects_v2(
    Bucket=MODELS_BUCKET,
    Prefix='use-large'
)

# The response has no 'Contents' key when nothing matches the prefix, so
# default to an empty list instead of raising KeyError.
model_files = [file['Key'] for file in res.get('Contents', [])]

In [None]:
# Shell command used to build the model.tar.gz package; run it manually from
# the models/use-large directory (it archives the `1` and `code` entries).
# This cell only evaluates a string literal — it does not execute the command.
'tar -czvf model.tar.gz 1 code'

In [6]:
import os
import sagemaker
from sagemaker.tensorflow.model import TensorFlowModel
from sagemaker.transformer import Transformer

In [54]:
# SageMaker session bound to the boto3 session returned by aws_connect.
sagemaker_session = sagemaker.Session(boto_session=boto_sess)

# IAM role and S3 location of the packaged USE-Large model.
role = 'arn:aws:iam::553371509391:role/magicml-sagemaker'
bucket, prefix = 'magicml-models.dev', 'use-large'
model_data = f's3://{bucket}/{prefix}/model.tar.gz'

In [55]:
# The "Model" object doesn't create a SageMaker Model until a Transform Job or Endpoint is created.
# Describes the model named 'use-large', backed by the model.tar.gz at
# `model_data`, using framework_version '2.3' and the role/session configured above.
tf_serving_model = TensorFlowModel(
    name='use-large',
    model_data=model_data,
    role=role,
    framework_version='2.3',
    sagemaker_session=sagemaker_session
)

In [56]:
# USE THIS ONE IN LAMBDA W/ PRE-CREATED MODEL INSTANCE IN SM
# Environment variables passed to the serving container via `env=`: batching
# enabled, batch timeout of 100,000,000 microseconds (100 s), max batch size 32,
# and up to 10,000 enqueued batches.
env_vars = {
    'SAGEMAKER_TFS_ENABLE_BATCHING':'true',
    'SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS':'100000000',
    'SAGEMAKER_TFS_MAX_BATCH_SIZE':'32',
    'SAGEMAKER_TFS_MAX_ENQUEUED_BATCHES':'10000'
}

# S3 prefix where the batch-transform results are written.
output_path = 's3://magicml-inference.dev/use-large/'

# Transformer created from the model object defined above.
tf_transformer = tf_serving_model.transformer(
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    max_concurrent_transforms=32,
    output_path=output_path,
    env=env_vars
)

# Disabled alternative kept as a string literal: build the Transformer directly
# from an already-created SageMaker model named 'use-large'. As the last
# expression in the cell, this string is also what the cell displays.
'''
tf_transformer = Transformer(
    sagemaker_session=sagemaker_session,
    model_name='use-large',
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    strategy='MultiRecord',
    max_concurrent_transforms=32,
    output_path=output_path,
    env=env_vars
)
'''

"\ntf_transformer = Transformer(\n    sagemaker_session=sagemaker_session,\n    model_name='use-large',\n    instance_count=1,\n    instance_type='ml.m5.4xlarge',\n    strategy='MultiRecord',\n    max_concurrent_transforms=32,\n    output_path=output_path,\n    env=env_vars\n)\n"

In [58]:
# S3 object to embed.
# NOTE(review): this points at 'arena_txt10.jsons', while the upload cell above
# wrote 'cards/arena_txt20.json' — confirm the key/extension is intended.
input_path = 's3://magicml-clean-data.dev/cards/arena_txt10.jsons'

# Start the batch-transform job with JSON input; wait=False returns
# immediately instead of blocking until the job finishes.
tf_transformer.transform(
    input_path,
    content_type='application/json',
    wait=False
)

In [229]:
# Fetch a batch-transform output object ('<input name>.out') from the inference
# bucket and save it locally as embeddings.json.
s3.download_file('magicml-inference.dev', 'use-large/arena_txt17.json.out', 'embeddings.json')

In [230]:
# Load the transform output; in the recorded output each row of the
# `predictions` column holds one embedding vector as a list of floats.
pred_df = pd.read_json('embeddings.json')
pred_df

Unnamed: 0,predictions
0,"[-0.00138572545, 0.063942939, -0.0796460658, -..."
1,"[0.011156451000000001, 0.110008411, 0.01300614..."
2,"[0.0372243486, 0.07786072050000001, -0.0052833..."
3,"[0.0077040591299999996, 0.13170014300000002, -..."
4,"[0.069555454, 0.039440006, -0.0244939253, -0.0..."
5,"[0.010344215700000001, 0.10546942100000001, -0..."
6,"[-0.052046835400000005, 0.0982446596, -0.00892..."
7,"[-0.017581369700000002, 0.10219389200000001, -..."
8,"[0.0440738797, 0.0773351565, -0.0781300515, -0..."
9,"[0.0284371749, 0.11402348400000001, -0.0342804..."


In [100]:
# Second transformer configuration: only the batch timeout is set
# (10,000,000 microseconds = 10 s), without the other batching variables used
# in the earlier configuration.
# NOTE: this rebinds env_vars / output_path / tf_transformer from the earlier cell.
env_vars = {
    'SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS':'10000000'
}

# Same output prefix as the earlier configuration.
output_path = 's3://magicml-inference.dev/use-large/'

# Smaller instance type (ml.m5.2xlarge) than the earlier ml.m5.4xlarge run.
tf_transformer = tf_serving_model.transformer(
    instance_count=1,
    instance_type='ml.m5.2xlarge',
    max_concurrent_transforms=32,
    output_path=output_path,
    env=env_vars
)

Using already existing model: use-large
