In [7]:
import re
import spacy
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [2]:
# What is a lemma? 
# A lemma is a step past stemming in out NLP processing. It takes the meaning of the word while retaining relative juxaposition of the letters to encapsulate that meaning
# First thing we have to do is create a dataframe object
df = pd.read_csv(r'C:\Users\Alex Lucchesi\OneDrive\Desktop\bonfire-119-mongo-and-streamlit\oracle_cards.csv', low_memory=False)
df.head()

Unnamed: 0,object,id,oracle_id,multiverse_ids,mtgo_id,mtgo_foil_id,tcgplayer_id,cardmarket_id,name,lang,...,frame_effects,tcgplayer_etched_id,promo_types,loyalty,life_modifier,hand_modifier,attraction_lights,color_indicator,content_warning,flavor_name
0,card,86bf43b1-8d4e-4759-bb2d-0b2e03ba7012,0004ebd0-dfd6-4276-b4a6-de0003e94237,[15862],15870.0,15871.0,3094.0,3081.0,Static Orb,en,...,,,,,,,,,,
1,card,7050735c-b232-47a6-a342-01795bfd0d46,0006faf6-7a61-426c-9034-579f2cfcfa83,[370780],49283.0,49284.0,69965.0,262945.0,Sensory Deprivation,en,...,,,,,,,,,,
2,card,e718b21b-46d1-4844-985c-52745657b1ac,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,[470580],77122.0,,196536.0,391692.0,Road of Return,en,...,,,,,,,,,,
3,card,036ef8c9-72ac-46ce-af07-83b79d736538,000d5588-5a4c-434e-988d-396632ade42c,[83282],22609.0,22610.0,12835.0,12551.0,Storm Crow,en,...,,,,,,,,,,
4,card,b125d1e7-5d9b-4997-88b0-71bdfc19c6f2,000e5d65-96c3-498b-bd01-72b1a1991850,[12380],12637.0,12638.0,6412.0,10604.0,Walking Sponge,en,...,,,,,,,,,,


In [4]:
# Let's preview the column we are trying to transform!
df['oracle_text']

0        As long as Static Orb is untapped, players can...
1         Enchant creature\nEnchanted creature gets -3/-0.
2        Choose one —\n• Return target permanent card f...
3        Flying (This creature can't be blocked except ...
4        {T}: Target creature loses your choice of flyi...
                               ...                        
28924    Target creature you control gains indestructib...
28925    Red instant and sorcery spells you control hav...
28926                                                  NaN
28927    +1: Up to one target creature gains double str...
28928                      All Sliver creatures get +1/+1.
Name: oracle_text, Length: 28929, dtype: object

In [5]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl (42.8 MB)
                                              0.0/42.8 MB ? eta -:--:--
                                              0.1/42.8 MB 3.2 MB/s eta 0:00:14
                                              0.2/42.8 MB 3.4 MB/s eta 0:00:13
                                              0.4/42.8 MB 3.3 MB/s eta 0:00:14
                                              0.6/42.8 MB 3.8 MB/s eta 0:00:12
                                              0.8/42.8 MB 4.0 MB/s eta 0:00:11
                                              1.0/42.8 MB 4.1 MB/s eta 0:00:11
     -                                        1.2/42.8 MB 4.2 MB/s eta 0:00:10
     -                                        1.4/42.8 MB 4.1 MB/s eta 0:00:10
     -                                        1.7/42.8 MB 4.2 MB/s eta 0:00:10
     -                                   

In [6]:
# Dropping all null values from the oracle text column
df.dropna(subset=['oracle_text'], axis = 0, inplace=True)

# Dropping all values from the oracle text column that are empty
df.drop(df.index[df['oracle_text'] == ''], inplace = True)

In [8]:
# Using regex, we remove all non alpha-numeric values from our column before lemmatizing
df['oracle_text'] = [re.sub('[^0-9a-zA-Z]+', " ", i) for i in df.oracle_text]
df['oracle_text']

0        As long as Static Orb is untapped players can ...
1            Enchant creature Enchanted creature gets 3 0 
2        Choose one Return target permanent card from y...
3        Flying This creature can t be blocked except b...
4         T Target creature loses your choice of flying...
                               ...                        
28922    When Quarry Beetle enters the battlefield you ...
28924    Target creature you control gains indestructib...
28925    Red instant and sorcery spells you control hav...
28927     1 Up to one target creature gains double stri...
28928                        All Sliver creatures get 1 1 
Name: oracle_text, Length: 26627, dtype: object

In [9]:
# Using spacy! First, we need instantiate the dictionary that we just downloaded
nlp = spacy.load('en_core_web_md')
lemmas = []
for doc in df['oracle_text']:
    lemmas.append([token.lemma_.lower().strip() for token in nlp(str(doc)) if (token.is_stop != True) and (token.is_punct != True) and (token.is_space != True)])
df['lemmas'] = lemmas

In [10]:
df

Unnamed: 0,object,id,oracle_id,multiverse_ids,mtgo_id,mtgo_foil_id,tcgplayer_id,cardmarket_id,name,lang,...,tcgplayer_etched_id,promo_types,loyalty,life_modifier,hand_modifier,attraction_lights,color_indicator,content_warning,flavor_name,lemmas
0,card,86bf43b1-8d4e-4759-bb2d-0b2e03ba7012,0004ebd0-dfd6-4276-b4a6-de0003e94237,[15862],15870.0,15871.0,3094.0,3081.0,Static Orb,en,...,,,,,,,,,,"[long, static, orb, untapped, player, t, untap..."
1,card,7050735c-b232-47a6-a342-01795bfd0d46,0006faf6-7a61-426c-9034-579f2cfcfa83,[370780],49283.0,49284.0,69965.0,262945.0,Sensory Deprivation,en,...,,,,,,,,,,"[enchant, creature, enchanted, creature, get, ..."
2,card,e718b21b-46d1-4844-985c-52745657b1ac,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,[470580],77122.0,,196536.0,391692.0,Road of Return,en,...,,,,,,,,,,"[choose, return, target, permanent, card, grav..."
3,card,036ef8c9-72ac-46ce-af07-83b79d736538,000d5588-5a4c-434e-988d-396632ade42c,[83282],22609.0,22610.0,12835.0,12551.0,Storm Crow,en,...,,,,,,,,,,"[fly, creature, t, block, creature, flying, re..."
4,card,b125d1e7-5d9b-4997-88b0-71bdfc19c6f2,000e5d65-96c3-498b-bd01-72b1a1991850,[12380],12637.0,12638.0,6412.0,10604.0,Walking Sponge,en,...,,,,,,,,,,"[t, target, creature, lose, choice, fly, strik..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28922,card,69e11478-bfc7-4bcc-b65c-dc2d4449e99f,fff66029-24bb-4354-a650-99a205fd168f,[430816],64738.0,64739.0,136709.0,298854.0,Quarry Beetle,en,...,,,,,,,,,,"[quarry, beetle, enter, battlefield, return, t..."
28924,card,d6695fa8-881c-407c-91d9-3ac770372d35,fffa9334-3576-4f70-9605-2ad062cdbc69,[430770],64646.0,64647.0,136683.0,298834.0,Without Weakness,en,...,,,,,,,,,,"[target, creature, control, gain, indestructib..."
28925,card,4d5edf4f-5695-42fc-9e57-c4faef60fbc3,fffaa634-42a0-4038-b027-24f28754fec7,[571548],101914.0,,276869.0,665173.0,Firesong and Sunspeaker,en,...,,,,,,,,,,"[red, instant, sorcery, spell, control, lifeli..."
28927,card,09f06f55-7918-46c4-80ff-0bf39e091a4a,fffdc2ac-bde4-4e4c-a5bd-0e6c6e49ad91,[430833],64772.0,64773.0,132234.0,298282.0,"Samut, the Tested",en,...,,,4,,,,,,,"[1, target, creature, gain, double, strike, en..."


In [11]:
# Create a vectorizor to save our vocabulary and map out our lemmas to a euclidean plane

# To do so, we will create a function called dummy_fun that takes in a doc and returns it
def dummy_fun(doc):
    return doc

# Create vect:
vect = TfidfVectorizer(preprocessor=dummy_fun,
                       token_pattern=None,
                       tokenizer=dummy_fun)

# Fit the vectorizer
vect.fit(df.lemmas)

In [17]:
# Last step for our modeling is to create a model and fit it
model = NearestNeighbors(n_neighbors=12)
model.fit(vect.transform(df.lemmas))

x, y = model.kneighbors(vect.transform(df['lemmas'][df['name'] == 'The World Tree']))
for val in y:
    display(df['name'][val])

5321     Dusk Legion Duelist
17502       Skyline Predator
21721       Act of Authority
6563            Dimir Locket
14654          Myr Reservoir
330           Jasconian Isle
16177        Pterodon Knight
24391       Brush with Death
20936     Crystalline Sliver
9553              Blood Lust
11560              Persecute
21832      Temporal Trespass
Name: name, dtype: object

In [2]:
from model import dummy_func, Model
m = Model()
m.recommended_cards('sol ring')

['Sol', 'Ring']
['Thran', 'Dynamo']
["Sisay's", 'Ring']
["Kozilek's", 'Channeler']
["Ur-Golem's", 'Eye']
['Sunastian', 'Falconer']
['Weaver', 'of', 'Currents']
['Palladium', 'Myr']
['Apprentice', 'Wizard']
['Chromatic', 'Orrery']
["Ashnod's", 'Altar']
["Mishra's", 'Workshop']


[<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>,
 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=488x680>]