# Project Fletcher
# Magic the Gathering: The Trading Card Game

### Kaushik Vasudevan 11/11/16

In [22]:
from __future__ import print_function, division
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from gensim import corpora, models, similarities, matutils

from sklearn import datasets
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.cross_validation import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
import sklearn.metrics.pairwise as smp

from sklearn.decomposition import NMF

from sklearn.externals import joblib

# import logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


# Data
Card data pulled from a Kaggle dataset and loaded from a local CSV file (`MagicDatasets/Magic_Pandas_DF`).

In [23]:
# Load the card table from a CSV file, using a path relative to the notebook's
# working directory.
magic_cards = pd.read_csv("MagicDatasets/Magic_Pandas_DF")

In [24]:
# Drop the first column of the loaded frame.
# NOTE(review): presumably this is the index column written out by an earlier
# to_csv call -- confirm against the file.
# WARNING: this cell is not idempotent. It rebinds `magic_cards`, so running it
# a second time drops another column and shifts every positional column index
# used later in the notebook.
magic_cards = magic_cards.drop(magic_cards.columns[0], axis=1)

# Unsupervised Learning Models
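- Used a nearest-neighbors (KNN) model on the combined numeric + TF-IDF card features
- Used an LSI (latent semantic indexing) model on the TF-IDF corpus
- Tried NMF with a KDTree (experimental; see the end of the notebook)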

In [25]:
# Replace missing values with a single space so the string concatenation below
# never meets NaN.
magic_cards_fill = magic_cards.fillna(" ")

# Build one free-text "document" per card by joining its descriptive columns.
magic_cards_fill['combined_text'] = (
    magic_cards_fill['name'] + " "
    + magic_cards_fill['type'] + " "
    + magic_cards_fill['colors'] + " "
    + magic_cards_fill["text"] + " "
    + magic_cards_fill["flavor"] + " "
    + magic_cards_fill["rarity"]
)
magic_cards_s = magic_cards_fill['combined_text']

# Plain Python list of documents, in row order, for the vectorizer.
# (The original first bound the *method* `magic_cards_s.tolist` without calling
# it and then overwrote the name; the dead assignment is removed.)
magic_list = magic_cards_s.tolist()

In [26]:
# TF-IDF vectorizer for the combined card text:
#   - stop_words="english": use the built-in English stop-word list.
#   - token_pattern: a token is a run of two or more ASCII letters/digits
#     bounded by word breaks, so single-character tokens are ignored.
#   - min_df=10: ignore terms that appear in fewer than 10 documents.
tfidf = TfidfVectorizer(stop_words="english", 
                        token_pattern="\\b[a-zA-Z0-9][a-zA-Z0-9]+\\b", 
                        min_df=10)


In [27]:
# Learn the vocabulary from the card documents and return the document-term
# matrix (one row per entry of `magic_list`). The result is sparse; later cells
# densify it with .todense() and transpose it for gensim.
tfidf_vecs = tfidf.fit_transform(magic_list)


# Combining text dataframe and original dataframe

In [28]:
# Dense document-term table: one row per card document, one column per
# vocabulary term (column labels are the vectorizer's feature names).
tfidif_df = pd.DataFrame(
    data=tfidf_vecs.toarray(),
    columns=tfidf.get_feature_names(),
)

In [29]:
# Copy of the card table with every missing value replaced by 0.0.
new_cards = magic_cards.fillna(0.0)

# Keep only the columns at positions 2, 11, 21 and 34; the .info() output
# recorded below shows these are cmc, loyalty, power and toughness.
magic_cards_fill_cut = new_cards.iloc[:, [2, 11, 21, 34]]
magic_cards_fill_cut.info()

# One-hot encode the object-typed columns (power, toughness); the float
# columns pass through unchanged.
dummied_df = pd.get_dummies(magic_cards_fill_cut)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31705 entries, 0 to 31704
Data columns (total 4 columns):
cmc          31705 non-null float64
loyalty      31705 non-null float64
power        31705 non-null object
toughness    31705 non-null object
dtypes: float64(2), object(2)
memory usage: 990.9+ KB


In [30]:
# Put the numeric/one-hot card features and the TF-IDF term columns side by
# side, one row per card.
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0. Re-indexing the
# concatenated result on the feature frame's row index selects the same rows
# and works on both old and new pandas.
combined_df = pd.concat([dummied_df, tfidif_df], axis=1).reindex(magic_cards_fill_cut.index)

## KNN Model

In [31]:
# Unsupervised nearest-neighbour searcher.
#   - n_neighbors=20: number of neighbours returned by default from kneighbors().
#     NOTE(review): presumably oversampled because the display loop later
#     de-duplicates card names and shows only 6 -- confirm.
#   - n_jobs=-1: use all available processors for the neighbour search.
model = NearestNeighbors(n_neighbors=20,n_jobs=-1)

In [32]:
# Index every card's combined feature row. fit() returns the estimator itself,
# so `knn` and `model` refer to the same fitted object at this point.
knn = model.fit(combined_df)


## LSI Model

In [33]:
# gensim's Sparse2Corpus reads documents from columns by default, so the
# (documents x terms) TF-IDF matrix is transposed before wrapping it.
terms_by_docs = tfidf_vecs.transpose()
tfidf_corpus = matutils.Sparse2Corpus(terms_by_docs)

# scikit-learn stores term -> column id; gensim wants column id -> term.
id2word = {term_id: term for term, term_id in tfidf.vocabulary_.items()}

# Promote the plain mapping to a gensim Dictionary built from the corpus.
id2word = corpora.Dictionary.from_corpus(tfidf_corpus, id2word=id2word)

In [34]:
# Fit a latent semantic indexing model on the TF-IDF corpus, requesting
# 300 latent topics.
lsi = models.LsiModel(tfidf_corpus, id2word=id2word, num_topics=300)

In [35]:
# Project every TF-IDF document into LSI topic space (lazily evaluated) ...
lsi_corpus = lsi[tfidf_corpus]

# ... and materialise the projected documents into a list so they can be
# indexed by card position later.
doc_vecs = list(lsi_corpus)

In [36]:
# Dense cosine-similarity index over the LSI document vectors.
# The indexed vectors live in LSI topic space, so the index needs one column
# per topic (lsi.num_topics), not one per vocabulary term. Sizing it with
# len(id2word) only padded the matrix with all-zero columns, which wasted
# memory without changing any similarity score.
sim_index = similarities.MatrixSimilarity(doc_vecs, num_features=lsi.num_topics)

# Card Thesaurus

### Pick a card:

In [37]:
# Card to look up. Several rows can share one name (the display cells
# de-duplicate names), so the first matching row is used as the query.
name = "Elspeth, Knight-Errant"
card_index = (magic_cards_fill[magic_cards_fill['name']==name]).index.tolist()
if not card_index:
    # Fail with a readable message instead of an IndexError on card_index[0].
    raise ValueError("No card named {!r} found in magic_cards_fill".format(name))
card_index = card_index[0]

# kneighbors() expects a 2-D (n_queries, n_features) input. Selecting with a
# list keeps the single row as a one-row frame instead of a 1-D Series, which
# newer scikit-learn versions reject.
distances, indices = knn.kneighbors(combined_df.iloc[[card_index], :])

# Only one query row was passed, so unwrap the single result row.
index = indices[0]
distance = distances[0]



### Use LSI model:

In [38]:
# Score every card against the chosen card's LSI vector and order the
# (row position, similarity) pairs from most to least similar.
sims = sorted(enumerate(sim_index[doc_vecs[card_index]]), key=lambda item: -item[1])

# Walk the top 20 hits and print the first 6 distinct card names (the chosen
# card itself plus 5 neighbours); repeated names are skipped.
nearest_list = []
i = 0
for sim_doc_id, sim_score in sims[0:20]:
    # Column 16 is used as the card name (it is what the recorded output prints).
    card_name = magic_cards.iloc[sim_doc_id,16]
    if card_name in nearest_list or i >= 6:
        continue
    nearest_list.append(card_name)
    print (str(i) + ".")
    print (card_name)
    print("Score: " + str(sim_score))
    print("Text on Card: " + magic_list[sim_doc_id])
    print ("------------")
    i+=1

print ("This is the chosen card and the 5 closest cards: "+str(nearest_list))

0.
Elspeth, Knight-Errant
Score: 1.0
Text on Card: Elspeth, Knight-Errant Planeswalker — Elspeth [White] +1: Put a 1/1 white Soldier creature token onto the battlefield.
+1: Target creature gets +3/+3 and gains flying until end of turn.
−8: You get an emblem with "Artifacts, creatures, enchantments, and lands you control have indestructible."   Mythic Rare
------------
1.
Elspeth, Sun's Champion
Score: 0.571108
Text on Card: Elspeth, Sun's Champion Planeswalker — Elspeth [White] +1: Put three 1/1 white Soldier creature tokens onto the battlefield.
−3: Destroy all creatures with power 4 or greater.
−7: You get an emblem with "Creatures you control get +2/+2 and have flying."   Mythic Rare
------------
2.
Elspeth Tirel
Score: 0.54558
Text on Card: Elspeth Tirel Planeswalker — Elspeth [White] +2: You gain 1 life for each creature you control.
−2: Put three 1/1 white Soldier creature tokens onto the battlefield.
−5: Destroy all other permanents except for lands and tokens.   Mythic Rare
--

### Use KNN model:

In [39]:
# Print the first 6 distinct card names among the KNN hits (the chosen card
# itself plus 5 neighbours); repeated names are skipped.
# Positional columns used below: 16 is printed as the card name, 2 = cmc,
# 21 = power, 34 = toughness (matching the iloc[:, [2, 11, 21, 34]] selection
# earlier), 35 is printed as type, 32 as rules text, 5 as flavor text.
nearest_list = []
i=0
for k in range(0,len(index)):
    row = index[k]
    card_name = magic_cards.iloc[row,16]
    if card_name in nearest_list or i >= 6:
        continue
    nearest_list.append(card_name)
    print (str(i) + ".")
    print (card_name)
    print ("Distance Away: "+str(distance[k]))
    # Fix: toughness comes from column 34; the original printed column 21
    # (power) on both sides of the slash.
    print ("CMC: "+str(magic_cards.iloc[row,2])+" Power/Toughness: "+str(magic_cards.iloc[row,21])+"/"+str(magic_cards.iloc[row,34]))
    print ("Type: "+str(magic_cards.iloc[row,35]))
    print ("Text: "+str(magic_cards.iloc[row,32]))
    print ("Flavor: "+str(magic_cards.iloc[row,5]))
    print ("------------")
    i+=1

print ("This is the chosen card and the 5 closest cards: "+str(nearest_list))

0.
Elspeth, Knight-Errant
Distance Away: 0.0
CMC: 4.0 Power/Toughness: nan/nan
Type: Planeswalker — Elspeth
Text: +1: Put a 1/1 white Soldier creature token onto the battlefield.
+1: Target creature gets +3/+3 and gains flying until end of turn.
−8: You get an emblem with "Artifacts, creatures, enchantments, and lands you control have indestructible."
Flavor: nan
------------
1.
Ajani Steadfast
Distance Away: 1.18545311408
CMC: 4.0 Power/Toughness: nan/nan
Type: Planeswalker — Ajani
Text: +1: Until end of turn, up to one target creature gets +1/+1 and gains first strike, vigilance, and lifelink.
−2: Put a +1/+1 counter on each creature you control and a loyalty counter on each other planeswalker you control.
−7: You get an emblem with "If a source would deal damage to you or a planeswalker you control, prevent all but 1 of that damage."
Flavor: nan
------------
2.
Gideon, Ally of Zendikar
Distance Away: 1.20268889827
CMC: 4.0 Power/Toughness: nan/nan
Type: Planeswalker — Gideon
Text: +

## Store models

In [40]:
#joblib.dump(knn, 'models/magic_knn.pkl')

In [41]:
#joblib.dump(sim_index, 'models/magic_lsi.pkl')

In [42]:
#combined_df.to_csv('Magic_Combined_DF',encoding='utf-8')

## Try NMF

# Shift every feature up by 1 and take the raw array as NMF input.
# NOTE(review): presumably the +1 is meant to guarantee non-negative input,
# which NMF requires -- confirm it is needed for these features.
nmf_data = (combined_df + 1).values

# NOTE: this rebinds `model`, which earlier held the NearestNeighbors searcher.
# `knn` still refers to that fitted searcher, but re-running the earlier
# `knn = model.fit(combined_df)` cell after this one would fit NMF instead.
model = NMF(n_components=300)

# fit() returns the fitted estimator itself, so `nmf` is the NMF model object,
# not the reduced data matrix; call nmf.transform(nmf_data) to get the
# 300-component representation of the cards.
nmf = model.fit(nmf_data)

# Displays the type of `nmf` (the estimator class), confirming the point above.
type(nmf)

## Try KDTree

from sklearn.neighbors import KDTree

# `nmf` is the fitted NMF estimator, not data. KDTree needs a numeric
# (n_samples, n_features) array, so project the cards into the NMF component
# space first and build/query the tree on that matrix.
nmf_vecs = nmf.transform(nmf_data)

kdt = KDTree(nmf_vecs, leaf_size=30, metric='euclidean')

# Row positions of the 5 nearest cards for every card (each card's own row is
# among them, at distance 0).
kdt.query(nmf_vecs, k=5, return_distance=False)