# This notebook is the start of the "trial and error approach"

References:

- https://github.com/xurxodiz/cardwalker/tree/master/oracle
- https://laterna--magica.blogspot.com/2011/10/oracle-parser.html

In [None]:
import json
import nltk
import pandas as pd
import re
from collections import defaultdict

In [None]:
# Parse ./AllSets.json into `sets`. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open for the kernel's lifetime.
with open('./AllSets.json', 'rb') as sets_file:
    sets = json.load(sets_file)

In [None]:
# Print every set code next to that set's name, ordered by set code.
for set_code in sorted(sets):
    print(set_code, sets[set_code]['name'])

In [None]:
# Cards of the set stored under the code 'USG'; used below to build `texts`.
cards_usaga = sets['USG']['cards']

In [None]:
# Preview only the first few cards; echoing the whole list floods the cell output.
cards_usaga[:3]

In [None]:
# Pool the cards of every set, skipping Unglued ('UGL'), Unstable ('UST') and
# any set whose code is longer than three characters (promotional things).
cards_all = []
for set_code, set_data in sets.items():
    is_joke_set = set_code in ('UGL', 'UST')
    is_promo_set = len(set_code) > 3
    if not (is_joke_set or is_promo_set):
        cards_all.extend(set_data['cards'])

# Let's start by trying to extract static abilities from cards

In [None]:
# Rules text of each USG card with the card's own name replaced by the
# placeholder 'SELF'; cards that have no 'text' entry are left out.
texts = [
    card['text'].replace(card['name'], 'SELF')
    for card in cards_usaga
    if 'text' in card
]

In [None]:
# (regex, tag) pairs used to spot keyword abilities in a card's rules text.
# Each regex captures one or two words that are immediately followed by a
# terminator: end of text, a newline, a comma, or the " (" that opens reminder
# text. The terminators are written as a grouped alternation; inside a
# character class `$` and `|` would be literal characters, so a keyword at the
# very end of the text would never match.
patterns = [
    # Keyword at the very start of the text.
    (r'^([A-Za-z]+ ?[A-Za-z]+)(?:$|\n|,| \()', 'STATICABILITY'),
    # Keyword that follows a comma (e.g. the second entry of a keyword list).
    (r', ([A-Za-z]+ ?[A-Za-z]+)(?:$|\n|,| \()', 'STATICABILITY'),
]

In [None]:
#regexp_tagger = nltk.RegexpTagger(patterns)

In [None]:
# For every card text, record the capture groups (and tag) of each pattern
# that matches, then show the result as a table indexed by card text.
res = defaultdict(list)
r = None
for card_text in texts:
    for regex, label in patterns:
        r = re.search(regex, card_text)
        if r is None:
            continue
        res[card_text].append((r.groups(), label))
pretty = pd.DataFrame.from_dict(res, orient='index')
pretty

# DF version: Let's start by trying to extract static abilities from cards

In [None]:
# One row per card, built from the pooled `cards_all` list.
# (Swap in `cards_usaga` to work on the smaller USG sample instead.)
cards_df = pd.DataFrame(cards_all)

In [None]:
# Stringify each card's text and replace the card's own name with 'SELF'.
cards_df['text_preworked'] = [
    str(card_text).replace(card_name, 'SELF')
    for card_text, card_name in zip(cards_df['text'], cards_df['name'])
]

In [None]:
# First four cards, transposed so each card reads as a column.
cards_df.head(4).T

In [None]:
#regexp_tagger = nltk.RegexpTagger(patterns)

In [None]:
# (regex, tag) pairs used to spot keyword abilities in a card's rules text.
# Each regex captures a capitalised word (optionally followed by a second
# word) that is immediately followed by a terminator: end of text, a newline,
# a comma, or the " (" that opens reminder text. The negative lookaheads skip
# leading words that were found not to be abilities.
# The terminators are a grouped alternation: inside a character class `$` and
# `|` are literal characters, and an ungrouped `| \(` would match " (" anywhere
# without capturing anything. The letter range is `A-Za-z`; `A-z` would also
# admit the punctuation that sits between 'Z' and 'a'.
patterns_static_abilities = [
    # Keyword at the very start of the text.
    (r'^(?!Oubleday ikestray|Combined|Enchant|Choose|Target)([A-Z][a-z]+ ?[A-Za-z]+)(?:$|\n|,| \()', 'STATICABILITY'),
    # Keyword that follows a comma.
    (r', (?!Choose|Scheming)([A-Z][a-z]+ ?[A-Za-z]+)(?:$|\n|,| \()', 'STATICABILITY'),
]


def detect_staticabilities(text):
    """Return the keyword abilities found in ``text``.

    Each regex in ``patterns_static_abilities`` is searched once (first match
    only) and its non-empty capture groups are collected in pattern order.

    Parameters
    ----------
    text : str
        Rules text to scan.

    Returns
    -------
    tuple of str or float
        The captured abilities, or NaN when nothing matched so that pandas
        treats the entry as missing (e.g. for ``dropna``).
    """
    found = []
    for pat, _tag in patterns_static_abilities:
        match = re.search(pat, text)
        if match:
            found.extend(group for group in match.groups() if group)

    # float('nan') replaces the `pd.np.nan` alias, which no longer exists in
    # pandas >= 2.0; both are the same kind of float NaN.
    return tuple(found) or float('nan')

# Run the detector over every pre-processed text and keep the result per card,
# then preview the first ten cards with one card per column.
preworked_texts = cards_df['text_preworked']
cards_df['static_abilities'] = preworked_texts.apply(detect_staticabilities)
cards_df.head(10).T

In [None]:
# Distinct ability tuples that were detected (cards without a detection are dropped).
cards_df['static_abilities'].dropna().unique()

In [None]:
# Original text of the cards whose detected abilities are exactly ('Phasing',).
cards_df.loc[cards_df['static_abilities'] == ('Phasing',), 'text'].values

# Domain specific vocabulary

Let's build some domain-specific vocabulary for MTG. For example, let's list the supertypes, types, and subtypes, collect all card names, and that kind of thing.

In [None]:
# Set of every distinct card name
cards_names = set(cards_df['name'].unique())

In [None]:
# Set of every supertype that appears on at least one card
array_of_supertypes_tuples = cards_df['supertypes'].dropna().apply(tuple).unique()
cards_supertypes = {
    supertype
    for supertypes_tuple in array_of_supertypes_tuples
    for supertype in supertypes_tuple
}
cards_supertypes

In [None]:
# Set of every type that appears on at least one card
array_of_types_tuples = cards_df['types'].dropna().apply(tuple).unique()
cards_types = {
    card_type
    for types_tuple in array_of_types_tuples
    for card_type in types_tuple
}
cards_types

In [None]:
# Set of every subtype that appears on at least one card
array_of_subtypes_tuples = cards_df['subtypes'].dropna().apply(tuple).unique()
cards_subtypes = {
    subtype
    for subtypes_tuple in array_of_subtypes_tuples
    for subtype in subtypes_tuple
}
cards_subtypes

In [None]:
# First ten cards, transposed so each card reads as a column.
cards_df.head(10).T

In [None]:
import requests

# Download the comprehensive rules text. The timeout keeps the cell from
# hanging forever on a stalled connection, and raise_for_status() reports an
# HTTP failure here rather than as a confusing IndexError from pop() below.
r = requests.get(
    'http://media.wizards.com/2018/downloads/MagicCompRules%2020180713.txt',
    timeout=30,
)
r.raise_for_status()
comprules = r.text

# Capture the run of letters and spaces that follows each "702.<number>. " heading.
kw_abilities_pat = r'702\.\d+\. ([A-Za-z ]+)'
abilities = re.findall(kw_abilities_pat, comprules)
abilities.pop(0)  # The first match is just the rulings text, not an ability name
abilities

# Spacy

In [None]:
# Sample text for the spaCy experiments: the first card detected as exactly
# ('Phasing',), with an extra triggered-ability line appended.
test_sentence = (
    cards_df.loc[cards_df['static_abilities'] == ('Phasing',), 'text'].values[0]
    + '\nWhenever SELF attacks, it gets +1/+1.'
)
test_sentence

In [None]:
import spacy

# Load the 'en_core_web_sm' pipeline and print each token of a short string in quotes.
nlp = spacy.load('en_core_web_sm')
doc = nlp('Hello World!')
for token in doc:
    print('"{}"'.format(token.text))

In [None]:
# Same string with extra spaces: print each quoted token with its `idx` value.
doc = nlp('Hello     World!')
for token in doc:
    print('"{}"'.format(token.text), token.idx)

In [None]:
# Tab-separated dump of the main per-token attributes for an example sentence.
doc = nlp("Next week I'll   be in Madrid.")
for token in doc:
    print(
        token.text,
        token.idx,
        token.lemma_,
        token.is_punct,
        token.is_space,
        token.shape_,
        token.pos_,
        token.tag_,
        sep='\t',
    )

In [None]:
# Tab-separated dump of the main per-token attributes for the card text.
doc = nlp(test_sentence)
for token in doc:
    print(
        token.text,
        token.idx,
        token.lemma_,
        token.is_punct,
        token.is_space,
        token.shape_,
        token.pos_,
        token.tag_,
        sep='\t',
    )

In [None]:
# Sentence detection
# Print each sentence spaCy detects in a two-sentence example.
doc = nlp("These are apples. These are oranges.")

for sentence in doc.sents:
    print(sentence)

In [None]:
# Sentence detection
# Print each sentence spaCy detects in the card text.
doc = nlp(test_sentence)

for sentence in doc.sents:
    print(sentence)

In [None]:
# POS tagging
# Pair every token of the card text with its `tag_` value and print the list.
doc = nlp(test_sentence)
print([(tok.text, tok.tag_) for tok in doc])

In [None]:
# NER named entity recognition
# Print each recognised entity together with its label.
doc = nlp("Next week I'll be in Madrid.")
for entity in doc.ents:
    print(entity.text, entity.label_)

In [None]:
from nltk.chunk import conlltags2tree

doc = nlp("Next week I'll be in Madrid.")

# One (text, tag, IOB label) triple per token. Tokens outside any entity keep
# the bare 'O' marker; the others get "<iob>-<entity type>".
iob_tagged = []
for token in doc:
    if token.ent_iob_ == 'O':
        iob_label = token.ent_iob_
    else:
        iob_label = "{0}-{1}".format(token.ent_iob_, token.ent_type_)
    iob_tagged.append((token.text, token.tag_, iob_label))

print(iob_tagged)

# In case you like the nltk.Tree format
print(conlltags2tree(iob_tagged))
 

In [None]:
from spacy import displacy
 
# Render the entities of an example sentence inline in the notebook (style='ent').
doc = nlp('I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ')
displacy.render(doc, style='ent', jupyter=True)

In [None]:
# Noun phrases
# For each noun chunk print its text, its label and the text of its root token.
doc = nlp("Wall Street Journal just published an interesting piece on crypto currencies")
for noun_chunk in doc.noun_chunks:
    print(noun_chunk.text, noun_chunk.label_, noun_chunk.root.text)

In [None]:
# Dependency parser
# For each token print "token/tag <--dependency-- head/head tag".
doc = nlp('Wall Street Journal just published an interesting piece on crypto currencies')

for token in doc:
    head = token.head
    line = "{0}/{1} <--{2}-- {3}/{4}".format(
        token.text, token.tag_, token.dep_, head.text, head.tag_)
    print(line)

In [None]:
# Draw the dependency parse of the example sentence inline (style='dep').
doc = nlp('Wall Street Journal just published an interesting piece on crypto currencies')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 90})


In [None]:
# Draw the dependency parse of the card text inline (style='dep').
doc = nlp(test_sentence)
displacy.render(doc, style='dep', jupyter=True, options={'distance': 90})


In [None]:
# NOTE: this rebinds `nlp` to the 'en_core_web_lg' pipeline, so any cell run
# after this one uses that model instead of 'en_core_web_sm'.
nlp = spacy.load('en_core_web_lg')
# Print the vector stored in the vocabulary for the word 'banana'.
print(nlp.vocab['banana'].vector)

# For testing

In [None]:
# Fetch every NLTK data package.
# NOTE(review): 'all' is presumably a large download — consider narrowing it to
# the packages the cells below actually need once those are known.
nltk.download('all')

In [None]:
# https://www.nltk.org/book/ch10.html section 5.2
dt = nltk.DiscourseTester(['A student dances', 'Every student is a person'])
dt.readings()


In [None]:
# Add a sentence with the consistency check switched on (consistchk=True).
dt.add_sentence('No person dances', consistchk=True)

In [None]:
# Take the sentence added in the previous cell back out of the discourse.
dt.retract_sentence('No person dances', verbose=True)

In [None]:
# Add a sentence with the informativity check switched on (informchk=True).
dt.add_sentence('A person dances', informchk=True)

In [None]:
from nltk.tag import RegexpTagger
# Regex-based tagger: each (pattern, tag) pair gives `tag` to the words that
# match `pattern`; it covers only the words used in the two sentences below.
tagger = RegexpTagger(
    [('^(chases|runs)$', 'VB'),
     ('^(a)$', 'ex_quant'),
     ('^(every)$', 'univ_quant'),
     ('^(dog|boy)$', 'NN'),
     ('^(He)$', 'PRP')
])
# Reading command backed by a MaltParser that uses the tagger above.
# NOTE(review): presumably requires an external MaltParser install — confirm.
rc = nltk.DrtGlueReadingCommand(depparser=nltk.MaltParser(tagger=tagger))
# Build a two-sentence discourse with that reading command and show its readings.
dt = nltk.DiscourseTester(['Every dog chases a boy', 'He runs'], rc)
dt.readings()