In [1]:
import pickle
from sklearn.model_selection import train_test_split
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.wsd import lesk
from typing import Iterable, List, Set, Callable, Optional, Union, Sequence
from grasp import GrASP, CustomAttribute

## Load the data
- Download and unzip the spam dataset **if you have not done this before**

In [None]:
import os
import urllib.request

# Remote archive of the SMS Spam Collection and the local path it is saved to
url = 'http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/smsspamcollection.zip'
filename = './data/smsspamcollection.zip'

# urlretrieve opens the target file directly and does not create missing
# directories, so make sure ./data exists before downloading.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Only download when the archive is not already present, so this cell is safe to re-run.
if not os.path.exists(filename):
    urllib.request.urlretrieve(url, filename)

In [None]:
# Shell command: extract the downloaded archive into ./data (needs the `unzip` tool to be installed)
!unzip ./data/smsspamcollection.zip -d ./data

- Load the data

In [3]:
def get_data(path='data/SMSSpamCollection.txt', encoding=None):
    """Read the tab-separated SMS corpus into parallel lists of texts and labels.

    Each non-empty line must look like ``<label>\\t<text>`` where ``<label>`` is
    ``ham`` (mapped to 0) or ``spam`` (mapped to 1).

    Args:
        path: Location of the corpus file. Defaults to the path used by this notebook.
        encoding: Text encoding passed to ``open``. ``None`` keeps the platform default.

    Returns:
        A ``(texts, labels)`` tuple: ``texts`` is a list of message strings and
        ``labels`` is a list of ints (0 = ham, 1 = spam) in the same order.

    Raises:
        ValueError: If a non-empty line has no tab separator or an unknown label.
    """
    label_ids = {'ham': 0, 'spam': 1}
    texts, labels = [], []
    # The context manager guarantees the file is closed, even if parsing fails.
    with open(path, 'r', encoding=encoding) as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the file) carry no data.
                continue
            label, sep, text = line.partition('\t')
            if not sep:
                raise ValueError(f"Line {line_no}: expected '<label>\\t<text>' but found no tab")
            if label not in label_ids:
                raise ValueError(f"Invalid label - {label}")
            texts.append(text)
            labels.append(label_ids[label])
    return texts, labels

In [4]:
# Load the corpus, hold out 20% as the test set, then hold out 20% of the
# remainder as the validation set (both splits use the same fixed seed).
texts, labels = get_data()
split_options = {'test_size': 0.2, 'random_state': 1}
X_train, X_test, y_train, y_test = train_test_split(texts, labels, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_options)
# Displayed: corpus size, spam count, test-set size, spam count in the test set
(len(texts), sum(labels), len(X_test), sum(y_test))

(5574, 747, 1115, 138)

In [5]:
# Partition the training texts by label: truthy labels (1 = spam) are the
# positive examples, falsy labels (0 = ham) are the negative examples.
positive = [text for text, label in zip(X_train, y_train) if label]
negative = [text for text, label in zip(X_train, y_train) if not label]

## Example 1

In [6]:
# Create the GrASP engine.
# gaps_allowed = 0 and num_patterns = 100 are the only settings overridden here;
# presumably: no gaps between pattern slots, and keep the 100 best patterns — confirm against the GrASP docs.
grasp_model = GrASP(gaps_allowed = 0, num_patterns = 100)

In [7]:
# Fit GrASP to the dataset: `positive` holds the spam training texts and
# `negative` the ham training texts. The result is the list of mined patterns
# that the following cells print. NOTE: the captured run below took several minutes.
the_patterns = grasp_model.fit_transform(positive, negative)

  0%|          | 0/488 [00:00<?, ?it/s]

Step 1: Create augmented texts


100%|██████████| 488/488 [00:35<00:00, 13.56it/s]
100%|██████████| 3079/3079 [02:26<00:00, 20.99it/s]


Step 2: Find frequent attributes


  0%|          | 4/1336 [00:00<00:39, 33.70it/s]

Total number of candidate alphabet = 1336, such as ['SPACY:DEP-ROOT', 'SPACY:POS-VERB', 'SPACY:POS-NOUN', 'SPACY:DEP-punct', 'SPACY:POS-PUNCT']
Step 3: Find alphabet set


100%|██████████| 1336/1336 [00:52<00:00, 25.49it/s]


Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100


  0%|          | 0/100 [00:00<?, ?it/s]

Total number of alphabet = 100
['SPACY:POS-NUM', 'SPACY:DEP-compound', 'SPACY:NER-ORG', 'SPACY:POS-PROPN', 'SPACY:NER-DATE', 'TEXT:call', 'HYPERNYM:communication.n.02', 'SPACY:DEP-amod', 'TEXT:i', 'SPACY:POS-SYM', 'TEXT:free', 'TEXT:txt', 'TEXT:claim', 'TEXT:!', 'HYPERNYM:message.n.02', 'TEXT:mobile', 'SPACY:DEP-pobj', 'HYPERNYM:transferred_property.n.01', 'HYPERNYM:abstraction.n.06', 'TEXT:to', 'TEXT:prize', 'HYPERNYM:relation.n.01', 'HYPERNYM:win.v.01', 'SPACY:DEP-appos', 'SENTIMENT:pos', 'HYPERNYM:act.n.02', 'HYPERNYM:object.n.01', 'SPACY:NER-PERSON', 'TEXT:your', 'SPACY:POS-ADJ', 'TEXT:or', 'HYPERNYM:symbol.n.01', 'TEXT:text', 'HYPERNYM:entity.n.01', 'HYPERNYM:statement.n.01', 'TEXT:stop', 'SPACY:DEP-nmod', 'HYPERNYM:converse.v.01', 'HYPERNYM:book.n.01', 'TEXT:-', 'TEXT:150p', 'TEXT:guaranteed', 'TEXT:urgent', 'TEXT:win', 'HYPERNYM:written_communication.n.01', 'TEXT:+', 'SPACY:POS-PRON', 'TEXT:16', 'HYPERNYM:cash.n.02', 'HYPERNYM:artifact.n.01', 'HYPERNYM:creation.n.02', 'TEXT:from

100%|██████████| 100/100 [02:18<00:00,  1.38s/it]


Length 2 / 5; New candidates = 14950
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 1
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: Do whatever you want . You know what the rules are . We had a talk earlier this week about what had to start happening , you showing responsibility . Yet , every week it 's can i bend the rule this way ? What about that way ? Do whatever . I 'm tired of having thia same argument with you every week . And a   & lt;#&gt ;   movie DOESNT inlude the previews . You 're still getting in after [7m[36m1:['SPACY:POS-NUM'][0m . 
-------------------------
[5m[32m[MATCH][0m: I not free today i haf [7m[36m2:['SPACY:POS

100%|██████████| 73/73 [01:41<00:00,  1.39s/it]


Length 3 / 5; New candidates = 14337
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 1
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: I luv u soo much u donÂ’t understand how special u r [7m[36m2:['SPACY:POS-NUM'][0m me ring u 2morrow luv u xxx 
-------------------------
[5m[32m[MATCH][0m: Derp . Which is worse , a dude who always wants to party or a dude who files a complaint about the [7m[36mthree:['SPACY:POS-NUM'][0m drug abusers he lives with 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: You have been selected to stay in [7m[36m1:['SPACY:POS-NUM'][0m of 250 top British hot

100%|██████████| 22/22 [00:26<00:00,  1.20s/it]


Length 4 / 5; New candidates = 4357
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100


0it [00:00, ?it/s]

Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 1
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: is your hamster dead ? Hey so tmr i meet you at [7m[36m1:['SPACY:POS-NUM'][0m pm orchard mrt ? 
-------------------------
[5m[32m[MATCH][0m: I thk [7m[36m50:['SPACY:POS-NUM'][0m shd be ok he said plus minus 10 .. Did Ã¼ leave a line in between paragraphs ? 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: Not heard from U4 a while . Call [7m[36m4:['SPACY:POS-NUM'][0m rude chat private line 01223585334 to cum . Wan 2C pics of me gettin shagged then text PIX to 8552 . 2End send STOP 8552 SAM xxx 
-------------------------
[5m[32m[MATCH][0m: [7m[36m449050000301:['SPACY:POS-NUM'][0m You have won a Â£2,000 price ! To claim , call 09050000301 . 
-------------------------
Pattern: [['SPACY:NER-DATE




In [8]:
# Show each mined pattern under a header giving its position in the returned list
for rank, pattern in enumerate(the_patterns):
    print(f'Rank {rank}', pattern, sep='\n')

Rank 0
Pattern: [['SPACY:POS-NUM']]
Window size: 1
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: [7m[36mOne:['SPACY:POS-NUM'][0m of best dialogue in cute reltnship .. ! ! " Wen i Die , Do nt Come Near My Body .. ! ! Bcoz My Hands May Not Come 2 Wipe Ur Tears Off That Time .. !Gud ni8 
-------------------------
[5m[32m[MATCH][0m: Hi i m having the most relaxing time ever ! we have to get up at [7m[36m7:['SPACY:POS-NUM'][0m am every day ! was the party good the other night ? I get home tomorrow at 5ish . 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: [7m[36m83039:['SPACY:POS-NUM'][0m 62735=Â£450 UK Break AccommodationVouchers terms & conditions apply . 2 claim you mustprovide your claim number which is 15541 
-------------------------
[5m[32m[MATCH][0m: WINNER ! As a valued network customer you hvae been selected to receive a Â£900 r

Rank 28
Pattern: [['HYPERNYM:abstraction.n.06'], ['HYPERNYM:entity.n.01']]
Window size: 2
Class: Negative
Precision: 0.593
Match: 637 (17.9%)
Gain = 0.077
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: Yeah if we do have to get a random dude we need to change our [7m[36minfo:['HYPERNYM:abstraction.n.06'][0m [7m[36msheets:['HYPERNYM:entity.n.01'][0m to PARTY   & lt;#&gt ; /7 NEVER STUDY just to be safe 
-------------------------
[5m[32m[MATCH][0m: Playin [7m[36mspace:['HYPERNYM:abstraction.n.06'][0m [7m[36mpoker:['HYPERNYM:entity.n.01'][0m , u ? 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: Had your mobile 11 months or more ? [7m[36mU:['HYPERNYM:abstraction.n.06'][0m [7m[36mR:['HYPERNYM:entity.n.01'][0m entitled to Update to the latest colour mobiles with camera for Free ! Call The Mobile Update Co FREE on 08002986030 
-------------------------
[5m[32m[MATCH][0m: Ur cash - balance is cur



In [9]:
# One summary row per pattern: rank, supported class, coverage (%), precision, pattern id
for idx, p in enumerate(the_patterns):
    coverage_pct = round(p.coverage*100, 1)
    pattern_id = p.get_pattern_id()
    print(f'{idx:>3} {p.support_class}   {coverage_pct:>4}   {p.precision:.3f}   {pattern_id}')

  0 Negative   26.2   0.526   [['SPACY:POS-NUM']]
  1 Positive    7.8   0.861   [['SPACY:NER-DATE', 'SPACY:POS-NUM']]
  2 Positive   12.6   0.641   [['SPACY:DEP-compound'], ['SPACY:POS-PROPN']]
  3 Positive   16.3   0.542   [['SPACY:DEP-compound', 'SPACY:POS-PROPN']]
  4 Negative   38.7   0.680   [['SPACY:DEP-compound']]
  5 Negative   21.8   0.559   [['SPACY:DEP-compound'], ['HYPERNYM:abstraction.n.06']]
  6 Negative   19.0   0.527   [['SPACY:NER-ORG']]
  7 Positive    8.6   0.708   [['HYPERNYM:abstraction.n.06'], ['SPACY:POS-PROPN']]
  8 Positive    8.7   0.694   [['SPACY:DEP-compound', 'SPACY:POS-PROPN'], ['HYPERNYM:abstraction.n.06']]
  9 Positive   15.6   0.508   [['HYPERNYM:abstraction.n.06', 'SPACY:POS-PROPN']]
 10 Positive    3.8   1.000   [['TEXT:call'], ['SPACY:POS-NUM']]
 11 Negative   37.8   0.690   [['SPACY:POS-PROPN']]
 12 Positive    7.1   0.743   [['SPACY:DEP-pobj', 'SPACY:POS-NUM']]
 13 Negative   18.3   0.548   [['SPACY:NER-DATE']]
 14 Positive   11.8   0.562   [['SPA

## Example 2

- Create a custom attribute - Hypernym3 (using the word's own synset plus only those hypernyms which are at most three levels above it)

In [10]:
# Memoization cache for _get_all_hypernyms: maps (str(synset), above) -> set of hypernym synsets
HYPERNYM_DICT = dict()

def _get_anvr_pos(penn_tag: str): # Return Literal['a', 'n', 'v', 'r', None]
    if penn_tag.startswith('JJ'): # Adjective
        return 'a'
    elif penn_tag.startswith('NN'): # Noun
        return 'n'
    elif penn_tag.startswith('VB'): # Verb
        return 'v'
    elif penn_tag.startswith('RB'): # Adverb
        return 'r'
    else: # Invalid wordnet type
        return None 
    
def _get_all_hypernyms(synset: nltk.corpus.reader.wordnet.Synset, above: int) -> Set[nltk.corpus.reader.wordnet.Synset]:
    """Collect the hypernyms of `synset` that lie at most `above` levels up.

    The synset itself is not part of the result. Results are memoized in
    HYPERNYM_DICT under the key (str(synset), above); the cached set object is
    returned directly, so callers must not mutate it.
    """
    if above == 0:
        return set()

    cache_key = (str(synset), above)
    collected = HYPERNYM_DICT.get(cache_key)
    if collected is None:
        collected = set()
        for parent in synset.hypernyms():
            # Keep the direct hypernym and everything reachable within the remaining levels
            collected.add(parent)
            collected.update(_get_all_hypernyms(parent, above - 1))
        HYPERNYM_DICT[cache_key] = collected
    return collected
    
def _hypernym_extraction_3(text: str, tokens: List[str]) -> List[Set[str]]:
    """Build, for every spaCy token of `text`, the set of HYPERNYM3 feature names.

    A token gets an empty set when its tag has no WordNet part of speech or
    when lesk finds no synset for it. Otherwise its set holds the name of the
    disambiguated synset itself plus the names of its hypernyms up to three
    levels above.
    """
    features_per_token = []
    for token in nlp(text):
        names = set()
        wordnet_pos = _get_anvr_pos(token.tag_)
        if wordnet_pos is not None:
            sense = lesk(tokens, token.text, wordnet_pos)
            if sense is not None:
                # This version of hypernym extraction includes synset of the word itself
                related = {sense} | _get_all_hypernyms(sense, 3)
                # The [8:-2] slice presumably strips the "Synset('" prefix and "')" suffix of str(ss) — confirm
                names = {str(ss)[8:-2] for ss in related}
        features_per_token.append(names)
    return features_per_token

# Register the extractor as a GrASP custom attribute named 'HYPERNYM3'
# (its features appear as 'HYPERNYM3:<synset name>' in the alphabet printed during fitting).
Hypernym3Attribute = CustomAttribute(name = 'HYPERNYM3', extraction_function = _hypernym_extraction_3)

In [11]:
# Second engine: gaps_allowed = 2, built-in attributes restricted to TEXT, POS and SENTIMENT,
# plus the custom HYPERNYM3 attribute defined above in place of the built-in hypernym attribute.
# correlation_threshold and alphabet_size are passed explicitly — see the GrASP docs for their exact meaning.
grasp_model_2 = GrASP(gaps_allowed = 2, num_patterns = 100, include_standard = ['TEXT', 'POS', 'SENTIMENT'],
                    include_custom = [Hypernym3Attribute],
                    correlation_threshold = 0.5, alphabet_size = 100)

In [12]:
# Mine patterns with the second engine on the same spam (positive) / ham (negative) training texts
the_patterns_2 = grasp_model_2.fit_transform(positive, negative)

  0%|          | 1/488 [00:00<01:12,  6.73it/s]

Step 1: Create augmented texts


100%|██████████| 488/488 [00:27<00:00, 17.65it/s]
100%|██████████| 3079/3079 [02:28<00:00, 20.74it/s]


Step 2: Find frequent attributes


  0%|          | 5/1215 [00:00<00:26, 45.17it/s]

Total number of candidate alphabet = 1215, such as ['SPACY:POS-VERB', 'SPACY:POS-NOUN', 'SPACY:POS-PUNCT', 'SPACY:POS-PRON', 'SPACY:POS-ADV']
Step 3: Find alphabet set


100%|██████████| 1215/1215 [00:50<00:00, 24.17it/s]


Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100


  0%|          | 0/100 [00:00<?, ?it/s]

Total number of alphabet = 100
['SPACY:POS-NUM', 'SPACY:POS-PROPN', 'TEXT:call', 'TEXT:i', 'SPACY:POS-SYM', 'TEXT:free', 'TEXT:txt', 'TEXT:claim', 'TEXT:!', 'TEXT:mobile', 'HYPERNYM3:cost.n.01', 'SPACY:POS-ADP', 'HYPERNYM3:message.n.02', 'TEXT:to', 'TEXT:prize', 'HYPERNYM3:communication.n.02', 'HYPERNYM3:win.v.01', 'SENTIMENT:pos', 'HYPERNYM3:symbol.n.01', 'HYPERNYM3:statement.n.01', 'TEXT:your', 'SPACY:POS-ADJ', 'TEXT:or', 'TEXT:text', 'TEXT:stop', 'TEXT:-', 'TEXT:150p', 'TEXT:guaranteed', 'TEXT:urgent', 'HYPERNYM3:textbook.n.01', 'HYPERNYM3:act.n.02', 'TEXT:win', 'HYPERNYM3:contest.v.01', 'TEXT:+', 'HYPERNYM3:written_communication.n.01', 'SPACY:POS-PRON', 'TEXT:16', 'TEXT:cash', 'HYPERNYM3:abstraction.n.06', 'TEXT:from', 'HYPERNYM3:assertion.n.01', 'TEXT:reply', 'TEXT:now', 'TEXT:.', 'TEXT:tone', 'TEXT:18', 'HYPERNYM3:acquisition.n.02', 'SPACY:POS-DET', 'TEXT:nokia', 'TEXT:a', 'HYPERNYM3:user.n.01', 'TEXT:our', 'HYPERNYM3:mobile.n.02', 'HYPERNYM3:minute.n.01', 'HYPERNYM3:person.n.01'

100%|██████████| 100/100 [01:39<00:00,  1.00it/s]


Length 2 / 5; New candidates = 14950
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 3
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: U studying in sch or going home ? Anyway i 'll b going [7m[36m2:['SPACY:POS-NUM'][0m sch later . 
-------------------------
[5m[32m[MATCH][0m: Wat so late still early mah . Or we juz go [7m[36m4:['SPACY:POS-NUM'][0m dinner lor . Aiya i dunno ... 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: Latest Nokia Mobile or iPOD MP3 Player + Â£400 proze GUARANTEED ! Reply with : WIN to [7m[36m83355:['SPACY:POS-NUM'][0m now ! Norcorp Ltd . Â£1,50/Mtmsgrcvd1

100%|██████████| 68/68 [02:08<00:00,  1.89s/it]


Length 3 / 5; New candidates = 13344
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 3
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: Hi mate its RV did u hav a nice hol just a message [7m[36m3:['SPACY:POS-NUM'][0m say hello coz havenÂ’t sent u 1 in ages started driving so stay off roads!RVx 
-------------------------
[5m[32m[MATCH][0m: I will reach before [7m[36mten:['SPACY:POS-NUM'][0m morning 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: Babe : U want me do nt [7m[36mu:['SPACY:POS-NUM'][0m baby ! I m nasty and have a thing 4 filthyguys . Fancy a rude time with a sexy bitch 

100%|██████████| 36/36 [01:22<00:00,  2.28s/it]


Length 4 / 5; New candidates = 7137
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100


  0%|          | 0/5 [00:00<?, ?it/s]

Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 3
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: Yeah so basically any time next week you can get away from your mom & amp ; get up before [7m[36m3:['SPACY:POS-NUM'][0m 
-------------------------
[5m[32m[MATCH][0m: Solve d Case : A Man Was Found Murdered On   & lt;DECIMAL&gt ; . & lt;#&gt ;   AfterNoon . [7m[36m1,His:['SPACY:POS-NUM'][0m wife called Police . 2,Police questioned everyone . 3,Wife : Sir , I was sleeping , when the murder took place . 4.Cook : I was cooking . 5.Gardener : I was picking vegetables . 6.House - Maid : I went 2 d post office . 7.Children : We went 2 play . 8.Neighbour : We went 2 a marriage . Police arrested d murderer Immediately . Who 's It ? Reply With Reason , If U r Brilliant . 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[M

100%|██████████| 5/5 [00:09<00:00,  1.83s/it]


Length 5 / 5; New candidates = 994
Finding top k: 10 / 100
Finding top k: 20 / 100
Finding top k: 30 / 100
Finding top k: 40 / 100
Finding top k: 50 / 100
Finding top k: 60 / 100
Finding top k: 70 / 100
Finding top k: 80 / 100
Finding top k: 90 / 100
Finding top k: 100 / 100
Example of current patterns
Pattern: [['SPACY:POS-NUM']]
Window size: 3
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: â€ ¦ and donâ€˜t worry weâ€˜ll have finished by march â€ [7m[36m¦:['SPACY:POS-NUM'][0m ish ! 
-------------------------
[5m[32m[MATCH][0m: OH YEAH , AND HAV A GREAT TIME IN NEWQUAY - SEND ME A POSTCARD ! [7m[36m1:['SPACY:POS-NUM'][0m LOOK AFTER ALL THE GIRLS WHILE IM GONE(U KNOW THE 1IM TALKIN BOUT!)xx 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: Will u meet ur dream partner soon ? Is ur career off [7m[36m2:['SPACY:POS-NUM'][0m a flyng start ? 2 

In [13]:
# Show each pattern from the second run under a header giving its position in the list
for rank, pattern in enumerate(the_patterns_2):
    print(f'Rank {rank}', pattern, sep='\n')

Rank 0
Pattern: [['SPACY:POS-NUM']]
Window size: 3
Class: Negative
Precision: 0.526
Match: 936 (26.2%)
Gain = 0.223
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: HIYA STU WOT U UP [7m[36m2.IM:['SPACY:POS-NUM'][0m IN SO MUCH TRUBLE AT HOME AT MOMENT EVONE HATES ME EVEN U ! WOT THE HELL AV I DONE NOW ? Y WONT U JUST TELL ME TEXT BCK PLEASE LUV DAN 
-------------------------
[5m[32m[MATCH][0m: Dnt worry ... use ice pieces in a cloth pack.also take [7m[36m2:['SPACY:POS-NUM'][0m tablets . 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: from www . Applausestore.com MonthlySubscription@50p / msg max6/month T&CsC web [7m[36mage16:['SPACY:POS-NUM'][0m 2stop txt stop 
-------------------------
[5m[32m[MATCH][0m: SMS AUCTION You have won a Nokia [7m[36m7250i:['SPACY:POS-NUM'][0m . This is what you get when you win our FREE auction . To take part send Nokia to 86021 now . HG / Suite342/2Lands Row / W1JHL 

Rank 45
Pattern: [['TEXT:.'], ['SPACY:POS-PROPN'], ['SPACY:POS-PROPN']]
Window size: 5
Class: Positive
Precision: 0.784
Match: 111 (3.1%)
Gain = 0.051
[5m[7m[32mExamples[0m ~ Class Positive:
[5m[32m[MATCH][0m: You have been specially selected to receive a " 3000 award ! Call 08712402050 BEFORE the lines close . Cost 10ppm . 16 + . T&Cs apply [7m[36m.:['TEXT:.'][0m [7m[36mAG:['SPACY:POS-PROPN'][0m [7m[36mPromo:['SPACY:POS-PROPN'][0m 
-------------------------
[5m[32m[MATCH][0m: Panasonic & BluetoothHdset FREE [7m[36m.:['TEXT:.'][0m [7m[36mNokia:['SPACY:POS-PROPN'][0m [7m[36mFREE:['SPACY:POS-PROPN'][0m . Motorola FREE & DoubleMins & DoubleTxt on Orange contract . Call MobileUpd8 on 08000839402 or call 2optout 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Positive:
[5m[32m[MATCH][0m: Evry Emotion dsn't hav Words [7m[36m.:['TEXT:.'][0m [7m[36mEvry:['SPACY:POS-PROPN'][0m [7m[36mWish:['SPACY:POS-PROPN'][0m dsn't hav Prayrs .. 

Rank 71
Pattern: [['SENTIMENT:pos']]
Window size: 3
Class: Negative
Precision: 0.761
Match: 1425 (39.9%)
Gain = 0.042
[5m[7m[32mExamples[0m ~ Class Negative:
[5m[32m[MATCH][0m: Thinkin about someone is all [7m[36mgood:['SENTIMENT:pos'][0m . No drugs for that 
-------------------------
[5m[32m[MATCH][0m: Pls send me a [7m[36mcomprehensive:['SENTIMENT:pos'][0m mail about who i 'm paying , when and how much . 
-------------------------
[5m[7m[31mCounterexamples[0m ~ Not class Negative:
[5m[32m[MATCH][0m: FREE2DAY [7m[36msexy:['SENTIMENT:pos'][0m St George 's Day pic of Jordan!Txt PIC to 89080 do nt miss out , then every wk a saucy celeb!4 more pics c PocketBabe.co.uk 0870241182716 Â£3/wk 
-------------------------
[5m[32m[MATCH][0m: This is the 2nd time we have tried to contact u. U have [7m[36mwon:['SENTIMENT:pos'][0m the Â£1450 prize to claim just call 09053750005 b4 310303 . T&Cs / stop SMS 08718725756 . 140ppm 
-------------------------
Rank 72
Pattern:

- List the patterns in the order returned by `fit_transform` (ranked by their information gain)

In [14]:
# One summary row per pattern, in the order returned by fit_transform:
# rank, supported class, coverage (%), precision, pattern id
for idx, p in enumerate(the_patterns_2):
    coverage_pct = round(p.coverage*100, 1)
    pattern_id = p.get_pattern_id()
    print(f'{idx:>3} {p.support_class}   {coverage_pct:>4}   {p.precision:.3f}   {pattern_id}')

  0 Negative   26.2   0.526   [['SPACY:POS-NUM']]
  1 Positive    8.9   0.806   [['SPACY:POS-PROPN'], ['SPACY:POS-NUM']]
  2 Positive    5.1   0.984   [['TEXT:call'], ['SPACY:POS-NUM']]
  3 Positive   14.5   0.579   [['SPACY:POS-PROPN'], ['SPACY:POS-PROPN']]
  4 Positive   11.8   0.613   [['SPACY:POS-ADP'], ['SPACY:POS-NUM']]
  5 Positive    6.8   0.811   [['SPACY:POS-NUM'], ['SPACY:POS-PROPN']]
  6 Negative   18.1   0.526   [['SPACY:POS-NOUN'], ['SPACY:POS-PROPN']]
  7 Positive    5.4   0.856   [['TEXT:to'], ['SPACY:POS-NUM']]
  8 Negative   37.8   0.690   [['SPACY:POS-PROPN']]
  9 Positive    7.6   0.702   [['SPACY:POS-NOUN'], ['SPACY:POS-NOUN'], ['SPACY:POS-NUM']]
 10 Positive    7.1   0.722   [['SPACY:POS-PROPN'], ['SPACY:POS-PROPN'], ['SPACY:POS-PROPN']]
 11 Positive    5.7   0.797   [['TEXT:.'], ['SPACY:POS-NUM']]
 12 Positive    6.7   0.735   [['SPACY:POS-NUM'], ['SPACY:POS-NUM']]
 13 Negative   20.0   0.585   [['SPACY:POS-PROPN'], ['SPACY:POS-NOUN']]
 14 Positive    8.8   0.597

- Sort the patterns by their precision

In [15]:
# Re-rank the patterns from highest to lowest precision; the sort is stable, so
# patterns with equal precision keep their original relative order.
by_precision = sorted(the_patterns_2, key = lambda x: x.precision, reverse = True)
for idx, p in enumerate(by_precision):
    coverage_pct = round(p.coverage*100, 1)
    print(f'{idx:>3} {p.support_class}   {coverage_pct:>4}   {p.precision:.3f}   {p.get_pattern_id()}')

  0 Positive    2.2   1.000   [['TEXT:claim']]
  1 Positive    1.6   1.000   [['TEXT:prize']]
  2 Positive    1.2   1.000   [['TEXT:call'], ['SPACY:POS-NUM'], ['SPACY:POS-ADP']]
  3 Positive    1.2   1.000   [['SPACY:POS-PROPN'], ['SPACY:POS-NOUN'], ['TEXT:.'], ['SPACY:POS-NUM']]
  4 Positive    1.9   0.985   [['TEXT:.'], ['TEXT:call'], ['SPACY:POS-NUM']]
  5 Positive    5.1   0.984   [['TEXT:call'], ['SPACY:POS-NUM']]
  6 Negative   37.2   0.983   [['TEXT:i']]
  7 Positive    1.6   0.982   [['HYPERNYM3:call.v.28'], ['SPACY:POS-NUM']]
  8 Positive    1.5   0.982   [['SPACY:POS-NOUN'], ['SPACY:POS-NOUN'], ['TEXT:to'], ['SPACY:POS-NUM']]
  9 Positive    2.0   0.957   [['TEXT:call'], ['SPACY:POS-NUM'], ['SPACY:POS-NOUN']]
 10 Positive    2.4   0.942   [['TEXT:mobile']]
 11 Positive    2.7   0.938   [['TEXT:txt']]
 12 Positive    1.7   0.934   [['SPACY:POS-NUM'], ['TEXT:now']]
 13 Positive    3.4   0.926   [['SPACY:POS-PROPN'], ['SPACY:POS-ADP'], ['SPACY:POS-NUM']]
 14 Positive    2.2   0.