 See links for instructions on installation if not already installed.
  - [NLTK](https://www.nltk.org/install.html) (tested with 3.6.7 and with 3.2.5.)
  - [Scikit-Learn](https://scikit-learn.org/stable/install.html) (test with 1.0.2)
  - [SciPy](https://scipy.org/install/) (tested with 1.7.3 and with 1.4.1)

In [1]:
!pip install gdown
!gdown --id 1thWkUj7uGOApr_dXRvMr9TsEHpo_H_2q -O sst2.zip
!mkdir -p data
!unzip sst2.zip -d data
!rm sst2.zip

 # Building and Extracting features

In [2]:
from collections import Counter
import json
from pathlib import Path

from nltk.tokenize import WordPunctTokenizer

print("Build unigram vocab from sst2.train")
data_dir = Path('sst2/')
tokenizer = WordPunctTokenizer()
counter = Counter()
counter.update(['<pad>', '<unk>'])
data_train = open(data_dir.joinpath('sst2.train')).readlines()
print(f"Size of training data: {len(data_train)}")

token_lines = []

for line in data_train:
    lower_line = line.lower()
    token_lines.append(tokenizer.tokenize(lower_line))
    
for sentence in token_lines:
    sentence = sentence[1:]
    for word in sentence:
        counter[word] = counter.get(word, 0) + 1
        
print(f"Vocab size before frequency filtering: {len(counter)}")
        
vocab = {key: val for key, val in counter.items() if val >= 3 or key == '<pad>' or key == '<unk>'}
vocab = {key: 0 for key in vocab}
vocab.update((k, i) for i, k in enumerate(vocab))

print(f"Vocab size after frequency filtering: {len(vocab)}")
output_filepath = data_dir.joinpath('unigram_vocab.json')
json.dump(vocab, open(output_filepath, mode='w'))


Build unigram vocab from sst2.train
Size of training data: 6920
Vocab size before frequency filtering: 13850
Vocab size after frequency filtering: 4949


In [3]:
# sanity check
assert (vocab['<pad>'] == 0)
assert (vocab['<unk>'] == 1)
assert (len(vocab) == 4949)


 ### Generate features and labels files

In [4]:
# Generate `npz` files of features and of labels
import json
from nltk.tokenize import WordPunctTokenizer
import numpy as np
from scipy import sparse

def extract_features(vocab, data_dir, dataset, data_type, tokenizer, feature_name):
    '''
    Extracts and saves different features based on vocab of the features
    
    Inputs:
        vocab (dict): map from the word type to the index of the word
        data_dir (path): directory of the dataset
        dataset (file): data file
        data_type (string): train, dev, or test identifier
        tokenizer (callable): tokenizer with a tokenize method to return list of tokens
        feature_name (string): name of the feature, such as unigram_binary
        
    Output: no return, saves npz files
    '''
        
    data = open(data_dir.joinpath(dataset)).readlines()
    tokenizer = WordPunctTokenizer()
    
    tokens = []
    for line in data:
        lower_line = line.lower()
        tokens.append(tokenizer.tokenize(lower_line))
        
    label_list = []
    for line in tokens:
        label = int(line[0])
        label_list.append(label)
     
    final_tokens = []
    for line in tokens:
        split_line = line[1:]
        final_tokens.append(split_line)

    label_array = np.asarray(label_list)

    data_dict = {index: {key: 0 for key in vocab.keys()} for index, value in enumerate(final_tokens)}
    for line in final_tokens:
        for word in line:
            if word in vocab.keys():
                data_dict[final_tokens.index(line)][word] = 1
            else:
                data_dict[final_tokens.index(line)]['<unk>'] = 1
    
    data_matrix = np.array([[data_dict[index][key] for key in vocab.keys()] for index, value in enumerate(final_tokens)])
    sparse_matrix = sparse.csr_matrix(data_matrix)
    
    print(("The shape of the " + data_type + " matrix is: "), data_matrix.shape)
    print(("The shape of the " + data_type + " label array is: "), label_array.shape)
    print()
        
    sparse.save_npz('sst2/' + data_type + '_' + feature_name + '_features.npz', sparse_matrix)
    np.savez('sst2/' + data_type + '_labels.npz', label_array)
    

In [5]:
extract_features(vocab, Path('sst2/'), 'sst2.train', 'train', WordPunctTokenizer(), 'unigram_binary')
extract_features(vocab, Path('sst2/'), 'sst2.dev', 'dev', WordPunctTokenizer(), 'unigram_binary')
extract_features(vocab, Path('sst2/'), 'sst2.test', 'test', WordPunctTokenizer(), 'unigram_binary')

The shape of the train matrix is:  (6920, 4949)
The shape of the train label array is:  (6920,)

The shape of the dev matrix is:  (872, 4949)
The shape of the dev label array is:  (872,)

The shape of the test matrix is:  (1821, 4949)
The shape of the test label array is:  (1821,)



 We provide you the helper function below for feature weight analysis (1.1.2 and 1.2.2).

In [6]:
def print_important_weights(weights, words):
    """
    Print important pairs of weights and words.
    # Parameters
    weights : `Iterable`, required.
        Weights from a learned model.
    words : `Iterable`, required.
        Word types of the vocabulary.  
        It must be true that `len(weights) == len(words)`.
    # Returns
        `None`
    """

    def print_pairs(pairs):
        for weight, word in pairs:
            print("{: .4f} | {}".format(weight, word))

    assert len(weights) == len(words)
    pairs = list(zip(weights, words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=True)
    print("Most positive words:")
    print_pairs(pairs[:10])
    print("\nMost negative words:")
    print_pairs(reversed(pairs[-10:]))

    pairs = list(zip(abs(weights), words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=False)
    print("\nMost neutral words:")
    print_pairs(pairs[:10])


 # Logistic regression with scikit-learn

In [7]:
train_features = sparse.load_npz('sst2/train_unigram_binary_features.npz')
train_labels = np.load('sst2/train_labels.npz')

dev_features = sparse.load_npz('sst2/dev_unigram_binary_features.npz')
dev_labels = np.load('sst2/dev_labels.npz')

test_features = sparse.load_npz('sst2/test_unigram_binary_features.npz')
test_labels = np.load('sst2/test_labels.npz')

In [8]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

def fit_and_eval_logistic_regression(data_dir: Path,
                                     train_X, train_Y,
                                     test_X, test_Y,
                                     feature_name: str) -> LogisticRegression:
    '''
    Fits and evaluates the logistic regression model using the scikit-learn library
    
    Inputs:
        data_dir (path): the data directory
        trn_data (file): training data 
        tst_data (file): testing or dev data
        
    Output:
        model_trained (LogisticRegression): object of LogisticRegression after it is trained
    '''
    
    model = LogisticRegression()
    model.fit(train_X, train_Y['arr_0'])
    y_pred = model.predict(test_X)
    
    print("The accuracy score is: ", accuracy_score(test_Y['arr_0'], y_pred))
    print("The f1 score is: ", f1_score(test_Y['arr_0'], y_pred, average = 'weighted'))
    
    return model

### dev model

In [9]:
print("These are the scores for dev")
print()

fit_and_eval_logistic_regression(feature_name = 'unigram_binary',
                                 train_X = train_features, train_Y = train_labels,
                                 test_X = dev_features, test_Y = dev_labels,
                                 data_dir = Path('sst2/'))

These are the scores for dev

The accuracy score is:  0.7901376146788991
The f1 score is:  0.790148380887179


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

### test model

In [10]:
print("These are the scores for test")
print()

fit_and_eval_logistic_regression(feature_name = 'unigram_binary',
                                 train_X = train_features, train_Y = train_labels,
                                 test_X = test_features, test_Y = test_labels,
                                 data_dir = Path('sst2/'))

These are the scores for test

The accuracy score is:  0.8039538714991763
The f1 score is:  0.8039266727707524


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

 ## 1.1.2 Weights Analysis

In [11]:
model_trained: LogisticRegression = fit_and_eval_logistic_regression(
    feature_name='unigram_binary', train_X = train_features, train_Y = train_labels, test_X = test_features, test_Y = test_labels,data_dir=Path('data'))
weights = model_trained.coef_[0]
vocab = json.load(open(data_dir.joinpath('unigram_vocab.json')))
print_important_weights(weights=weights, words=vocab.keys())


The accuracy score is:  0.8039538714991763
The f1 score is:  0.8039266727707524
Most positive words:
 1.9970 | solid
 1.9597 | powerful
 1.8592 | remarkable
 1.8391 | enjoyable
 1.7902 | refreshing
 1.7346 | fun
 1.6381 | works
 1.6371 | appealing
 1.6182 | hilarious
 1.5746 | treat

Most negative words:
-2.0717 | stupid
-1.9704 | suffers
-1.9543 | mess
-1.8894 | dull
-1.8758 | worst
-1.8209 | unfortunately
-1.7326 | lacking
-1.6592 | flat
-1.6586 | bland
-1.6217 | none

Most neutral words:
 0.0000 | <pad>
 0.0001 | soldiers
 0.0004 | actresses
 0.0005 | flesh
 0.0005 | narrator
 0.0006 | dose
 0.0008 | heal
 0.0010 | borrows
 0.0015 | daring
 0.0015 | subplots


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


 ## Error Analysis

In [12]:
def test_errors(data_dir: Path, train_X, train_Y, test_X, test_Y, feature_name: str) -> LogisticRegression:
    '''
    Returns predicted label output for given training and testing data
    '''
    
    model = LogisticRegression()
    model.fit(train_X, train_Y['arr_0'])
    y_pred = model.predict(test_X)
    
    return y_pred


In [13]:
test_model = test_errors(feature_name = 'unigram_binary',
                         train_X = train_features, train_Y = train_labels,
                         test_X = test_features, test_Y = test_labels,
                         data_dir = Path('sst2/'))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [14]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

def error_frame(pred_model, labels):
    '''
    Generates a pandas dataframe of mislabeled reviews
    '''
    
    label_column = pd.DataFrame(labels)
    label_column.rename(columns = {0: "True"}, inplace = True)
    model_column = pd.DataFrame(pred_model)
    model_column.rename(columns = {0: "Predicted"}, inplace = True)
    
    data_test = open(data_dir.joinpath('sst2.test')).readlines()
    tokens = []
    for line in data_test:
        lower_line = line.lower()
        tokens.append(tokenizer.tokenize(lower_line))
    
    full_frame = label_column.join(model_column)
    full_frame["Match"] = (full_frame["True"] == full_frame["Predicted"]).astype(int)
    full_frame["Line"] = tokens
    full_frame.index += 1
    
    print("The number of errors found: ", len(full_frame.loc[full_frame['Match'] == 0]))
    return full_frame.loc[full_frame['Match'] == 0]
    

  pd.set_option('display.max_colwidth', -1)


In [15]:
error_frame(test_model, test_labels['arr_0'])

The number of errors found:  357


Unnamed: 0,True,Predicted,Match,Line
7,0,1,0,"[0, acting, ,, particularly, by, tambor, ,, almost, makes, ``, never, again, '', worthwhile, ,, but, -, lrb, -, writer, \/, director, -, rrb, -, schaeffer, should, follow, his, titular, advice]"
8,1,0,0,"[1, the, movie, exists, for, its, soccer, action, and, its, fine, acting, .]"
17,1,0,0,"[1, if, your, senses, have, n, ', t, been, dulled, by, slasher, films, and, gorefests, ,, if, you, ', re, a, connoisseur, of, psychological, horror, ,, this, is, your, ticket, .]"
20,0,1,0,"[0, those, who, managed, to, avoid, the, deconstructionist, theorizing, of, french, philosopher, jacques, derrida, in, college, can, now, take, an, 85, -, minute, brush, -, up, course, with, the, documentary, derrida, .]"
22,0,1,0,"[0, but, what, saves, lives, on, the, freeway, does, not, necessarily, make, for, persuasive, viewing, .]"
23,1,0,0,"[1, steve, irwin, ', s, method, is, ernest, hemmingway, at, accelerated, speed, and, volume, .]"
29,0,1,0,"[0, the, premise, for, this, kegger, comedy, probably, sounded, brilliant, four, six, -, packs, and, a, pitcher, of, margaritas, in, ,, but, the, film, must, have, been, written, ..., in, the, thrall, of, a, vicious, hangover, .]"
36,1,0,0,"[1, haneke, challenges, us, to, confront, the, reality, of, sexual, aberration, .]"
44,1,0,0,"[1, much, monkeyfun, for, all, .]"
48,1,0,0,"[1, one, scarcely, needs, the, subtitles, to, enjoy, this, colorful, action, farce, .]"


In [16]:
def print_weight(weights, words):
    """
    Prints all word-weight pairs for given data and vocabulary
    """

    def print_pairs(pairs):
        for weight, word in pairs:
            print("{: .4f} | {}".format(weight, word))

    assert len(weights) == len(words)
    pairs = list(zip(weights, words))
    pairs = sorted(pairs, key=lambda x: x[0], reverse=True)
    print_pairs(pairs)


In [17]:
print_weight(weights = weights, words=vocab.keys())

 1.9970 | solid
 1.9597 | powerful
 1.8592 | remarkable
 1.8391 | enjoyable
 1.7902 | refreshing
 1.7346 | fun
 1.6381 | works
 1.6371 | appealing
 1.6182 | hilarious
 1.5746 | treat
 1.5637 | fascinating
 1.5539 | charming
 1.4961 | wonderful
 1.4882 | entertaining
 1.4869 | terrific
 1.4790 | brilliant
 1.4376 | beautifully
 1.4329 | assured
 1.4079 | eyes
 1.4027 | fashioned
 1.3845 | definitely
 1.3805 | follow
 1.3774 | manages
 1.3471 | years
 1.3393 | unexpected
 1.3380 | always
 1.3377 | summer
 1.3295 | heart
 1.3088 | smarter
 1.2976 | best
 1.2939 | delivers
 1.2893 | imax
 1.2803 | damn
 1.2786 | masterpiece
 1.2763 | resist
 1.2675 | deeply
 1.2664 | rare
 1.2628 | provides
 1.2468 | human
 1.2438 | refreshingly
 1.2418 | spirit
 1.2157 | delightful
 1.2128 | genre
 1.2106 | engrossing
 1.2058 | somewhat
 1.2048 | fast
 1.1871 | cinema
 1.1838 | perfectly
 1.1685 | worth
 1.1673 | pleasing
 1.1607 | charmer
 1.1584 | portrait
 1.1514 | funny
 1.1509 | rewarding
 1.1500 | f

 0.3239 | compellingly
 0.3236 | kid
 0.3234 | reel
 0.3233 | endlessly
 0.3232 | whale
 0.3227 | p
 0.3223 | frequent
 0.3217 | goofy
 0.3216 | enjoyably
 0.3215 | bill
 0.3215 | adventure
 0.3214 | takes
 0.3211 | marvelously
 0.3207 | boasting
 0.3207 | thankfully
 0.3206 | read
 0.3206 | crowd
 0.3206 | who
 0.3204 | courage
 0.3201 | enthusiasm
 0.3195 | deeds
 0.3192 | fate
 0.3184 | rose
 0.3183 | bible
 0.3183 | mixture
 0.3180 | war
 0.3177 | derrida
 0.3167 | shows
 0.3160 | primarily
 0.3159 | discover
 0.3155 | convey
 0.3152 | bravery
 0.3152 | comedic
 0.3142 | chouraqui
 0.3133 | wholesome
 0.3132 | slapstick
 0.3131 | encounter
 0.3131 | stone
 0.3127 | distinct
 0.3125 | era
 0.3123 | award
 0.3123 | duration
 0.3120 | schrader
 0.3116 | interested
 0.3115 | gradually
 0.3111 | those
 0.3111 | brutal
 0.3107 | knack
 0.3104 | sun
 0.3104 | classics
 0.3103 | mental
 0.3096 | strangeness
 0.3091 | screen
 0.3091 | success
 0.3090 | edges
 0.3082 | metropolis
 0.3076 | h

 0.1798 | punch
 0.1797 | breathtakingly
 0.1793 | finest
 0.1789 | environment
 0.1782 | memories
 0.1781 | nijinsky
 0.1781 | struck
 0.1780 | nutty
 0.1776 | schumacher
 0.1775 | offer
 0.1768 | else
 0.1766 | fine
 0.1757 | heady
 0.1754 | drunk
 0.1746 | tasteful
 0.1745 | borders
 0.1741 | create
 0.1741 | gere
 0.1731 | entertained
 0.1730 | stitch
 0.1729 | makers
 0.1726 | news
 0.1725 | rifkin
 0.1724 | blockbusters
 0.1722 | formed
 0.1721 | story
 0.1719 | hashiguchi
 0.1716 | fight
 0.1710 | comedy
 0.1710 | irwin
 0.1710 | accident
 0.1705 | helps
 0.1701 | sensitive
 0.1701 | struggle
 0.1700 | swimming
 0.1699 | angle
 0.1691 | cheese
 0.1689 | surprise
 0.1689 | scare
 0.1689 | adolescent
 0.1683 | amidst
 0.1683 | intensely
 0.1681 | don
 0.1680 | finally
 0.1675 | moody
 0.1671 | production
 0.1671 | director
 0.1670 | series
 0.1668 | sequels
 0.1666 | x
 0.1665 | rise
 0.1660 | play
 0.1655 | awake
 0.1655 | disappointed
 0.1651 | incessant
 0.1648 | lit
 0.1646 | 

 0.0578 | peak
 0.0575 | screens
 0.0574 | grim
 0.0574 | construct
 0.0574 | sort
 0.0573 | stately
 0.0572 | held
 0.0565 | wind
 0.0564 | issue
 0.0562 | devices
 0.0558 | &
 0.0556 | murphy
 0.0552 | signals
 0.0550 | these
 0.0550 | .
 0.0547 | break
 0.0546 | cuban
 0.0539 | israel
 0.0531 | credibility
 0.0529 | function
 0.0529 | youthful
 0.0528 | recognize
 0.0524 | noise
 0.0524 | general
 0.0523 | original
 0.0522 | cover
 0.0519 | practice
 0.0517 | realized
 0.0516 | dramas
 0.0512 | innovative
 0.0511 | s
 0.0510 | complexities
 0.0508 | morality
 0.0508 | arty
 0.0507 | auteur
 0.0506 | wrote
 0.0505 | york
 0.0504 | truly
 0.0499 | pushed
 0.0495 | drink
 0.0493 | reasonably
 0.0491 | jewish
 0.0490 | hollywood
 0.0490 | hoffman
 0.0489 | awareness
 0.0487 | than
 0.0486 | celebrity
 0.0478 | projects
 0.0477 | sting
 0.0475 | pleasurable
 0.0475 | ourselves
 0.0474 | bump
 0.0474 | rollicking
 0.0472 | notable
 0.0469 | mothman
 0.0468 | jones
 0.0467 | extravaganza
 

-0.1854 | gender
-0.1856 | problematic
-0.1856 | bluster
-0.1861 | coppola
-0.1866 | far
-0.1866 | strongly
-0.1873 | spins
-0.1880 | heights
-0.1888 | incongruous
-0.1888 | attention
-0.1889 | arc
-0.1895 | piccoli
-0.1899 | used
-0.1899 | attal
-0.1899 | earn
-0.1902 | away
-0.1906 | characterizations
-0.1906 | brisk
-0.1910 | out
-0.1912 | better
-0.1918 | re
-0.1918 | teeth
-0.1926 | dimension
-0.1927 | funnier
-0.1928 | interesting
-0.1933 | area
-0.1935 | gun
-0.1938 | inescapable
-0.1951 | machine
-0.1955 | starring
-0.1956 | male
-0.1957 | walking
-0.1958 | familiarity
-0.1962 | con
-0.1966 | wholly
-0.1967 | blow
-0.1969 | leather
-0.1981 | audacity
-0.1996 | amount
-0.2004 | ararat
-0.2004 | tapping
-0.2010 | sham
-0.2010 | exist
-0.2011 | gravity
-0.2012 | creation
-0.2020 | melodramatic
-0.2021 | album
-0.2024 | tosses
-0.2031 | schmaltz
-0.2033 | loss
-0.2034 | amid
-0.2035 | shafer
-0.2035 | adams
-0.2037 | stick
-0.2041 | gay
-0.2042 | playing
-0.2046 | simultaneously
-0

-0.3847 | sentence
-0.3850 | hawke
-0.3858 | brio
-0.3858 | feminist
-0.3882 | throwing
-0.3885 | spark
-0.3885 | underlying
-0.3891 | cia
-0.3893 | belt
-0.3899 | suspend
-0.3901 | mired
-0.3906 | exactly
-0.3918 | sitcom
-0.3919 | costner
-0.3919 | psyche
-0.3921 | malaise
-0.3924 | pryor
-0.3925 | replaced
-0.3927 | continuity
-0.3933 | pileup
-0.3935 | redeeming
-0.3936 | pieces
-0.3938 | sweetest
-0.3939 | deserving
-0.3940 | sequel
-0.3941 | awkwardly
-0.3944 | sit
-0.3950 | nonsensical
-0.3951 | lead
-0.3953 | cruel
-0.3954 | julie
-0.3954 | writers
-0.3956 | clunky
-0.3958 | stinker
-0.3965 | credits
-0.3966 | developed
-0.3968 | low
-0.3970 | desperate
-0.3971 | scary
-0.3972 | susan
-0.3973 | characterization
-0.3974 | 2002
-0.3974 | toback
-0.3976 | del
-0.3976 | traveler
-0.3976 | street
-0.3977 | relying
-0.3978 | boat
-0.3981 | gaghan
-0.3982 | particular
-0.3986 | normally
-0.3991 | misguided
-0.3991 | fiction
-0.3994 | players
-0.3995 | deniro
-0.4000 | before
-0.4002 |

-0.9516 | thin
-0.9517 | studio
-0.9518 | evidence
-0.9599 | aims
-0.9602 | fool
-0.9632 | reason
-0.9675 | joke
-0.9677 | dialogue
-0.9694 | pity
-0.9717 | weak
-0.9722 | reality
-0.9728 | sadly
-0.9749 | charmless
-0.9750 | only
-0.9774 | comes
-0.9785 | bug
-0.9814 | vulgar
-0.9824 | somewhere
-0.9826 | showgirls
-0.9830 | title
-0.9854 | preposterous
-0.9866 | ends
-0.9870 | formulaic
-0.9875 | slap
-0.9880 | plotting
-0.9932 | settles
-0.9938 | reputation
-0.9972 | college
-1.0010 | slip
-1.0013 | apparent
-1.0030 | ugly
-1.0122 | distasteful
-1.0127 | less
-1.0296 | taken
-1.0305 | nothing
-1.0318 | violent
-1.0333 | pedigree
-1.0337 | grating
-1.0343 | wrong
-1.0396 | tedious
-1.0462 | becomes
-1.0476 | money
-1.0496 | poorly
-1.0506 | writing
-1.0584 | figure
-1.0593 | inconsequential
-1.0619 | fluffy
-1.0705 | follows
-1.0725 | episode
-1.0799 | terribly
-1.0856 | house
-1.0898 | disappointment
-1.0966 | boring
-1.0967 | disappointing
-1.0967 | cold
-1.1041 | rises
-1.1041 | ?