In [None]:
!pip3 install --user tensorflow-gpu
!pip install --user spacy

#### Importing Dependencies

In [77]:
from sklearn import linear_model, model_selection
import pandas as pd
import numpy as np
import spacy
import io

#### Preprocessing

In [42]:
# Load the spaCy English pipeline once; the module-level words2vecs helper
# looks this object up by name when it is called.
# NOTE(review): 'en' looks like a shortcut-link name — presumably it only
# resolves on spaCy installs where that link exists; confirm against the
# installed spaCy version.
nlp = spacy.load('en')

In [71]:
def words2vecs(tokens, n=None):
    """Embed the first ``n`` tokens with the module-level spaCy pipeline.

    Parameters
    ----------
    tokens : sequence of str
        Texts to embed. Must support slicing (``tokens[:n]``).
    n : int, optional
        How many leading tokens to embed. ``None`` (the default) embeds
        every token.

    Returns
    -------
    numpy.ndarray
        One row per embedded token, each row being ``nlp(token).vector``.
        An empty selection yields an empty 1-D array.
    """
    # `nlp` is resolved at call time, so the pipeline only has to exist
    # before the first call, not before this definition runs.
    return np.array([nlp(token).vector for token in tokens[:n]])

In [39]:
# Load the lexicon file. Column names are supplied explicitly via `names`, so
# pandas treats the first line of the file as data rather than as a header.
# Passing the path and encoding straight to read_csv lets pandas open, decode
# (cp437) and close the file itself — no file handle is left open.
vader_lexicon = pd.read_csv(
    'vader_lexicon.csv',
    encoding='cp437',
    names=['token', 'norm', 'score', 'vector'],
)

In [51]:
# The 'token' column as a NumPy array.
# NOTE(review): `tokens` is not referenced again in the cells visible here.
tokens = vader_lexicon['token'].values

In [119]:
# Number of leading lexicon rows kept for the experiment; used to slice both
# the token and score columns when building X and y.
N = 1000

In [120]:
# Features are the first N tokens, targets the first N scores; both columns
# are pulled out as NumPy arrays and truncated the same way.
X, y = (vader_lexicon[column].values[:N] for column in ('token', 'score'))

In [121]:
# Hold out a test split using scikit-learn's default test size. The fixed
# random_state pins the shuffle, so the split — and every number computed
# from it — is identical after a kernel restart.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=42
)

#### Modeling

In [122]:
from sklearn import linear_model, model_selection
import spacy

class SentimentInterpolator:
    """Regress sentiment scores from spaCy document vectors.

    Each text is embedded as ``nlp(text).vector`` and the stacked vectors are
    passed to a wrapped estimator that exposes ``fit`` and ``predict``.

    Parameters
    ----------
    model : callable or estimator
        If callable (e.g. an estimator class such as
        ``linear_model.SGDRegressor``, or any zero-argument factory), it is
        called with no arguments to build the estimator. Otherwise it is used
        as-is, which allows passing an already-configured instance.
    nlp : callable, optional
        Pipeline mapping a string to an object with a ``.vector`` attribute.
        Defaults to ``spacy.load('en')``; pass an already-loaded pipeline to
        avoid loading the model a second time.
    """

    def __init__(self, model, nlp=None):
        # Anything callable keeps the original "factory" behaviour; only
        # non-callable objects are treated as ready-made estimators.
        self.model = model() if callable(model) else model
        self.nlp = spacy.load('en') if nlp is None else nlp

    def preprocess(self, tokens):
        """Return an array with one ``nlp(token).vector`` row per input text."""
        return np.array([self.nlp(token).vector for token in tokens])

    def fit(self, X, y):
        """Embed the texts in ``X``, fit the estimator on ``y``; return ``self``."""
        self.model.fit(self.preprocess(X), y)
        return self

    def predict(self, X):
        """Embed the texts in ``X`` and return the estimator's predictions."""
        return self.model.predict(self.preprocess(X))

In [123]:
# Hand over a zero-argument factory instead of the bare class so the regressor
# is built with a fixed random_state: SGD shuffles the training data, so an
# unseeded fit produces different predictions on every run.
interpolator = SentimentInterpolator(
    lambda: linear_model.SGDRegressor(random_state=42)
)

In [124]:
# Embed the training tokens and fit the wrapped regressor. fit() returns the
# interpolator itself, which is why this cell echoes the object's repr.
interpolator.fit(X_train, y_train)



<__main__.SentimentInterpolator at 0x7ff0e34038d0>

In [126]:
# Score a few free-form sentences with the fitted interpolator; the returned
# array (one prediction per sentence) is the cell's displayed output.
interpolator.predict([
    'bitcoin is great',
    'the S&P 500 is probably amazing',
    'index funds are correlating the stock market',
])

array([0.48436716, 0.16634598, 0.37125068])