In [1]:
import json, requests 

def get_sentiment(joke_text, timeout=10):
    """Score the sentiment of a piece of text via the vivekn sentiment API.

    Parameters
    ----------
    joke_text : str
        Text to classify; sent to the API as the ``txt`` form field.
    timeout : float or tuple, optional
        Seconds to wait for the HTTP request (forwarded to
        ``requests.post``). Defaults to 10 so that an unresponsive server
        cannot block the notebook indefinitely.

    Returns
    -------
    tuple of (int, float)
        ``(s, c)`` where ``s`` is 1 for a 'Positive' label, -1 for a
        'Negative' label and 0 for any other label, and ``c`` is the
        confidence reported by the API divided by 100.

    Raises
    ------
    requests.exceptions.RequestException
        If the request times out, the connection fails, or the server
        answers with a 4xx/5xx status.
    """
    # post to sentiment search API
    url = 'http://sentiment.vivekn.com/api/text/'
    response = requests.post(url=url, data=dict(txt=joke_text), timeout=timeout)

    # Fail fast on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # parse JSON response
    result = response.json()['result']
    sentiment, confidence = result['sentiment'], result['confidence']

    # convert string result into numerical values
    # s: sentiment sign, 1 for positive, 0 for neutral, -1 for negative
    # (a length-3 one-hot vector for positive/neutral/negative would be an
    #  alternative encoding)
    if sentiment == 'Positive':
        s = 1
    elif sentiment == 'Negative':
        s = -1
    else:
        s = 0

    # c: confidence rescaled by 1/100 (0-1 when the API reports a percentage)
    c = float(confidence) / 100
    return s, c

In [2]:
# Smoke test: score a sample one-liner; the cell shows the tuple returned
# by get_sentiment.
joke_1 = (
    'Today a man knocked on my door and asked for a small '
    'donation towards the local swimming pool. I gave him a glass of water.'
)
get_sentiment(joke_1)

(0, 0.561001)

In [3]:
# Second smoke test: a joke whose text contains an apostrophe.
joke_2 = "Two wrongs don't make a right, take your parents as an example."
get_sentiment(joke_2)

(-1, 0.814667)

***

In [None]:
"""
Helper functions to load data and tokenize sentence
"""
# Global switch consulted by cprint(); set to False to silence its output.
PRINT_VERBOSE = True


def cprint(s):
    """Print ``s`` only while the module-level ``PRINT_VERBOSE`` flag is truthy."""
    if not PRINT_VERBOSE:
        return
    print(s)

# load jokes data
def get_jokes_data(n = None, random_seed = None):
    """Load the joke texts from both CSV sources, optionally sub-sampling them.

    Reads ``all_cc_jokes.csv`` and ``onelinefun.csv`` from the current
    working directory, concatenates their ``text`` columns, and reports the
    counts through ``cprint``.

    Parameters
    ----------
    n : int, optional
        Number of jokes to draw at random without replacement. When ``None``
        (the default) every joke is returned.
    random_seed : int, optional
        Seed passed to ``np.random.seed`` before sampling, which reseeds
        NumPy's global random state. When ``None`` the global state is left
        as it is. A seed of 0 is honoured.

    Returns
    -------
    pandas.Series
        The joke texts: all of them when ``n`` is ``None``, otherwise a
        freshly indexed Series holding ``n`` sampled jokes.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``n`` exceeds the number of
        available jokes.
    """
    # Data processing
    # comedy central jokes (column names are supplied here, first column is the index)
    cc_jokes = pd.read_csv('all_cc_jokes.csv', sep = ',', index_col = 0, names = ['type', 'link', 'text'])
    cc_jokes_text = cc_jokes['text']
    cprint('Number of jokes from Comedy Central: {}'.format(cc_jokes.shape[0]))
    cprint('There are {} types of jokes on Comendy Central'.format(cc_jokes['type'].nunique()))

    # one liner jokes
    one_liner_jokes = pd.read_csv('onelinefun.csv', encoding = "ISO-8859-1", sep = ',')
    one_liner_jokes_text = one_liner_jokes['text']
    cprint('Number of jokes from One Liner Fun: {}'.format(one_liner_jokes.shape[0]))

    # combining both jokes
    all_jokes_text = pd.concat([cc_jokes_text, one_liner_jokes_text], axis=0, ignore_index=True)
    cprint('Total number of jokes: {}'.format(all_jokes_text.shape[0]))

    # No sample size requested: hand back the full collection rather than
    # falling through to an unassigned result.
    if n is None:
        return all_jokes_text

    # Compare against None so that a seed of 0 is still applied.
    if random_seed is not None:
        np.random.seed(random_seed)

    # randomly select n jokes (sampling happens whether or not a seed was given)
    jokes_data = pd.Series(np.random.choice(all_jokes_text, size = n, replace = False))
    return jokes_data