# 072520 Cumber tests

In [85]:
import cv2
import numpy as np
import requests

from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt

In [148]:
def _cumberify(f):
    """Black out part of the green/yellow region of an image.

    The image bytes read from ``f`` are decoded, every pixel whose HSV value
    lies between ``lower_green`` and ``upper_green`` is masked, and a filled
    black rectangle is drawn from the first masked pixel to the masked pixel
    halfway through the (row-major) list of masked pixels.

    Args:
        f: binary file-like object; ``f.read()`` must return the encoded image.

    Returns:
        The decoded image array with the black rectangle drawn on it. The
        array is returned untouched when no pixel falls inside the colour range.
    """
    img = cv2.imdecode(np.frombuffer(f.read(), np.uint8), 1)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # the colour range below is expressed in HSV
    lower_green = np.array([25, 50, 50])
    upper_green = np.array([80, 255, 255])

    # inRange yields 0/255; comparing against 0 gives the boolean mask directly
    # (the old `np.bool` alias no longer exists in current NumPy releases).
    mask = cv2.inRange(hsv, lower_green, upper_green) > 0

    cumbered = np.argwhere(mask)  # (row, col) indices of the masked pixels
    if len(cumbered) == 0:
        # nothing green/yellow in the picture: there is nothing to cover
        return img

    # rectangle corners as plain Python ints in (x, y) order
    first_pixel = cumbered[0]
    middle_pixel = cumbered[round(len(cumbered) * 0.5)]
    start = (int(first_pixel[1]), int(first_pixel[0]))
    end = (int(middle_pixel[1]), int(middle_pixel[0]))
    width = end[0] - start[0]
    length = end[1] - start[1]

    # draw a filled rectangle over part of the cucumber (20% looks too small in most cases)
    cv2.rectangle(img, start, end, (0, 0, 0), -1)

    # the overlay covers C_OFF of the rectangle and is centred inside it
    C_OFF = 0.8
    censored_start = (start[0] + int((width - width * C_OFF) / 2),
                      start[1] + int((length - length * C_OFF) / 2))
    overlay_size = (int(width * C_OFF), int(length * C_OFF))

    img_PIL = Image.fromarray(img)
    img_PIL.show()

    # The middle pixel can lie left of (or in the same row/column as) the first
    # one, which gives a zero or negative size that cannot be resized to.
    if overlay_size[0] > 0 and overlay_size[1] > 0:
        with Image.open('../images/censored.png') as censored:
            img_PIL.paste(censored.resize(overlay_size), censored_start)

    # NOTE(review): the overlay is pasted onto img_PIL while the function
    # returns img -- confirm whether the overlay is meant to be in the result.
    return img

In [149]:
# Read the test image through a context manager so the file handle is closed
# as soon as _cumberify has consumed the bytes.
with open('test2.jpeg', 'rb') as f:
    modified_cumber = _cumberify(f)

# NOTE(review): _cumberify treats the decoded array as BGR (it converts with
# COLOR_BGR2HSV); Image.fromarray presumably reads it as RGB -- confirm the
# channel order before trusting the colours of this preview.
img = Image.fromarray(modified_cumber)
# img.show()

In [135]:
# Multiplying a tuple by an integer repeats its elements; it does not scale them.
(1,2)*2

(1, 2, 1, 2)

# 072620 Verbosify Tests

In [384]:
import random
import re

from nltk import pos_tag
from nltk.corpus import wordnet

In [385]:
input_sentence = 'you think we will achieve this win, brothers?'
new_sentence = ''

# go through every word (\w+) and punctuation mark ([^\w\s]) in the sentence
for word in re.findall(r"\w+|[^\w\s]", input_sentence):
    # punctuation is appended as-is, without a separating space
    if re.match(r"[^\w\s]", word):
        new_sentence += word
        continue

    synsets = wordnet.synsets(word)  # does not change between retries, so look it up once
    lowered_word = word.lower()

    # Retrying only makes sense when at least one lemma can pass the
    # uniqueness check below; otherwise the retry loop would never terminate.
    has_unique_synonym = any(
        lemma.name().lower() not in lowered_word
        for synset in synsets
        for lemma in synset.lemmas()
    )

    # no synsets at all, or every lemma is contained in the word itself
    if not has_unique_synonym:
        new_sentence += ' ' + word
        continue

    # choose a random lemma of a random synset until we find a unique one
    while True:
        synonym = random.choice(random.choice(synsets).lemmas()).name()
        if synonym.lower() not in lowered_word:
            new_sentence += ' ' + synonym
            break

# Every word was prefixed with a space; drop that separator only when it is
# really there (a sentence opening with punctuation has no leading space).
verbosified_sentence = new_sentence[1:] if new_sentence.startswith(' ') else new_sentence

print('original:', input_sentence)
print('verbosified:', verbosified_sentence)

original: you think we will achieve this win, brothers?
verbosified: you conceive we volition accomplish this winnings, buddy?


## Parts of Speech exploration

In [102]:
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
# or run: `nltk.help.upenn_tagset()`
# Tag names are written as one whitespace-separated string per group and
# split into the final list, which keeps the literal easy to scan.
all_tags = (
    'CC CD DT EX FW IN JJ JJR JJS LS MD NN NNS NNP NNPS '
    'PDT POS PRP PRP$ RB RBR RBS RP SYM TO UH '
    'VB VBD VBG VBN VBP VBZ WDT WP WP$ WRB'
).split()

# https://stackoverflow.com/questions/15586721/wordnet-lemmatization-and-pos-tagging-in-python
# https://linguistics.stackexchange.com/questions/6508/which-part-of-speech-are-s-and-r-in-wordnet
def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank tag into the WordNet POS letters to accept.

    Args:
        treebank_tag: tag string such as ``'JJ'`` or ``'VBD'``.

    Returns:
        ``'as'`` for tags starting with J, ``'v'`` for V, ``'n'`` for N,
        ``'r'`` for R, and the empty string for every other tag.
    """
    prefix_to_wordnet = {'J': 'as', 'V': 'v', 'N': 'n', 'R': 'r'}
    for prefix, wordnet_pos in prefix_to_wordnet.items():
        if treebank_tag.startswith(prefix):
            return wordnet_pos
    return ''

In [4]:
def print_syns(word):
    """Print each WordNet synset of ``word`` followed by its lemmas.

    Prints ``'no synonyms found'`` and returns when WordNet has no synset for
    the word. Otherwise every synset is printed as ``<name>:``, then one lemma
    per line, then a blank line.
    """
    synsets = wordnet.synsets(word)
    if len(synsets) == 0:
        print('no synonyms found')
        return

    for synset in synsets:
        header = synset.name() + ':'
        print(header)
        for lemma in synset.lemmas():
            print(lemma)
        print()  # blank separator line between synsets

In [5]:
# The notebook imports `pos_tag` by name (`from nltk import pos_tag`) but never
# imports the `nltk` package itself, so call the imported function directly.
tokenized_sentence = re.findall(r"\w+|[^\w\s]", 'hop in the voice channel and find out for yourself')
print('parts of speech:', pos_tag(tokenized_sentence))

print()

# same sentence tokenised by hand, with 'find_out' kept as a single token
tokenized_sentence = ['hop', 'in', 'the', 'voice', 'channel', 'and', 'find_out', 'for', 'yourself', '!']
print('parts of speech:', pos_tag(tokenized_sentence))

parts of speech: [('hop', 'NN'), ('in', 'IN'), ('the', 'DT'), ('voice', 'NN'), ('channel', 'NN'), ('and', 'CC'), ('find', 'VB'), ('out', 'RP'), ('for', 'IN'), ('yourself', 'PRP')]

parts of speech: [('hop', 'NN'), ('in', 'IN'), ('the', 'DT'), ('voice', 'NN'), ('channel', 'NN'), ('and', 'CC'), ('find_out', 'NN'), ('for', 'IN'), ('yourself', 'NN'), ('!', '.')]


In [86]:
# Look up the antonyms of the first lemma of the synset 'beautiful.a.01'.
wordnet.synset('beautiful.a.01').lemmas()[0].antonyms()

[Lemma('ugly.a.01.ugly')]

## Test parts of speech

In [386]:
# Hand-picked replacements that take priority over the WordNet lookup.
# Keys have the form '<word>/<POS tag>' (verbosify2 builds them from pos_tag output).
# A list value holds the replacement candidates; a string value is the key of
# another entry whose list is used instead (see get_whitelist_synonym).
whitelist = {'a/DT': ['an', 'the'],
             'an/DT': ['a', 'the'],
             'the/DT': ['a', 'an'],
             'I/PRP': ['ur boy', 'me, myself and I', 'yours truly'],
             'me/PRP': 'I/PRP',  # reference: reuse the candidates of 'I/PRP'
             'you/PRP': ['thou', 'thoust'],
             'will/MD': ['shall', 'shalt']}


def verbosify2(input_sentence, num_tries = 30):
    """Replace each word of a sentence with a whitelist entry or WordNet synonym.

    The sentence is split into words and punctuation marks, POS-tagged, and
    every token is mapped to its replacement before the pieces are joined
    back together with ``join_sentence``.

    Args:
        input_sentence: the text to rewrite.
        num_tries: accepted but not used by this implementation.

    Returns:
        The rewritten sentence as a string.
    """
    tagged_tokens = pos_tag(re.findall(r"\w+|[^\w\s]", input_sentence))
    word_list = []

    for word, pos in tagged_tokens:
        if re.match(r"[^\w\s]", word):
            # punctuation is kept unchanged
            replacement = word
        elif word+'/'+pos in whitelist:
            # hand-picked replacements win over WordNet
            replacement = get_whitelist_synonym(word, pos)
        else:
            # ordinary word: ask WordNet, restricted to the tagged part of speech
            replacement = get_synonym(word, get_wordnet_pos(pos))
        word_list.append(replacement)

    return join_sentence(word_list)


def get_synonym(word, pos):
    """Pick a random WordNet synonym of ``word`` with a matching part of speech.

    Args:
        word: the word to replace.
        pos: string of accepted WordNet POS letters (e.g. ``'as'``, ``'v'``);
            a synset is used only when the second dot-separated field of its
            name occurs in this string.

    Returns:
        A randomly chosen lemma name that differs from ``word``, or ``word``
        itself when no such lemma exists.
    """
    candidates = []

    for synset in wordnet.synsets(word):
        synset_pos = synset.name().split('.')[1]
        if synset_pos in pos:
            # collect each differing lemma name once, in first-seen order
            for lemma in synset.lemmas():
                lemma_name = lemma.name()
                if lemma_name != word and lemma_name not in candidates:
                    candidates.append(lemma_name)

    if candidates:
        return random.choice(candidates)
    # nothing usable: fall back to the original word
    return word

def get_whitelist_synonym(word, pos):
    """Pick a random replacement for a whitelisted ``word``/``pos`` pair.

    The entry ``whitelist[word + '/' + pos]`` is either a list of candidates
    or a string naming another entry whose list is used. The original word is
    always added to the candidates, so it may be returned unchanged.
    """
    entry = whitelist[word+'/'+pos]
    if not isinstance(entry, list):
        entry = whitelist[entry]  # reference to another entry
    return random.choice(entry + [word])
    

def join_sentence(word_list):
    """Join tokens into a sentence: words are space-separated, punctuation is glued on.

    Args:
        word_list: list of tokens; a token whose first character is neither a
            word character nor whitespace is treated as punctuation.

    Returns:
        The joined sentence. Underscores inside word tokens are replaced by
        spaces. An empty list gives an empty string.
    """
    pieces = []

    for word in word_list:
        if re.match(r"[^\w\s]", word):
            pieces.append(word)
        else:
            pieces.append(' ' + word.replace('_', ' '))

    sentence = ''.join(pieces)
    # Only strip the separator that precedes the first word. Slicing off the
    # first character unconditionally would delete a leading punctuation mark.
    return sentence[1:] if sentence.startswith(' ') else sentence

In [390]:
# number of sample rewrites to print
sim = 20

# verbosify2 picks replacements at random, so each pass prints a different rewrite
input_sentence = 'I would like a hamburger cooked just for me'
for i in range(sim):
    print(verbosify2(input_sentence))

me, myself and I would care the ground beef misrepresent precisely for yours truly
I would care a burger wangle but for me
ur boy would wish an beefburger ready precisely for me, myself and I
I would care the ground beef falsify but for ur boy
me, myself and I would care a burger fix barely for me, myself and I
me, myself and I would care the ground beef cook hardly for ur boy
yours truly would care a ground beef make scarcely for ur boy
yours truly would care the ground beef fudge exactly for me
yours truly would care an burger misrepresent simply for ur boy
ur boy would wish an ground beef fudge precisely for yours truly
yours truly would care an beefburger falsify simply for me
yours truly would care a beefburger manipulate just now for ur boy
me, myself and I would care a ground beef wangle just now for me
ur boy would care the beefburger fix merely for me
I would wish an beefburger ready hardly for ur boy
I would care an beefburger cook merely for me, myself and I
ur boy would car

In [392]:
# Only names *from* nltk are imported elsewhere in this notebook, so bring the
# package itself into scope before calling it.
import nltk

nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/william/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [380]:
# split() without arguments splits on whitespace, giving one list item per word
args = 'I would like a hamburger cooked just for me'.split()

In [383]:
# joining the items with single spaces gives the sentence back
' '.join(args)

'I would like a hamburger cooked just for me'

# 080320 Fixing verbosify spacing

In [124]:
import random
import re
from nltk.corpus import wordnet
from nltk import pos_tag

# -- Helper functions for verbosify -- #
# Hand-picked replacements that take priority over the WordNet lookup.
# Keys have the form '<word>/<POS tag>' (_verbosify builds them from pos_tag output).
# A list value holds the replacement candidates; a string value is the key of
# another entry whose list is used instead (see _get_whitelist_synonym).
WHITELIST = {'a/DT': ['an', 'the'],
                'an/DT': ['a', 'the'],
                'the/DT': ['a', 'an'],
                'I/PRP': ['ur boy', 'me, myself and I', 'yours truly'],
                'me/PRP': 'I/PRP',  # reference: reuse the candidates of 'I/PRP'
                'you/PRP': ['thou', 'thoust'],
                'will/MD': ['shall', 'shalt']}

def _get_synonym(word, pos):
    """Pick a random WordNet synonym of ``word`` with a matching part of speech.

    Args:
        word: the word to replace.
        pos: string of accepted WordNet POS letters; a synset is used only
            when the second dot-separated field of its name occurs in it.

    Returns:
        A randomly chosen lemma name that differs from ``word``, with
        underscores turned into spaces, or ``word`` itself when there is none.
    """
    # dict keys keep first-seen order and drop repeated lemma names
    unique_names = {}

    for synset in wordnet.synsets(word):
        if synset.name().split('.')[1] in pos:
            for lemma in synset.lemmas():
                lemma_name = lemma.name()
                if lemma_name != word:
                    unique_names.setdefault(lemma_name, None)

    candidates = list(unique_names)
    if len(candidates) == 0:
        return word
    return random.choice(candidates).replace('_', ' ')

def _get_whitelist_synonym(word, pos):
    """Pick a random replacement for a whitelisted ``word``/``pos`` pair.

    ``WHITELIST[word + '/' + pos]`` is either a list of candidates or a string
    naming another entry whose list is used. The original word is always part
    of the pool, so it may be returned unchanged.
    """
    key = word+'/'+pos
    target = WHITELIST[key]
    # a non-list value is a reference to another entry
    pool = target if isinstance(target, list) else WHITELIST[target]
    return random.choice(pool + [word])

def _get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank tag into the WordNet POS letters to accept.

    Returns ``'as'`` for tags starting with J, ``'v'`` for V, ``'n'`` for N,
    ``'r'`` for R, and the empty string for every other tag.
    """
    prefix_table = (('J', 'as'), ('V', 'v'), ('N', 'n'), ('R', 'r'))
    return next(
        (wordnet_pos for prefix, wordnet_pos in prefix_table if treebank_tag.startswith(prefix)),
        '',
    )


# -- main verbosify function -- #
def _verbosify(input_sentence):
    """Rewrite a sentence word by word while keeping its spacing and punctuation.

    The text is split on every non-word character (the separators are kept as
    tokens of their own), the tokens are POS-tagged, and each word is replaced
    by a whitelist entry or a WordNet synonym.

    Args:
        input_sentence: the text to rewrite.

    Returns:
        The rewritten text.
    """
    # Raw string: '\W' inside a plain literal is an invalid escape sequence,
    # which newer Python versions warn about. The capture group keeps the
    # separators; empty strings between adjacent separators are dropped.
    tokens = [token for token in re.split(r'(\W)', input_sentence) if token != '']
    pieces = []

    for word, pos in pos_tag(tokens):
        if re.match(r'[^\w]', word):
            # punctuation/whitespace is copied through unchanged
            pieces.append(word)
        elif word.upper() == 'I':
            # 'i' and 'I' always use the 'I/PRP' entry, whatever tag they received
            pieces.append(_get_whitelist_synonym('I', 'PRP'))
        elif word+'/'+pos in WHITELIST:
            pieces.append(_get_whitelist_synonym(word, pos))
        else:
            pieces.append(_get_synonym(word, _get_wordnet_pos(pos)))

    return ''.join(pieces)

In [125]:
# long sample containing a newline, hyphens and quotes; the next assignment overrides it
input_sentence = 'Raze explodes out of Brazil with her big personality and big guns.\nWith her blunt-force-trauma playstyle, she excels at flushing entrenched enemies and clearing tight spaces with a generous dose of "boom".'
# the sample actually used for this run
input_sentence = "i like cheese"

print(_verbosify(input_sentence))

me, myself and I like Malva sylvestris


In [78]:
# Show how re.split with a capturing group tokenises the sentence: tokens that
# start with a non-word character are flagged with '[yuh] '.
for thing in re.split(r'(\W)', input_sentence):
    if re.match(r"[^\w]", thing): print('[yuh] ', end='')

    # Compare by value: `is` tests object identity, which only happens to work
    # for some strings and triggers a SyntaxWarning when used with a literal.
    if thing == '': print('_', end='\n')
    elif thing == ' ': print('--', end='\n')
    elif thing == '\n': print('newline', end='\n')
    else: print(thing, end='\n')

Raze
[yuh] --
explodes
[yuh] --
out
[yuh] --
of
[yuh] --
Brazil
[yuh] --
with
[yuh] --
her
[yuh] --
big
[yuh] --
personality
[yuh] --
and
[yuh] --
big
[yuh] --
guns
[yuh] .
_
[yuh] newline
With
[yuh] --
her
[yuh] --
blunt
[yuh] -
force
[yuh] -
trauma
[yuh] --
playstyle
[yuh] ,
_
[yuh] --
she
[yuh] --
excels
[yuh] --
at
[yuh] --
flushing
[yuh] --
entrenched
[yuh] --
enemies
[yuh] --
and
[yuh] --
clearing
[yuh] --
tight
[yuh] --
spaces
[yuh] --
with
[yuh] --
a
[yuh] --
generous
[yuh] --
dose
[yuh] --
of
[yuh] --
_
[yuh] "
boom
[yuh] "
_
[yuh] .
_


In [31]:
# Raw string so that '\W' reaches the regex engine without being read as an
# (invalid) string escape; adjacent separators leave '' items in the result.
print(re.split(r'(\W)', 'yo "wassup-hello-f" yo'))

['yo', ' ', '', '"', 'wassup', '-', 'hello', '-', 'f', '"', '', ' ', 'yo']


# 080320 Fix progress

In [18]:
import random
import re
from nltk.corpus import wordnet
from nltk import pos_tag
import time

In [19]:
# -- Helper functions for verbosify -- #
# Hand-picked replacements that take priority over the WordNet lookup.
# Keys have the form '<word>/<POS tag>' (verbosify builds them from pos_tag output).
# A list value holds the replacement candidates; a string value is the key of
# another entry whose list is used instead (see get_whitelist_synonym).
WHITELIST = {'a/DT': ['an', 'the'],
                'an/DT': ['a', 'the'],
                'the/DT': ['a', 'an'],
                'I/PRP': ['ur boy', 'me, myself and I', 'yours truly'],
                'me/PRP': 'I/PRP',  # reference: reuse the candidates of 'I/PRP'
                'you/PRP': ['thou', 'thoust'],
                'will/MD': ['shall', 'shalt']}

def get_synonym(word, pos):
    """Pick a random WordNet synonym of ``word`` with a matching part of speech.

    Args:
        word: the word to replace.
        pos: string of accepted WordNet POS letters; a synset is used only
            when the second dot-separated field of its name occurs in it.

    Returns:
        A randomly chosen lemma name that differs from ``word``, with
        underscores turned into spaces, or ``word`` itself when there is none.
    """
    matching_lemma_names = (
        lemma.name()
        for synset in wordnet.synsets(word)
        if synset.name().split('.')[1] in pos
        for lemma in synset.lemmas()
    )
    # dict.fromkeys drops duplicates while keeping first-seen order
    candidates = list(dict.fromkeys(name for name in matching_lemma_names if name != word))

    if not candidates:
        return word
    return random.choice(candidates).replace('_', ' ')

def get_whitelist_synonym(word, pos):
    """Pick a random replacement for a whitelisted ``word``/``pos`` pair.

    ``WHITELIST[word + '/' + pos]`` is either a list of candidates or a string
    naming another entry whose list is used. The original word is always part
    of the pool, so it may be returned unchanged.
    """
    looked_up = WHITELIST[word+'/'+pos]
    if isinstance(looked_up, list):
        pool = looked_up + [word]
    else:
        # reference to another entry
        pool = WHITELIST[looked_up] + [word]
    return random.choice(pool)

def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank tag into the WordNet POS letters to accept.

    Returns ``'as'`` for tags starting with J, ``'v'`` for V, ``'n'`` for N,
    ``'r'`` for R, and the empty string for every other tag.
    """
    if treebank_tag.startswith('J'):
        return 'as'
    if treebank_tag.startswith('V'):
        return 'v'
    if treebank_tag.startswith('N'):
        return 'n'
    # anything else is either an adverb-like 'R...' tag or not mapped at all
    return 'r' if treebank_tag.startswith('R') else ''


# -- main verbosify function -- #
def verbosify(input_sentence):
    """Rewrite a sentence word by word while keeping its spacing and punctuation.

    The text is split on every non-word character (the separators are kept as
    tokens of their own), the tokens are POS-tagged, and each word is replaced
    by a whitelist entry or a WordNet synonym.

    Args:
        input_sentence: the text to rewrite.

    Returns:
        The rewritten text.
    """
    # Raw string: '\W' inside a plain literal is an invalid escape sequence,
    # which newer Python versions warn about. The capture group keeps the
    # separators; empty strings between adjacent separators are dropped.
    tokens = [token for token in re.split(r'(\W)', input_sentence) if token != '']
    pieces = []

    for word, pos in pos_tag(tokens):
        if re.match(r'[^\w]', word):
            # punctuation/whitespace is copied through unchanged
            pieces.append(word)
        elif word.upper() == 'I':
            # 'i' and 'I' always use the 'I/PRP' entry, whatever tag they received
            pieces.append(get_whitelist_synonym('I', 'PRP'))
        elif word+'/'+pos in WHITELIST:
            pieces.append(get_whitelist_synonym(word, pos))
        else:
            pieces.append(get_synonym(word, get_wordnet_pos(pos)))

    return ''.join(pieces)

To change back:
- verbosify -> verbosify.verbosify
- print -> await ctx.send
- print(EDIT) -> await msg.edit(content=)

In [35]:
def full_verbosify(input_sentence):
    """Apply ``verbosify`` repeatedly to a sentence, printing progress on the way.

    When the first whitespace-separated token of ``input_sentence`` is an
    integer, it is taken as the number of passes and removed from the text;
    otherwise a single pass is made. A count outside 0..500 prints ``'bruh'``
    and stops. A count of 0 prints the text unchanged and a count of 1 prints
    one rewrite. For larger counts the first rewrite is printed, some
    intermediate rewrites are printed as ``EDIT`` lines, and the final rewrite
    is printed in chunks of at most 2000 characters.

    Args:
        input_sentence: the text to rewrite, optionally prefixed with a count.

    Returns:
        None; all results are printed.
    """
    MAX_MESSAGE_LEN = 2000      # longest chunk printed in one go
    PROGRESS_LEN_LIMIT = 1990   # progress lines are only printed below this length
    MAX_TOTAL_LEN = 20000       # would go past 10 messages...

    def split_message(msg):
        """Split ``msg`` (longer than MAX_MESSAGE_LEN) into ``(head, tail)``.

        The split happens at the last space at or before index
        MAX_MESSAGE_LEN and that space is dropped. When there is no usable
        space the text is cut at exactly MAX_MESSAGE_LEN and no character is
        dropped.
        """
        i = MAX_MESSAGE_LEN
        while i > 0 and msg[i] != ' ':
            i -= 1

        if i == 0:
            return msg[:MAX_MESSAGE_LEN], msg[MAX_MESSAGE_LEN:]
        return msg[:i], msg[i+1:]

    def long_output(msg, verbosified):
        """Print ``verbosified`` chunk by chunk; ``msg`` is accepted but not used."""
        # keep looping until the message is finished printing
        while True:
            print('long ooutput')
            if len(verbosified) <= MAX_MESSAGE_LEN:
                print(verbosified)
                return
            head, verbosified = split_message(verbosified)
            print(head)

    # Detect the num_times argument; the [1:] check lets a sign through so that
    # negative numbers reach the range check below.
    tokens = input_sentence.split()
    num_times = 1
    if tokens and (tokens[0].isdigit() or tokens[0][1:].isdigit()):
        try:
            requested = int(tokens[0])
        except ValueError:
            # e.g. 'x2': digits after the first character, but not an integer,
            # so the token is ordinary text
            requested = None

        if requested is not None:
            num_times = requested

            # bruh don't try to break it bruh
            if num_times < 0 or num_times > 500:
                print('bruh')
                return

            input_sentence = ' '.join(tokens[1:])

    # edge cases (compared by value; `is` on int literals tests identity)
    if num_times == 0:
        print(input_sentence)
        return
    if num_times == 1:
        print(verbosify(input_sentence))
        return

    # Run verbosify num_times number of times
    to_print = {round(num_times*(i/5)) for i in range(1, 5)}  # when to print progress
    max_char_count = False

    verbosified = verbosify(input_sentence)
    msg = print('`[1]` ' + verbosified)  # print returns None, so msg is None here

    for i in range(2, num_times):
        new_verbosified = verbosify(verbosified)

        if len(new_verbosified) > MAX_TOTAL_LEN:
            break  # would go past 10 messages...
        elif len(new_verbosified) > PROGRESS_LEN_LIMIT and not max_char_count:
            # first time the text outgrows one message: show the last short
            # version once and discard this pass
            time.sleep(1)
            print('EDIT\t`[...]` ' + verbosified)
            max_char_count = True
        else:
            verbosified = new_verbosified

            if i in to_print and len(verbosified) < PROGRESS_LEN_LIMIT:
                time.sleep(1)
                print('EDIT\t`[{}]` {}'.format(i, verbosified))

    # Final output
    time.sleep(1)
    verbosified = verbosify(verbosified)  # one last time

    if len(verbosified) <= MAX_MESSAGE_LEN:
        print('EDIT\t', verbosified)
    else:
        head, tail = split_message(verbosified)
        print('EDIT\t', head)  # do the first one
        long_output(msg, tail)

In [36]:
# the leading "50" is read by full_verbosify as the number of passes
input_sentence = "50 Raze explodes out of Brazil with her big personality and big guns. Raze explodes out of Brazil with her big personality and big guns."
full_verbosify(input_sentence)

`[1]` Raze detonate away of Federative Republic of Brazil with her gravid personality and freehanded hitman. Raze burst away of Brasil with her self-aggrandizing personality and bad triggerman.
EDIT	`[10]` Raze detonate Forth River river river river river river river of Federative plane ramification of proboscis bland of affaire of brazil with her gravid personality and freehanded engage stumble man airstream for hire. Raze wiz notch-upwards by of Federative absolute legal long prison term rule of Federative republic of Brasil nut with her self-embellish personality and invent customs tarnish-wakeless heavy weapon memory shop class.
EDIT	`[20]` Raze detonate Forth River river river river river river river river river river river river river river river river river of Federative woodworking represent after slur-witted motorcar rowlock of luggage compartment bland of affaire of brazil with her gravid personality and freehanded engage stumble homosexual field of battle of laurels dampen f