In [1]:
# import required packages
import re
import gensim
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize 
from nltk.corpus import wordnet as wn

In [6]:
# Load the pre-trained word vectors with gensim, using the same corpus as lesson 8.05.
# The path is relative to the notebook's working directory, so the vectors file
# must be present there before this cell is run.
# NOTE(review): `model` is read as a global by most_similar_subjective, so this
# cell has to be executed before that function is called.
model = gensim.models.KeyedVectors.load_word2vec_format('./lexvec.enwiki+newscrawl.300d.W.pos.vectors')

In [2]:
# Word stems that sociology researchers classify as subjectively masculine-coded.
# Each entry is treated as a substring fragment when the text is scanned, so a
# stem such as 'lead' also matches longer words that contain it.
# NOTE(review): the scanning functions strip punctuation (including hyphens)
# from the text before matching, so the hyphenated 'self-...' stems look like
# they can never match as written -- confirm and normalise if that is unintended.
masc = [
    'active', 'adventurous', 'aggress', 'ambitio', 'analy', 'assert', 'athlet', 'autonom',
    'boast',
    'challeng', 'compet', 'confident', 'courag',
    'dominant', 'domina', 'decide', 'decisive', 'decision', 'determin',
    'force',
    'greedy',
    'headstrong', 'hierarch', 'hostil',
    'impulsive', 'independen', 'individual', 'intellect',
    'lead', 'logic',
    'masculine',
    'objective', 'opinion', 'outspoken',
    'persist', 'principle',
    'reckless',
    'stubborn', 'superior', 'self-confiden', 'self-sufficien', 'self-relian',
]

In [7]:
# Define the first suggestion function, backed by the gensim word2vec model
def most_similar_subjective(txt):
    """Suggest embedding-based alternatives for masculine-coded words in ``txt``.

    Punctuation is stripped from ``txt`` and the remainder is split on
    whitespace. Every token that contains at least one fragment from the
    global ``masc`` list is looked up in the global word-vector ``model``;
    the nearest neighbours that do not themselves contain a matched fragment
    are offered as replacements.

    Args:
        txt: Raw text to scan.

    Returns:
        A list of ``(word, [alternative, ...])`` tuples, one per matching
        token occurrence, in the order the tokens appear in ``txt``. The
        alternatives list is empty when the model has no vector for the
        token or when every neighbour contains a matched fragment.
    """
    rm_punc = re.sub(r'[^\w\s]', '', txt)
    suggestions = []
    for word in rm_punc.split():
        # Collect every fragment that hits this token so that a word matching
        # several stems (e.g. 'dominant' vs. 'domina') yields one entry, not
        # one duplicate entry per stem.
        matched = [fragment for fragment in masc if fragment in word]
        if not matched:
            continue
        try:
            neighbours = model.most_similar(word)
        except KeyError:
            # Token is not in the model's vocabulary: keep the word in the
            # result (so callers still see it was flagged) with no suggestions.
            neighbours = []
        alternatives = [
            neighbour[0]
            for neighbour in neighbours
            if not any(fragment in neighbour[0] for fragment in matched)
        ]
        suggestions.append((word, alternatives))
    return suggestions

In [4]:
def hyper_hypo_nyms(txt):
    """Suggest WordNet hypernyms and hyponyms for masculine-coded words in ``txt``.

    Punctuation is stripped from ``txt``; the remainder is sentence-split,
    tokenised and part-of-speech tagged with NLTK. Every token that contains
    at least one fragment from the global ``masc`` list is looked up in
    WordNet, and the names of the hypernyms and hyponyms of each of its
    synsets are offered as replacements unless they contain a matched
    fragment themselves.

    Args:
        txt: Raw text to scan.

    Returns:
        A list of ``(word, [related_name, ...])`` tuples, one per matching
        token occurrence, in text order. For each synset the hypernym names
        come first, followed by the hyponym names. Returns an empty list for
        empty input.
    """
    rm_punc = re.sub(r'[^\w\s]', '', txt)
    # First letter of the tagger's tag -> WordNet part-of-speech code.
    tag_to_pos = {'N': 'n', 'J': 'a', 'R': 'r', 'V': 'v'}
    suggestions = []
    for sentence in sent_tokenize(rm_punc):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
        # Scan the tokens of *every* sentence (not just the last one tagged).
        for word, tag in tagged:
            # One entry per token even when several stems match it.
            matched = [fragment for fragment in masc if fragment in word]
            if not matched:
                continue
            # Tags outside noun/adjective/adverb/verb map to None, which asks
            # WordNet for synsets of any part of speech instead of reusing a
            # stale value from a previous token.
            pos = tag_to_pos.get(tag[:1])
            related = []
            for synset in wn.synsets(word, pos):
                for neighbours in (synset.hypernyms(), synset.hyponyms()):
                    for neighbour in neighbours:
                        lemma = neighbour.name().split('.')[0]
                        if not any(fragment in lemma for fragment in matched):
                            related.append(lemma)
            suggestions.append((word, related))
    return suggestions

In [5]:
hyper_hypo_nyms('''<p><b>POSITION SUMMARY</b></p>, <p>\r\r\nThe Business Analyst role is the primary architect of challenging and dashboard solutions for internal and external clients. Utilizing ESI corporate standard development tools this position is responsible for the design, development, implementation, analysis, interpretation and communication of business information based on the needs of individual clients. The ability to balance overall aesthetics with robust and intuitive functionality is a critical requirement for success in this position.</p>, <p><b>\r\r\nESSENTIAL FUNCTIONS</b></p>, <ul><li>\r\r\nSuccessfully design and implement external client data reporting and dashboard solutions with a strong focus on product aesthetics and functionality.</li><li>\r\r\nAid in the design, development, and implementation of new product ideas for external and internal clients.</li><li>\r\r\nMaintain Live and Data Warehouse Business Objects Universes; add new fields, modify table joins, implement data structures that streamline report extraction and data analysis.</li><li>\r\r\nDevelop and document best practices for all points throughout the design and implementation process.</li><li>\r\r\nCoordinate and interface with Account Management and Implementation teams to gather product design requirements and provide insight into capabilities and solutions.</li><li>\r\r\nResearch and present new software and technology solutions to other internal developers, as well as management, to allow for the evaluation and potential integration of new development tools.</li></ul>, <p><b>QUALIFICATIONS</b></p>, <ul><li><p>\r\r\nBachelor’s degree in related field or 8 to 11 years of experience.</p></li><li>\r\r\n2-5 years relevant experience with Bachelor’s Degree or Master’s degree and 0-3 years of relevant experience.</li><li>\r\r\nRecent experience creating Business Objects XI reports.</li><li>\r\r\nDesigning data visualization applications using SAP Xcelsius 2008 
software.</li><li>\r\r\nDesigning, implementing and maintaining data universe structures using Business Objects Universe Designer.</li><li>\r\r\nSQL, AS400, Adobe Flex, Flash experience preferred.</li><li>\r\r\nCreative problem solver.</li><li>\r\r\nFundamental commitment to creating customer value through technical innovation.<br/>\r\r\n</li></ul>, <p>\r\r\nBachelor’s degree in related field or 8 to 11 years of experience.</p>, <p><b>\r\r\nABOUT THE DEPARTMENT</b></p>, <p><b>\r\r\nABOUT EXPRESS SCRIPTS</b></p>, <p>\r\r\nAdvance your career with the company that makes it easier for people to choose better health.</p>, <p>\r\r\nExpress Scripts is a leading healthcare company serving tens of millions of consumers. We are looking for individuals who are passionate, creative and committed to creating systems and service solutions that promote better health outcomes. Join the company that Fortune magazine ranked as one of the "Most Admired Companies" in the pharmacy category. Then, use your intelligence, creativity, integrity and hard work to help us enhance our products and services. We offer a highly competitive base salary and a comprehensive benefits program, including medical, prescription drug, dental, vision, 401(k) with company match, life insurance, paid time off, tuition assistance and an employee stock purchase plan.</p>, <p>\r\r\nExpress Scripts is an equal opportunity employer/disability/veteran</p>''')

[('challenging',
  ['contest',
   'call',
   'call',
   'invite',
   'action',
   'appeal',
   "call_one's_bluff",
   'call_out',
   'defy',
   'impeach',
   'impugn',
   'litigate',
   'provoke',
   'question',
   'remit',
   'request',
   'object',
   'appeal']),
 ('analysis',
  ['investigation',
   'anatomy',
   'case_study',
   'dissection',
   'reasoning',
   'breakdown',
   'dissection',
   'elimination',
   'reductionism',
   'criticism',
   'expressive_style',
   'calculus',
   'psychotherapy']),
 ('individual', []),
 ('analysislili', []),
 ('leading',
  ['beacon',
   'hand',
   'usher',
   'produce',
   'leave',
   'entail',
   'necessitate',
   'precede',
   'draw_away',
   'induce',
   'give',
   'be',
   'come',
   'go_far',
   'radiate',
   'direct',
   'captain',
   'chair',
   'spearhead',
   'take_hold',
   'promote',
   'perform',
   'be',
   'travel',
   'pass',
   'hash_out']),
 ('individuals',
  ['causal_agent',
   'organism',
   'abator',
   'abjurer',
   'abominat

In [11]:
def combine_results(txt):
    """Merge the embedding-based and WordNet-based suggestions for ``txt``.

    Runs ``most_similar_subjective`` and ``hyper_hypo_nyms`` on the same text
    and pairs their suggestion lists word by word.

    Args:
        txt: Raw text to scan.

    Returns:
        A list of ``(word, [similar_words, wordnet_words])`` tuples, one per
        entry returned by ``most_similar_subjective`` and in the same order.
        ``wordnet_words`` is an empty list when ``hyper_hypo_nyms`` produced
        no entry for that word.
    """
    similar_results = most_similar_subjective(txt)
    hyper_hypo_results = hyper_hypo_nyms(txt)
    # Index the WordNet results by word. When a word appears more than once
    # the last entry wins, which is what the nested-loop lookup used to pick.
    wordnet_by_word = {word: related for word, related in hyper_hypo_results}
    combined = []
    for word, similar in similar_results:
        # Fall back to an empty list instead of re-appending the previous
        # word's tuple (or failing outright) when there is no WordNet match.
        combined.append((word, [similar, wordnet_by_word.get(word, [])]))
    return combined

In [14]:
combine_results('challenge, aggressive')

[('challenge',
  [['obstacle',
    'dilemma',
    'conundrum',
    'quandary',
    'threat',
    'competition',
    'showdown',
    'daunting'],
   ['situation',
    'speech_act',
    'call-out',
    'calling_into_question',
    'confrontation',
    'dare',
    'defiance',
    'demand_for_identification',
    'gauntlet',
    'questioning',
    'objection',
    'demand']]),
 ('aggressive',
  [['assertive',
    'confrontational',
    'forceful',
    'proactive',
    'combative',
    'vigorous',
    'agressive',
    'strident'],
   []]),
 ('aggression',
  [['belligerence',
    'provocation',
    'provocations',
    'hostility',
    'violence',
    'repression',
    'assertiveness'],
   ['unfriendliness',
    'hostility',
    'action',
    'meat_grinder',
    'plundering',
    'violence',
    'combat',
    'behavior',
    'aggravation',
    'bitchery',
    'bullying',
    'raising_hell',
    'self-assertion']])]