In [None]:
#buildJSON20151109

from bs4 import BeautifulSoup
import json
import re
import os
from os import listdir
from os.path import isfile, join
import shutil

import pymongo
from pymongo import MongoClient

#connect to MongoDB
client = MongoClient('localhost', 27017)
db = client.dpdb  # databas: dpdb

geweiDict = db.geweiDict201511 # collection (table): geweiDict201511
#geweiDict = db.geweiDictTest


def a_has_id(tag):
    return tag.has_attr('id')

def a_has_href(tag):
    return tag.has_attr('href')


labels = ('US', 'chiefly US', 'Brit', 'chiefly Brit', 
          'slang', 'offensive', 'obscene', 'impolite', 
          'informal', 'formal', 'literary', 'old-fashioned', 'humorous', 'technical', 'disapproving', 'approving',
          'medical', 'law', 'baseball', 'mathematics', 'sports', 'grammar',
          'trademark', 'old-fashioned')

#map IPA image symbols to my ASCII IPA symbols
ipa_to_alpha = {
    '22089': 'Z', #ZH -> Z
    '22212': 'Z', #ZH -> Z
    '22369': 'Z', #ZH -> Z
    '22077': 'z',
    '22200': 'y',
    '22047': 'w',
    '22005': 'v',
    '22218': 'u-Y',
    '22167': 'u-B',
    '22170': 'u-b',
    '22203': 'u-b',
    '22137': 'u-A',
    '22140': 'u-a',
    '22254': 'u-2',
    '22152': 'u-1',
    '22242': 'u-1',
    '22263': 'u-1',
    '22294': 'u-1',
    '22312': 'u-1',
    '22339': 'u-_',
    '22185': 'u-^',
    '22309': 'u-#',
    '22321': 'u-#',
    '22029': 'U', # u -> U
    '22074': 'u',
    '22342': 'U', # u -> U
    '22101': 'th', # TH -> th
    '22206': 'th', # TH -> th
    '22366': 'th', # TH -> th
    '22038': 't',
    '22044': 'S', # SH -> S
    '22209': 'S', # SH -> S
    '22363': 'S', # SH -> S
    '22014': 's',
    '22050': 'r',
    '22092': 'p',
    '22281': 'p',
    '22026': 'o',
    '22068': 'N', # NG -> N
    '22360': 'N', # NG -> N
    '21981': 'n',
    '22080': 'n-', # n -> n-
    '22357': 'n-', # n -> n-
    '22035': 'm',
    '22227': 'm',
    '22023': 'l',
    '22053': 'l-', # l -> l-
    '22354': 'l-', # l -> l-
    '22008': 'k',
    '22071': 'dZ', # JH -> dZ
    '22351': 'dZ', # JH -> dZ
    '22062': 'j',
    '21966': '!', # i -> !
    '22032': 'i',
    '22333': '!', # i -> !
    '22065': 'h',
    '22110': 'h',
    '22086': 'g',
    '22083': 'f',
    '21996': '~', # ER -> ~
    '22266': '~', # ER -> ~
    '22330': '~', # ER -> ~
    '21963': 'e',
    '22059': 'E', # e -> E
    '22269': 'E', # e -> E
    '22345': 'E', # e -> E
    '22131': 'TH', # DH -> TH
    '22116': 'dd', # no change but what 'dd' means?
    '21999': 'd',
    '22104': 'tS', # CH -> tS
    '22348': 'tS', # CH -> tS
    '22119': 'b-a',
    '22128': 'b-a',
    '22011': 'b',    
    '22056': '^', # AH -> ^
    '22327': '^', # AH -> ^
    '21975': '.', # AH -> .
    '22272': '.', # AH -> .
    '22324': '.', # AH -> .  
    '21984': '@', # AE -> @
    '21990': 'A', # a -> A
    '22020': 'a',
    '22257': 'A', # a -> A
    '22336': 'A', # a -> A
    '22041': 'u-~', # ~ -> u-~
    '22002': ',', # ` -> , secondary stress
    '21993': ':', # _ -> :
    '22260': ':', # _ -> :
    '21960': "'", # ^ -> ' primary stress
    '22149': ';',
    '21957': '/',
    '21978': '', # , -> null
    '21969': '#',
    '21972': '#',
    '21987': '#',
    '22017': '#',
    '22095': '#',
    '22098': '#',
    '22107': '#',
    '22113': '#',
    '22122': '#',
    '22125': '#',
    '22134': '#',
    '22143': '#',
    '22146': '#',
    '22155': '#',
    '22158': '#',
    '22161': '#',
    '22164': '#',
    '22173': '#',
    '22176': '#',
    '22179': '#',
    '22182': '#',
    '22188': '#',
    '22191': '#',
    '22194': '#',
    '22197': '#',
    '22215': '#',
    '22221': '#',
    '22224': '#',
    '22230': '#',
    '22233': '#',
    '22236': '#',
    '22239': '#',
    '22245': '#',
    '22248': '#',
    '22251': '#',
    '22275': '#',
    '22278': '#',
    '22284': '#',
    '22287': '#',
    '22291': '#',
    '22297': '#',
    '22300': '#',
    '22303': '#',
    '22306': '#',
    '22315': '#',
    '22318': '#',
    '22372': '#'
}

# words like abstract (noun), abstract (verb, adj) have different pronunciations
audiofile = ["abstract", "abuse", "advocate", "aggregate", "alternate", "appropriate", "approximate", "articulate", 
             "attribute", "close", "compound", "concrete", "conduct", "conflict", "consummate", "contract", "contrast", 
             "decrease", "degenerate", "desert", "digest", "essay", "excuse", "export", "graduate", "imprint", 
             "incarnate", "incense", "increase", "insult", "live", "minute", "mobile", "object", "outright", "overhaul", 
             "overload", "present", "produce", "progress", "project", "protest", "recall", "record", "research", 
             "separate", "subject", "supplement", "survey", "tear", "transfer", "wind"]

#mypath = os.getcwd() + "\\rawfile"
mypath = "E:\\dictrawfile-39195"
print mypath
onlyfiles = [ f for f in os.listdir(mypath) if isfile(join(mypath,f)) ]

cnt = 0  #total words
cnt_f = 0 #number of French words
cnt_d = 0 #number of words beginning with a dash
cnt_sq = 0 #number of words beginning with a single quote

kk = 0
for fname in onlyfiles:
    jsonBody = {}
    
    fj = open('E:\\dictjson\\' + fname + '.json', 'w')
    #print fj
    
    with open('E:\\dictrawfile-39195\\' + fname, 'r') as fentry:
        raw_doc = fentry.read()
    #fentry = open('declare.axb8472', 'r')
    #hw_doc = fentry.read()
        
    
    #get IPA symbols
    #first find the pair of slashes used to mark the beginning and end of a pronunciation
    #image21955.gif, image21956.gif, image21957.gif -- images of a slash '/'
    patt = '<img hspace="0" align="middle" losrc="Images\/image21955\.gif" src="Images\/image21956\.gif" hisrc="Images\/image21957\.gif"\/>'
    pattern = patt + '(.*?)' + patt
    #print pattern
    ipa = re.compile(pattern, re.DOTALL)

    flag_brit = 0 #suppose there is no British pronunciation
    my_pronun = []
    for sounds in ipa.findall(raw_doc):
        i = 0
        for sound in sounds.split('<i>Brit</i>'):
            i += 1
            ipa_soup = BeautifulSoup(sound)
            if i == 1: cmu = ''
            elif i == 2: #'Brit'
                flag_brit = 1 #there is British pronunciation
                break #skip British pronunciation
            for symbol in ipa_soup.find_all('img'):
                key = symbol['hisrc'][12:-4]
                if key in ipa_to_alpha: cmu += ipa_to_alpha[key]
                else: cmu += key
            #print cmu
            #print "---\n"   
            my_pronun.append(cmu)
    #print my_pronun
    if my_pronun: jsonBody['prn'] = my_pronun
    if flag_brit == 1: jsonBody['prn_brit'] = 'y'
        
    
    #after getting IPA, remove all image tags    
    hw_doc = re.sub(r"(<img[^>]*\/>)", "", raw_doc)
    #remove Unicode 'middle dot' u+00B7
    hw_doc = re.sub(u"\u00B7", "", hw_doc)
    #print hw_doc
    #continue
    fentry.close()

    soup = BeautifulSoup(hw_doc)

    
    #get headword
    headword = soup.find('idx:orth')
    #if headword[0] == "'": headword = "\\" + headword
    if headword: 
        jsonBody['hw'] = headword['value']
        kk += 1
        print kk, '---', jsonBody
    else:
        print fname, ": no headword"
        break
    
    #check if it is in 3,000 basic English words (3,000 core vocabulary words)
    basic_word = soup.find('u')
    if basic_word: jsonBody['bw'] = 1    
    #print jsonBody
    

    #French words
    if fname[0] == '.':
        cnt_f += 1
        jsonBody["fr"] = "y" #is it a Frenche word? yes
    elif fname[0] == '-':
        cnt_d += 1
        jsonBody["dash"] = "y" # begin with a dash
    elif fname[0] == "'":
        cnt_sq += 1
        jsonBody["sq"] = "y" #begin with a single quote '
    
    
    #get inflection
    infl = []
    for it in soup.find_all('idx:iform'):
        infl.append(it['value'])
        #print infl
    if infl: jsonBody['infl'] = infl

    #get id
    aid = soup.find(a_has_id)
    #print type(aid)
    if aid: jsonBody['id'] = aid['id']

    #get part of speech
    #<i><font color="#999999">noun</font></i>
    pos = soup.find(color="#999999")
    if pos: jsonBody['pos'] = pos.get_text()

        
    # if headword in audiofile list, generate "audiofile" key and value    
    if headword['value'] in audiofile:
        print 'in audiofile'
        jsonBody['audiofile'] = headword['value'] + " " + pos.get_text() + ".wav"
    # con is a key word of Windows, a filename as 'con.wav' is not allowed
    elif headword['value'] == 'con':
        print "---------con-----------"
        jsonBody['audiofile'] = "con1.wav"

        
    #get labels
    label = []
    label = soup.find_all(text=labels)
    #some labels are not captured in tags, so use regex to get them
    if soup.find(text=re.compile("figuratively")): label.append("figuratively")
    if soup.find(text=re.compile("informal")): label.append("informal")
    #no 'see color picture' in mobi file
    #if soup.find_all(text=re.compile("see color picture")): label.append("color picture")
    if label: jsonBody['label'] = label

    #get common phrases
    cplist = []
    for item in soup.find_all('span'):
        if item.parent.name == 'font': cplist.append(item.get_text())
    if cplist: jsonBody['cp'] = cplist

    #get all example sentences
    examples = []
    for it in soup.find_all(color="#002984"):
        examples.append(it.get_text())
    if examples: jsonBody['alles'] = examples

    #get all idioms & phrasal verbs, including common phrases
    idpvs = []
    for item in soup.find_all('span'):
        #print item.parent.name, "---", item.get_text()
        if item.parent.name != 'idx:entry': idpvs.append(item.get_text())
    if idpvs: jsonBody['idpvs'] = idpvs
                
                
    #get cross references
    cr = {}
    for link in soup.find_all(a_has_href):
        if link: cr[link.get('href').replace('#','')] = link.get_text().strip()
    if cr: jsonBody['cr'] = cr

        
    #construct definitions & examples
    sublist = []
    i = 1 #cross reference counter
    
    # remove j that is not necessary
    #j = 1 #idioms & phrasal verbs counter 

    for it in soup.find_all('blockquote'):
        #print it.parent.parent.name, ">>", it.parent.name, "---", it, "\n"
        if(it.parent.name=='idx:entry'):
            examp = []
            defi = it.get_text()
            if defi==" ": continue
            for e in it.find_all(color="#002984"):
                examp.append(e.get_text())
                defi = defi.replace(e.get_text(),"")
            #print defi
            #print examp

            subentry = {}
            # to-do: next time use'defi' which is better as the key of dictionary since def is a key word of Python
            subentry['def'] = defi
            subentry['es'] = examp
            #print subentry
            #print sublist                

            if subentry: sublist.append(subentry)
        
        #idioms & phrasal verbs
        if(it.parent.name=='div' and it.parent.parent.name!='blockquote'):
            subentry = {}
            
            #just use 'idpv' as key
            subentry['idpv'] = it.get_text()
            #subentry['idpv'+str(j)] = it.get_text()
            #j += 1
 
            #print subentry
            if subentry: sublist.append(subentry)

            
    if sublist: jsonBody['sublist'] = sublist
    if jsonBody:
        #print jsonBody
        cnt += 1
        json.dump(jsonBody, fj)
        
        # insert into MongoDB database
        doc_id = geweiDict.insert(jsonBody)
        #print doc_id 
        shutil.move("E:\\dictrawfile-39195\\"+fname, "E:\\dictprocessed\\"+fname)
    fj.close()

print 'total words: ', cnt, ', French words: ', cnt_f, ", begin with a dash:", cnt_d, ", begin with a ': ", cnt_sq




E:\dictrawfile-39195
1 --- {'prn': ["'kA:zk.z"], 'hw': "'cause"}
2 --- {'prn': ['.dtd;d'], 'hw': "'d"}
3 --- {'prn': ['.m'], 'hw': "'em"}
4 --- {'hw': "'ll"}
5 --- {'prn': ['.nn-'], 'hw': "'n'"}
6 --- {'prn': ["'ni:th"], 'hw': "'neath"}
7 --- {'prn': ['sptkfth;.zszSZtSdZ;z'], 'hw': "'s"}
8 --- {'hw': "'til"}
9 --- {'prn': ["'t!zt.z"], 'hw': "'tis"}
10 --- {'prn': ["'tw^z"], 'hw': "'twas"}
11 --- {'prn': ['v.v'], 'hw': "'ve"}
12 --- {'prn': ['sptkfth;.zszSZtSdZ;z'], 'hw': "-'s"}
13 --- {'hw': '-ability'}
14 --- {'hw': '-able'}
15 --- {'prn': ['!dZ'], 'hw': '-age'}
16 --- {'hw': '-al'}
17 --- {'hw': '-al'}
18 --- {'hw': '-an'}
19 --- {'hw': '-an'}
20 --- {'prn': ['.ns'], 'hw': '-ance'}
21 --- {'prn': ['.nsi'], 'hw': '-ancy'}
22 --- {'prn': ['.nt'], 'hw': '-ant'}
23 --- {'hw': '-ant'}
24 --- {'prn': ['~'], 'hw': '-ar'}
25 --- {'hw': '-ary'}
26 --- {'hw': '-ary'}
27 --- {'hw': '-ation'}
28 --- {'hw': '-ative'}
29 --- {'hw': '-backed'}
30 --- {'prn': ["'sEntr!k"], 'hw': '-centric'}
31 --- {

In [8]:
print(soup.img.decompose())

AttributeError: 'NoneType' object has no attribute 'decompose'

In [193]:
dpdict.find({"label":"mathematics"}).count()

64

In [194]:
dpdict.find({"label":"slang"}).count()

29

In [195]:
dpdict.find({"label":"offensive"}).count()

33

In [196]:
dpdict.find({"label":"chiefly US"}).count()

807

In [197]:
dpdict.find({"label":"Brit"}).count()

2615

In [209]:
db.dpdict.find({"label":"US"}).count()

2027

In [208]:
db.dpdict.find({cp: {$exists: true}}).count() 

SyntaxError: invalid syntax (<ipython-input-208-11f5d97b9c3b>, line 1)

In [284]:
dpdict.find_one({'hw': u'doppelg\xe4nger'})

{u'_id': ObjectId('537e93cd8707ee3128063ada'),
 u'alles': [u'I saw your doppelg\xe4nger [=(more commonly) double] yesterday.',
  u'In the story, the character is haunted by a doppelg\xe4nger.'],
 u'fr': u'y',
 u'hw': u'doppelg\xe4nger',
 u'idpvs': [u'dop\xb7pel\xb7gang\xb7er'],
 u'infl': {u'1np1': u'doppelg\xe4ngers', u'1ns1': u'doppelg\xe4nger'},
 u'label': [u'literary'],
 u'pos': u'or',
 u'sublist': [{u'def': u'1 : someone who looks like someone else  ',
   u'es': [u'I saw your doppelg\xe4nger [=(more commonly) double] yesterday.']},
  {u'def': u'2 literary : a ghost that looks like a living person ',
   u'es': [u'In the story, the character is haunted by a doppelg\xe4nger.']}]}

In [233]:
dpdict.find({"id":"filepos18005907"})


<pymongo.cursor.Cursor at 0x7993630>

In [53]:
comment = re.compile(r'/\*(.*?)\*/', re.DOTALL)
patt = '<img hspace="0" align="middle" losrc="Images\/image21955\.gif" src="Images\/image21956\.gif" hisrc="Images\/image21957\.gif"\/>'
pattern = patt + '(.*?)' + patt
print pattern
ipa = re.compile(pattern, re.DOTALL)
text1 = '/* this is a comment */'
text2 = '''/* this is a
        multiline comment */
        '''
text3 = '''<idx:entry scriptable="yes"><idx:orth value="vast"><idx:infl>  <idx:iform name="1.adj.pos.1" value="vast"/></idx:infl><idx:infl>  <idx:iform name="2.n.s.1" value="vast"/>  <idx:iform name="2.n.p.1" value="vasts"/></idx:infl></idx:orth><a id="filepos83514877" /><span><font size="3"><b><u>vast</u></span> <img hspace="0" align="middle" losrc="Images/image21955.gif" src="Images/image21956.gif" hisrc="Images/image21957.gif"/><img hspace="0" align="middle" losrc="Images/image21958.gif" src="Images/image21959.gif" hisrc="Images/image21960.gif"/><img hspace="0" align="middle" losrc="Images/image22003.gif" src="Images/image22004.gif" hisrc="Images/image22005.gif"/><img hspace="0" align="middle" losrc="Images/image21982.gif" src="Images/image21983.gif" hisrc="Images/image21984.gif"/><img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/><img hspace="0" align="middle" losrc="Images/image22036.gif" src="Images/image22037.gif" hisrc="Images/image22038.gif"/><img hspace="0" align="middle" losrc="Images/image21976.gif" src="Images/image21977.gif" hisrc="Images/image21978.gif"/> <i>Brit</i> <img hspace="0" align="middle" losrc="Images/image21958.gif" src="Images/image21959.gif" hisrc="Images/image21960.gif"/><img hspace="0" align="middle" losrc="Images/image22003.gif" src="Images/image22004.gif" hisrc="Images/image22005.gif"/><img hspace="0" align="middle" losrc="Images/image21988.gif" src="Images/image21989.gif" hisrc="Images/image21990.gif"/><img hspace="0" align="middle" losrc="Images/image21991.gif" src="Images/image21992.gif" hisrc="Images/image21993.gif"/><img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/><img hspace="0" align="middle" losrc="Images/image22036.gif" src="Images/image22037.gif" hisrc="Images/image22038.gif"/><img hspace="0" align="middle" losrc="Images/image21955.gif" src="Images/image21956.gif" hisrc="Images/image21957.gif"/> <i><font color="#999999">adj</font></i>, <b>vast積r</b>, <b>-est</b> [<i>more <img hspace="0" align="middle" losrc="Images/image22039.gif" src="Images/image22040.gif" hisrc="Images/image22041.gif"/>; most <img hspace="0" align="middle" losrc="Images/image22039.gif" src="Images/image22040.gif" hisrc="Images/image22041.gif"/></i>]<blockquote align="left"> <b>:</b> very great in size, amount, or extent <blockquote align="left"><font color="#002984">She has a <i>vast</i> amount of knowledge on this subject.</font></blockquote> <blockquote align="left"><font color="#002984"><i>vast</i> quantities of information</font></blockquote> <blockquote align="left"><font color="#002984">The policy is supported by the <i>vast</i> majority of citizens.</font></blockquote> <blockquote align="left"><font color="#002984">a <i>vast</i> expanse of land</font></blockquote></blockquote> <img hspace="0" vspace="0" align="middle" src="Images/image21970.gif"/><div align="left"><blockquote><span><font size="3"><b>vast穕y</span> <i><font color="#999999">adv</font></i> <blockquote><font color="#002984">His background is <i>vastly</i> different from mine.</font></blockquote> <blockquote><font color="#002984">They <i>vastly</i> increased spending.</font></blockquote></blockquote> </div>

<img hspace="0" vspace="0" align="middle" src="Images/image21970.gif"/><div align="left"><blockquote><span><font size="3"><b>vast穘ess</span> 

<img hspace="0" align="middle" losrc="Images/image21955.gif" src="Images/image21956.gif" hisrc="Images/image21957.gif"/>
<img hspace="0" align="middle" losrc="Images/image21958.gif" src="Images/image21959.gif" hisrc="Images/image21960.gif"/>
<img hspace="0" align="middle" losrc="Images/image22003.gif" src="Images/image22004.gif" hisrc="Images/image22005.gif"/>
<img hspace="0" align="middle" losrc="Images/image21982.gif" src="Images/image21983.gif" hisrc="Images/image21984.gif"/>
<img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/>
<img hspace="0" align="middle" losrc="Images/image22036.gif" src="Images/image22037.gif" hisrc="Images/image22038.gif"/>
<img hspace="0" align="middle" losrc="Images/image21979.gif" src="Images/image21980.gif" hisrc="Images/image21981.gif"/>
<img hspace="0" align="middle" losrc="Images/image21973.gif" src="Images/image21974.gif" hisrc="Images/image21975.gif"/>
<img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/>
<img hspace="0" align="middle" losrc="Images/image21976.gif" src="Images/image21977.gif" hisrc="Images/image21978.gif"/>
 <i>Brit</i> 
 <img hspace="0" align="middle" losrc="Images/image21958.gif" src="Images/image21959.gif" hisrc="Images/image21960.gif"/>
 <img hspace="0" align="middle" losrc="Images/image22003.gif" src="Images/image22004.gif" hisrc="Images/image22005.gif"/>
 <img hspace="0" align="middle" losrc="Images/image21988.gif" src="Images/image21989.gif" hisrc="Images/image21990.gif"/>
 <img hspace="0" align="middle" losrc="Images/image21991.gif" src="Images/image21992.gif" hisrc="Images/image21993.gif"/>
 <img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/>
 <img hspace="0" align="middle" losrc="Images/image22036.gif" src="Images/image22037.gif" hisrc="Images/image22038.gif"/>
 <img hspace="0" align="middle" losrc="Images/image21979.gif" src="Images/image21980.gif" hisrc="Images/image21981.gif"/>
 <img hspace="0" align="middle" losrc="Images/image21973.gif" src="Images/image21974.gif" hisrc="Images/image21975.gif"/>
 <img hspace="0" align="middle" losrc="Images/image22012.gif" src="Images/image22013.gif" hisrc="Images/image22014.gif"/>
<img hspace="0" align="middle" losrc="Images/image21955.gif" src="Images/image21956.gif" hisrc="Images/image21957.gif"/> 


<i><font color="#999999">noun</font></i> [<i>noncount</i>] <blockquote><font color="#002984">the <i>vastness</i> of the desert/ocean</font></blockquote></blockquote></div></idx:entry><div><img hspace="0" vspace="0" align="middle" src="Images/image21971.gif"/></div><div><table width="100%" bgcolor="#7593CD"><tr><th widht="100%" height="2px"/></tr></table></div><div><img hspace="0" vspace="0" align="middle" src="Images/image21972.gif"/></div> 
 
 '''
#comment.findall(text1)
#comment.findall(text2)
for sounds in ipa.findall(text3):
    for sound in sounds.split('<i>Brit</i>'):
        print "---\n"
        soup = BeautifulSoup(sound)
        for symbol in soup.find_all('img'):
            print symbol['hisrc']
        

<img hspace="0" align="middle" losrc="Images\/image21955\.gif" src="Images\/image21956\.gif" hisrc="Images\/image21957\.gif"\/>(.*?)<img hspace="0" align="middle" losrc="Images\/image21955\.gif" src="Images\/image21956\.gif" hisrc="Images\/image21957\.gif"\/>
---

Images/image21960.gif
Images/image22005.gif
Images/image21984.gif
Images/image22014.gif
Images/image22038.gif
Images/image21978.gif
---

Images/image21960.gif
Images/image22005.gif
Images/image21990.gif
Images/image21993.gif
Images/image22014.gif
Images/image22038.gif
---

Images/image21960.gif
Images/image22005.gif
Images/image21984.gif
Images/image22014.gif
Images/image22038.gif
Images/image21981.gif
Images/image21975.gif
Images/image22014.gif
Images/image21978.gif
---

Images/image21960.gif
Images/image22005.gif
Images/image21990.gif
Images/image21993.gif
Images/image22014.gif
Images/image22038.gif
Images/image21981.gif
Images/image21975.gif
Images/image22014.gif


In [1]:
import pymongo
from pymongo import MongoClient

#connect to MongoDB
client = MongoClient('localhost', 27017)
db = client.dpdb
dpdict = db.dpdict

In [6]:
words = dpdict.find({"hw":"incongruous"})
for word in words:
    print word.get("alles")
    print word.get("sublist")

[u'His outburst seemed incongruous to those who know him well.', u'The style of the porch is incongruous with [=does not match] the style of the house overall.', u'The modern sculpture seems incongruous [=out of place] among all the antiques.']
[{u'def': u' : strange because of not agreeing with what is usual or expected   ', u'es': [u'His outburst seemed incongruous to those who know him well.', u'The style of the porch is incongruous with [=does not match] the style of the house overall.', u'The modern sculpture seems incongruous [=out of place] among all the antiques.']}, {u'idpv1': u'in\xb7con\xb7gru\xb7i\xb7ty noun, pl -ties [count, noncount]'}, {u'idpv2': u'in\xb7con\xb7gru\xb7ous\xb7ly adv'}]
