In [121]:
import os
import re
import sys,csv
import time
import string
import logging
import argparse
import platform
import requests
import sqlite3
import subprocess
import pandas as pd
import numpy as np
from time import sleep
from googletrans import Translator
from os.path import expanduser
from lxml import html

# Send log records to stdout so they appear in the cell output; urllib3's
# logger is capped at WARNING so only this notebook's own messages show at
# DEBUG/INFO level.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.WARNING)


HOME = expanduser("~")
# Exported CSV files go to ./data under the current working directory.
DATA_DIR = os.path.join(os.getcwd(), 'data')
# exist_ok=True replaces the separate isdir() check, so there is no window
# between "check" and "create" in which the directory can appear.
os.makedirs(DATA_DIR, exist_ok=True)

# NOTE: despite the *_dir names, both values are file paths (a SQLite database
# and a text file) on the mounted Kindle volume, not directories.
vocab_dir = "/Volumes/Kindle/system/vocabulary/vocab.db"
clip_dir = "/Volumes/Kindle/documents/My Clippings.txt"
# Module-level connection and cursor shared by the fetch_* query helpers.
con = sqlite3.connect(vocab_dir)
cur = con.cursor()

def fetch_bookname():
    """Return every row of the ``title`` column of BOOK_INFO.

    Uses the module-level cursor ``cur``. The result is whatever
    ``fetchall()`` yields for the query: one row per book, with the title as
    the row's only field.
    """
    title_rows = cur.execute('''select title from BOOK_INFO;''').fetchall()
    return title_rows

def fetch_words(book):
    """Return the looked-up words recorded for one book.

    Parameters
    ----------
    book : str
        Exact value of ``BOOK_INFO.title`` to filter on.

    Returns
    -------
    pandas.DataFrame
        Columns ``word``, ``stem`` and ``usage``; one row per LOOKUPS entry
        whose ``book_key`` matches the book's id.
    """
    # The title is bound as a parameter instead of being formatted into the
    # SQL text: titles containing quote characters no longer break the
    # statement, and the value can never be read as an identifier or as SQL.
    q_word = """select ta.word, ta.stem, tb.usage
        from (select id, word, stem from WORDS) ta
        inner join (
            select word_key, usage from LOOKUPS
            where book_key = (select id from BOOK_INFO where title = ?)
        ) tb on ta.id == tb.word_key;"""
    cur.execute(q_word, (book,))
    data = cur.fetchall()
    return pd.DataFrame(data, columns=['word', 'stem', 'usage'])

def _fetch_page(url, attempts=3, wait=5, timeout=10):
    """Return the HTTP response for ``url``, or ``None`` if every attempt fails."""
    for attempt in range(1, attempts + 1):
        try:
            # A timeout keeps one stalled request from hanging the whole run.
            return requests.get(url, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            logging.debug("Connection refused")
            if attempt < attempts:
                sleep(wait)
    return None


def _youdao_lookup(word):
    """Return the Youdao dictionary text for ``word`` ('' when nothing is found)."""
    from urllib.parse import quote

    # Escape the word so characters such as '/', '?' or '#' cannot alter the URL.
    url = "https://www.youdao.com/w/{}/#keyfrom=dict2.top".format(quote(word, safe=''))
    page = _fetch_page(url)
    if page is None or not page.content:
        return ''

    tree = html.fromstring(page.content)
    # Primary source: the dictionary definition list.
    definitions = tree.xpath('//*[@id="phrsListTab"]//div[@class="trans-container"]/ul/li/text()')
    if definitions:
        return ',\n'.join(definitions)
    # Fallback: titles from the web-translation panel (joins to '' when empty).
    web_titles = tree.xpath('//div[@id="tWebTrans"]/div[not(@id)]//div[@class="title"]//span/text()')
    return ''.join(web_titles)


def eng_to_cn(row, word_file, total_len):
    """Translate one row's text to Chinese and append the result to a CSV.

    The text is taken from the row's ``stem`` field if present, otherwise from
    ``note``. Texts of fewer than three whitespace-separated tokens are looked
    up on Youdao; longer texts go through ``googletrans``. The row, extended
    with a ``trans`` field, is appended to ``word_file`` via ``write_row``.

    Parameters
    ----------
    row : pandas.Series
        One DataFrame row, as passed by ``DataFrame.apply(..., axis=1)``.
    word_file : str
        Path of the CSV file the translated row is appended to.
    total_len : int
        Total number of rows; used only for the "remaining" progress log.

    Returns
    -------
    str
        The translation, or '' when none could be obtained.

    Raises
    ------
    KeyError
        If the row has neither a ``stem`` nor a ``note`` field.
    """
    if 'stem' in row.index:
        word = row['stem']
    elif 'note' in row.index:
        word = row['note']
    else:
        raise KeyError("row must contain a 'stem' or 'note' field")

    # Rows built without reset_index() have no 'index' field; fall back to the
    # row label so the progress log cannot raise KeyError.
    position = row['index'] if 'index' in row.index else row.name

    if not isinstance(word, str) or not word.strip():
        # Missing or blank text: nothing to translate.
        output = ''
    elif len(word.split()) < 3:
        output = _youdao_lookup(word)
    else:
        output = Translator().translate(word, dest='zh-cn').text

    record = row.to_dict()
    record['trans'] = output
    write_row(record, word_file)
    logging.info(total_len - position)
    return output
    

def write_row(row, word_file):
    """Append one record to the CSV file at ``word_file``.

    Parameters
    ----------
    row : dict
        Mapping of column name to value; its key order is the column order.
    word_file : str
        Path of the CSV file; created if it does not exist.
    """
    # newline='' lets the csv module control line endings (values may contain
    # embedded '\n'); utf-8 is set explicitly because the translations are
    # non-ASCII and the platform default encoding may not be able to hold them.
    with open(word_file, 'a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=list(row.keys()))
        writer.writerow(row)
            
            
def write_header(csv_col, word_file):
    """Create (or truncate) the CSV at ``word_file`` and write its header row.

    Parameters
    ----------
    csv_col : iterable of str
        Column names, in output order.
    word_file : str
        Path of the CSV file to (re)create.
    """
    # Same newline/encoding settings as the row writer so that the header and
    # the appended rows form one consistent file.
    with open(word_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=list(csv_col))
        writer.writeheader()
        
        
def fetch_note(book, clip_path=None):
    """Collect the highlight text of every clipping whose title starts with ``book``.

    The clippings file is split on the ``==========`` separator. Within each
    entry, line 0 is read as the title and line 3 as the highlight text;
    entries with fewer than four lines or an empty text line are skipped.

    Parameters
    ----------
    book : str
        Title prefix to match against each entry's first line.
    clip_path : str, optional
        Path of the clippings file. Defaults to the module-level ``clip_dir``.

    Returns
    -------
    pandas.DataFrame
        Columns ``note`` (highlight text) and ``title`` (always ``book``).
    """
    if clip_path is None:
        clip_path = clip_dir

    text = []
    with open(clip_path, 'r', encoding='utf-8') as f:
        for highlight in f.read().split("=========="):
            # Drop the newline left over from the separator and any BOM before
            # the title. Stripping (rather than discarding the first split
            # element) also keeps the very first entry of the file, which has
            # no leading newline.
            lines = highlight.lstrip("\ufeff\r\n").split("\n")
            # lines[3] is read below, so at least four lines are required.
            if len(lines) < 4 or lines[3] == "":
                continue
            if lines[0].startswith(book):
                text.append(lines[3])
    note = pd.DataFrame({'note': text})
    note['title'] = book
    return note


def _export(frame, csv_path, translate):
    """Write ``frame`` to ``csv_path``, translating row by row when ``translate`` is true."""
    if translate:
        write_header(list(frame.columns)+['trans'], csv_path)
        frame.apply(lambda x: eng_to_cn(x, csv_path, len(frame)), axis=1)
        print("Translation is completed.")
    else:
        # Only dump the plain frame when nothing was translated: writing it
        # after a translation run would overwrite the rows (with their 'trans'
        # values) that eng_to_cn has just appended to this same file.
        frame.to_csv(csv_path, index=False)


def main():
    """Interactively export one book's vocabulary words and notes to CSV.

    Lists the book titles, asks for a book index on stdin, fetches that
    book's words and notes, and for each asks whether to translate them.
    Results are written to ``<title> Word2.csv`` and ``<title> Note2.csv``
    under ``DATA_DIR``.
    """
    bn = fetch_bookname()
    bn_op = ["{}. {}".format(i,b[0]) for i, b in enumerate(bn)]
    print("Books:")
    print("=========")
    print('\n'.join(bn_op))
    print("=========")

    print()
    book = bn[int(input("Which book do you want to query? (Insert book index) "))][0]
    print(book)
    note = fetch_note(book)
    words = fetch_words(book).reset_index()
    # A path separator inside a title would otherwise be read as a directory.
    safe_title = book.replace(os.sep, '_')
    print()
    print("=========")

    print()
    if_trans = input("Words list is fetched. Do you want to translate all the words? [y/n] ")
    word_file = os.path.join(DATA_DIR, safe_title+' Word2.csv')
    _export(words, word_file, if_trans=='y')
    print("Words directory: "+word_file)
    print()
    print("=========")

    print()
    if_trans_note = input("Notes are fetched. Do you want to translate them all? [y/n] ")
    note_file = os.path.join(DATA_DIR, safe_title+' Note2.csv')
    _export(note, note_file, if_trans_note=='y')
    print("Notes directory: "+note_file)
    print()
    print("=========")

In [122]:
# Run the interactive export: main() prompts on stdin for the book index and
# for the two translate-or-not choices.
main()

Books:
0. 羊をめぐる冒険
1. The Rise and Fall of American Growth (The Princeton Economic History of the Western World)
2. The Human Tide
3. ノルウェイの森 (講談社文庫)
4. Seeing Like a State: How Certain Schemes to Improve the Human Condition Have Failed (The Institution for Social and Policy St)
5. No Filter
6. Two Cheers for Anarchism
7. The Handmaid's Tale
8. Atomic Habits: Tiny Changes, Remarkable Results

Which book do you want to query? (Insert book index) 7
The Handmaid's Tale


Words list is fetched. Do you want to translate all the words? [y/n] y


KeyboardInterrupt: 

In [106]:
# Preview a previously exported word CSV.
# NOTE(review): this is an absolute, machine-specific path, and the file name
# ends in "Word.csv" while main() writes "<title> Word2.csv" under DATA_DIR --
# confirm which export is meant to be inspected here.
pd.read_csv("/Users/yan/Google Drive/Python/kindle_parser/data/The Handmaid's Tale Word.csv").style

Unnamed: 0,word,stem,usage,trans
0,compound,compound,"Though we are no longer in the Commanders’ compound, there are large houses here also.","n. [化学] 化合物；混合物；复合词；有围栏（或围墙）的场地（内有工厂或其他建筑群）, adj. 复合的；混合的, v. 合成；混合；恶化，加重；和解，妥协"
1,façades,facade,"The lawns are tidy, the façades are gracious, in good repair; they’re like the beautiful pictures they used to print in the magazines about homes and gardens and interior decoration.",n. 正面；表面；外观
2,skimpy,skimpy,"There are other women with baskets, some in red, some in the dull green of the Marthas, some in the striped dresses, red and blue and green and cheap and skimpy, that mark the women of the poorer men.",adj. 不足的；吝啬的
3,cement,cement,The sidewalks here are cement.,"n. 水泥；接合剂；纽带；使人们团结的因素；黏固粉；牙骨质；沉积岩基质, v. 粘牢，胶合；巩固，确定；在......上抹水泥；（物质）在沉积岩中黏附（颗粒 ）"
4,cushioned,cushion,"Sometimes it was shoes for running, with cushioned soles and breathing holes, and stars of fluorescent fabric that reflected light in the darkness.","n. 垫子；起缓解作用之物；（猪等的）臀肉；银行储蓄, vt. 给…安上垫子；把…安置在垫子上；缓和…的冲击"
5,fluorescent,fluorescent,"Sometimes it was shoes for running, with cushioned soles and breathing holes, and stars of fluorescent fabric that reflected light in the darkness.","adj. 荧光的；发亮的, n. 荧光；日光灯"
6,obscenities,obscenity,"Now we walk along the same street, in red pairs, and no man shouts obscenities at us, speaks to us, touches us.",n. 猥亵，淫秽；猥亵的言语（或行为）
7,lettering,lettering,"You can see the place, under the lily, where the lettering was painted out, when they decided that even the names of shops were too much temptation for us.","n. 刻字；印字；书写的文字, v. 用字母写；用印刷体写（letter的ing形式）"
8,undone,undone,They wore blouses with buttons down the front that suggested the possibilities of the word undone.,"adj. （尤指衣服）未扣，解开的；（尤指工作）未完成的；（旧）毁掉的，完蛋了的, v. 打开，解开；消除……的影响；使垮台，打败；（计）撤销先前的指令（undo 的过去分词）"
9,furtively,furtive,"Nobody talks much, though there is a rustling, and the women’s heads move furtively from side to side: here, shopping, is where you might see someone you know, someone you’ve known in the time before, or at the Red Centre.",adj. 鬼鬼祟祟的，秘密的


In [107]:
    # NOTE(review): in the visible code `book` is only assigned inside main(),
    # so this cell relies on a kernel variable defined elsewhere -- confirm it
    # still runs after Restart & Run All.
    words = fetch_words(book).reset_index()


In [108]:
# Display the frame built in the previous cell.
words

Unnamed: 0,index,word,stem,usage
0,0,compound,compound,Though we are no longer in the Commanders’ com...
1,1,façades,facade,"The lawns are tidy, the façades are gracious, ..."
2,2,skimpy,skimpy,"There are other women with baskets, some in re..."
3,3,cement,cement,The sidewalks here are cement.
4,4,cushioned,cushion,"Sometimes it was shoes for running, with cushi..."
...,...,...,...,...
1281,1281,phosphorescent,phosphorescent,The paint must be phosphorescent.
1282,1282,talisman,talisman,"“Trust me,” he says; which in itself has never..."
1283,1283,snatch,snatch,"But I snatch at it, this offer."
1284,1284,incredulous,incredulous,"Serena Joy stands in the hallway, under the mi..."
