In [3]:
import os
import re
import sys
import time
import urllib
import string
import argparse
import platform
import requests
import sqlite3
import subprocess
import pandas as pd
import numpy as np
from googletrans import Translator
from os.path import expanduser
from lxml import html

# Home directory of the current user; main() writes its CSV exports here.
HOME= expanduser("~")

In [19]:
# Path of the SQLite vocabulary database (despite the `_dir` suffix this is a
# file path). It points below /Volumes/Kindle, so the device must be mounted there.
vocab_dir = "/Volumes/Kindle/system/vocabulary/vocab.db"
# Path of the clippings text file parsed by fetch_note().
clip_dir = "/Volumes/Kindle/documents/My Clippings.txt"
# Module-level connection and cursor used by fetch_bookname() and fetch_words().
# NOTE(review): the connection is opened as a side effect of running this cell
# and is never closed in the visible code.
con = sqlite3.connect(vocab_dir)
cur = con.cursor()

def fetch_bookname():
    """Return the title of every row in the BOOK_INFO table.

    The query runs on the module-level cursor ``cur``.

    Returns:
        list: The ``fetchall()`` result, one ``(title,)`` tuple per row.
    """
    # sqlite3's Cursor.execute() returns the cursor itself, so the calls chain.
    return cur.execute('''select title from BOOK_INFO;''').fetchall()

def fetch_words(book, cursor=None):
    """Return the words looked up in one book, with stem and usage sentence.

    Args:
        book: Book title, matched exactly against ``BOOK_INFO.title``.
        cursor: Optional DB-API cursor to run the query on. Defaults to the
            module-level ``cur`` so existing ``fetch_words(book)`` calls keep
            working unchanged.

    Returns:
        pandas.DataFrame: Columns ``word``, ``stem`` and ``usage``, one row per
        LOOKUPS entry whose ``book_key`` belongs to ``book``. Empty when no
        such lookups exist.
    """
    if cursor is None:
        cursor = cur

    # The title is bound as a parameter instead of being formatted into the
    # SQL text: a title containing a double quote used to break (or inject
    # into) the statement, and SQLite may read a double-quoted token as an
    # identifier rather than a string.
    q_word = """
        select ta.word, ta.stem, tb.usage
        from (select id, word, stem from WORDS) ta
        inner join (
            select word_key, usage
            from LOOKUPS
            where book_key = (select id from BOOK_INFO where title = ?)
        ) tb on ta.id = tb.word_key;
    """
    cursor.execute(q_word, (book,))
    data = cursor.fetchall()
    return pd.DataFrame(data, columns=['word', 'stem', 'usage'])

def eng_to_cn(word,src = 'youdao'):
    """Translate an English word or phrase into Chinese.

    Args:
        word: Text to translate.
        src: Back end to use. ``'youdao'`` scrapes the youdao.com dictionary
            page for ``word``; ``'google'`` uses ``googletrans.Translator``.

    Returns:
        str: For ``'youdao'``, the text nodes matched on the page joined with
        ``',\\n'``. The dictionary-definition list is tried first and the
        web-translation titles are the fallback; ``''`` is returned when
        neither matches. For ``'google'``, the translated text.

    Raises:
        ValueError: If ``src`` names no supported back end. Previously the
            function fell through and returned ``None`` silently.
    """
    if src == 'youdao':
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.parse`` has been loaded.
        from urllib.parse import quote

        url = "https://www.youdao.com/w/{}/#keyfrom=dict2.top".format(quote(word))
        # Without a timeout a stalled connection blocks the caller forever.
        page = requests.get(url, timeout=10)
        tree = html.fromstring(page.content)

        # Tried in order; the first expression that matches anything wins.
        xpaths = (
            '//*[@id="phrsListTab"]//div[@class="trans-container"]/ul/li/text()',
            '//div[@id="tWebTrans"]/div[not(@id)]//div[@class="title"]//span/text()',
        )
        for xpath in xpaths:
            output = tree.xpath(xpath)
            if output:
                return ',\n'.join(output)
        return ''

    if src == 'google':
        translator = Translator()
        return translator.translate(word, dest='zh-cn').text

    raise ValueError(
        "Unknown translation source {!r}; expected 'youdao' or 'google'".format(src)
    )


def fetch_note(book, path=None):
    """Collect the clippings whose title line starts with ``book``.

    The clippings file is split on the ``==========`` separator. In each entry
    the first line is taken as the title and the fourth line as the clipped
    text; entries with fewer than four lines or an empty fourth line are
    skipped.

    Args:
        book: Title prefix to match against each entry's first line.
        path: Optional path of the clippings file. Defaults to the
            module-level ``clip_dir`` so existing ``fetch_note(book)`` calls
            keep working unchanged.

    Returns:
        pandas.DataFrame: Columns ``note`` (the clipped text) and ``title``
        (always ``book``), one row per matching entry.
    """
    if path is None:
        path = clip_dir

    text = []
    with open(path, 'r', encoding='utf-8') as f:
        for highlight in f.read().split("=========="):
            # Every entry but the first begins with the newline that follows
            # the previous separator. Strip it rather than dropping the first
            # element unconditionally, which discarded the title of the first
            # entry and caused that entry to be skipped.
            lines = highlight.lstrip("\n").split("\n")
            # lines[3] is read below, so at least four lines are required;
            # the old `< 3` check let 3-line entries raise IndexError.
            if len(lines) < 4 or lines[3] == "":
                continue
            # Remove a leading byte-order mark; also safe on an empty title.
            title = lines[0].lstrip("\ufeff")
            if title.startswith(book):
                text.append(lines[3])
    note = pd.DataFrame({'note': text})
    note['title'] = book
    return note

In [33]:
def main():
    """Interactively export the looked-up words and clippings of one book.

    Prints the titles returned by fetch_bookname(), reads a book index from
    standard input, then writes two CSV files into ``HOME``:
    ``<title> Word.csv`` (from fetch_words) and ``<title> Note.csv`` (from
    fetch_note). For each file the user is asked whether a ``trans`` column
    with Chinese translations should be added first.

    Raises:
        ValueError: If the entered index is not an integer.
        IndexError: If the entered index is out of range.
    """
    bn = fetch_bookname()
    bn_op = ["{}. {}".format(i,b[0]) for i, b in enumerate(bn)]
    print("Books:")
    print("=========")
    print('\n'.join(bn_op))
    print("=========")

    print()
    book = bn[int(input("Which book do you want to query? (Insert book index) "))][0]
    print(book)
    # Export every row. A leftover `.head(1)` used to cut both frames down to
    # their first row even though the prompts below promise "all".
    note = fetch_note(book)
    words = fetch_words(book)
    # A path separator inside the title would otherwise be interpreted as a
    # (non-existent) sub-directory of HOME when the CSV is written.
    file_stem = book.replace(os.sep, '_')
    print()
    print("=========")

    print()
    if_trans = input("Words list is fetched. Do you want to translate all the words? [y/n]")
    if if_trans=='y':
        words['trans'] = words['stem'].apply(eng_to_cn)
        print("Translation is completed.")
    word_dir = os.path.join(HOME,file_stem+' Word.csv')
    words.to_csv(word_dir,index=False)
    print("Words directory: "+word_dir)
    print()
    print("=========")

    print()
    if_trans_note = input("Notes are fetched. Do you want to translate them all? [y/n]")
    if if_trans_note=='y':
        # Nothing to translate for an empty frame (and the .str accessor may
        # be unavailable on an empty, non-string column).
        if not note.empty:
            # Whitespace-separated token count after stripping ASCII
            # punctuation from both ends of each note.
            word_count = note['note'].str.strip(string.punctuation).str.split().apply(len)
            is_single = word_count == 1
            is_phrase = word_count > 1
            # Single words go to the dictionary scraper, longer passages to
            # the sentence translator.
            note.loc[is_single,'trans'] = note.loc[is_single,'note'].apply(lambda x: eng_to_cn(x,'youdao'))
            note.loc[is_phrase,'trans'] = note.loc[is_phrase,'note'].apply(lambda x: eng_to_cn(x,'google'))
        print("Translation is completed.")
    note_dir = os.path.join(HOME,file_stem+' Note.csv')
    note.to_csv(note_dir,index=False)
    print("Notes directory: "+note_dir)
    print()
    print("=========")

Books:
0. 羊をめぐる冒険
1. The Rise and Fall of American Growth (The Princeton Economic History of the Western World)
2. The Human Tide
3. ノルウェイの森 (講談社文庫)
4. Seeing Like a State: How Certain Schemes to Improve the Human Condition Have Failed (The Institution for Social and Policy St)
5. No Filter
6. Two Cheers for Anarchism

Which book do you want to query? (Insert book index) 6
Two Cheers for Anarchism


Words list is fetched. Do you want to translate all the words? [y/n]y
Translation is completed.
Words directory: /Users/yan/Two Cheers for Anarchism Word.csv


Notes are fetched. Do you want to translate them all? [y/n]y
Translation is completed.
Notes directory: /Users/yan/Two Cheers for Anarchism Note.csv



In [22]:
# Keep only the first 10 rows of `note`.
# NOTE(review): no visible cell creates a module-level `note` before this one
# (main() only binds a local of that name), so this cell appears to depend on
# leftover kernel state — confirm it survives Restart & Run All.
note = note.head(10)

In [23]:
note

Unnamed: 0,note,title,len_
0,revolution ended by creating a state more powe...,Two Cheers for Anarchism,38
1,bear out the adage of,Two Cheers for Anarchism,5
2,“Freedom without socialism is privilege,Two Cheers for Anarchism,5
3,and injustice; socialism without freedom is sl...,Two Cheers for Anarchism,9
4,nomothetic,Two Cheers for Anarchism,1
5,wary,Two Cheers for Anarchism,1
6,"One thing that heaves into view,",Two Cheers for Anarchism,6
7,In light of the,Two Cheers for Anarchism,4
8,huge strides,Two Cheers for Anarchism,2
9,There is no authentic freedom where huge diffe...,Two Cheers for Anarchism,18


In [30]:
note

Unnamed: 0,note,title,len_,trans
0,revolution ended by creating a state more powe...,Two Cheers for Anarchism,38,革命以建立一个比其推翻的国家更强大的国家而告终，而该国家反过来又可以从其计划服务的人口中提取...
1,bear out the adage of,Two Cheers for Anarchism,5,忍受
2,“Freedom without socialism is privilege,Two Cheers for Anarchism,5,“没有社会主义的自由就是特权
3,and injustice; socialism without freedom is sl...,Two Cheers for Anarchism,9,和不公正；没有自由的社会主义就是奴隶制和野蛮。”
4,nomothetic,Two Cheers for Anarchism,1,adj. 制定法律的；以法律为根据的（副词nomothetically，异体字nomothe...
5,wary,Two Cheers for Anarchism,1,adj. 谨慎的；机警的；惟恐的；考虑周到的
6,"One thing that heaves into view,",Two Cheers for Anarchism,6,一件事让人眼前一亮，
7,In light of the,Two Cheers for Anarchism,4,鉴于
8,huge strides,Two Cheers for Anarchism,2,大步向前
9,There is no authentic freedom where huge diffe...,Two Cheers for Anarchism,18,没有真正的自由，只有巨大的分歧才能达成自愿协议或交换合法的掠夺物。


In [17]:
# Per-note token count: strip ASCII punctuation from both ends of the text,
# split on whitespace, and store the number of resulting pieces in `len_`.
note['len_'] = note['note'].str.strip(string.punctuation).str.split().apply(len)

0    38
1     5
2     5
3     9
4     1
5     1
6     6
7     4
8     2
9    18
Name: note, dtype: int64

In [18]:
note

Unnamed: 0,note,title,trans
0,revolution ended by creating a state more powe...,Two Cheers for Anarchism,革命以建立一个比其推翻的国家更强大的国家而告终，而该国家反过来又可以从其计划服务的人口中提取...
1,bear out the adage of,Two Cheers for Anarchism,忍受
2,“Freedom without socialism is privilege,Two Cheers for Anarchism,“没有社会主义的自由就是特权
3,and injustice; socialism without freedom is sl...,Two Cheers for Anarchism,和不公正；没有自由的社会主义就是奴隶制和野蛮。”
4,nomothetic,Two Cheers for Anarchism,理性的
5,wary,Two Cheers for Anarchism,警惕
6,"One thing that heaves into view,",Two Cheers for Anarchism,一件事让人眼前一亮，
7,In light of the,Two Cheers for Anarchism,鉴于
8,huge strides,Two Cheers for Anarchism,大步向前
9,There is no authentic freedom where huge diffe...,Two Cheers for Anarchism,没有真正的自由，只有巨大的分歧才能达成自愿协议或交换合法的掠夺物。


In [8]:
# The `note` column of the first 20 rows (fewer if the frame is shorter) as a
# plain Python list; the bare expression below displays it.
text = note['note'].iloc[:20].tolist()
text

['revolution ended by creating a state more powerful than the one it overthrew, a state that in turn was able to extract more resources from and exercise more control over the very populations it was designed to serve.',
 'bear out the adage of',
 '“Freedom without socialism is privilege',
 'and injustice; socialism without freedom is slavery and brutality.”',
 'nomothetic',
 'wary',
 'One thing that heaves into view,',
 'In light of the',
 'huge strides',
 'There is no authentic freedom where huge differences make voluntary agreements or exchanges nothing more than legalized plunder.',
 'a case in point.',
 'far off the mark.',
 'Episodes of structural change, therefore, tend to occur only when massive, noninstitutionalized disruption in the form of riots, attacks on property, unruly demonstrations, theft, arson, and open defiance threatens established institutions.',
 'wildcat strikers.',
 'lay claim to the moral high ground of democratic politics.',
 'in vain',
 'foot-dragging,',
 '

revolution ended by creating a state more powerful than the one it overthrew, a state that in turn was able to extract more resources from and exercise more control over the very populations it was designed to serve.  ->  革命以建立一个比其推翻的国家更强大的国家而告终，而该国家反过来又可以从其计划服务的人口中提取更多资源并对其行使更多控制权。
bear out the adage of  ->  忍受
“Freedom without socialism is privilege  ->  “没有社会主义的自由就是特权
and injustice; socialism without freedom is slavery and brutality.”  ->  和不公正；没有自由的社会主义就是奴隶制和野蛮。”
nomothetic  ->  理性的
wary  ->  警惕
One thing that heaves into view,  ->  一件事让人眼前一亮，
In light of the  ->  鉴于
huge strides  ->  大步向前
There is no authentic freedom where huge differences make voluntary agreements or exchanges nothing more than legalized plunder.  ->  没有真正的自由，只有巨大的分歧才能达成自愿协议或交换合法的掠夺物。
a case in point.  ->  一个恰当的例子。
far off the mark.  ->  远远没有达到目标。
Episodes of structural change, therefore, tend to occur only when massive, noninstitutionalized disruption in the form of riots, attacks on property, unruly demonstrat

In [57]:
text

'revolution ended by creating a state more powerful than the one it overthrew, a state that in turn was able to extract more resources from and exercise more control over the very populations it was designed to serve.'

In [None]:
titles = []
texts = []


def str_to_bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value — including ``"False"`` — becomes ``True``. This converter
    interprets the text instead.

    Args:
        value: The raw argument string (or an actual ``bool``).

    Returns:
        bool: The parsed flag.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


if __name__=='__main__':
    # Read in command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("-source", type=str, default="/Volumes/Kindle/documents/My Clippings.txt")
    parser.add_argument("-showtitle", type=str_to_bool, default=False)
    parser.add_argument("-title", type=str, default='')
    parser.add_argument("-dest", type=str, default='')
    args = parser.parse_args()

    # NOTE(review): the main() defined in this notebook takes no arguments and
    # returns None, while this call expects main(source) to return a DataFrame
    # with 'title' and 'note' columns. Confirm which main() this cell belongs to.
    df = main(args.source)
    if args.showtitle:
        print('\n\n'.join(df.title.unique().tolist()))
    if args.title!='' and args.dest!='':
        if args.title not in df.title.tolist():
            print("Error: book title is not in file")
            # Non-zero status so callers can detect the failure.
            sys.exit(1)

        # Export the notes of the requested title; this used to be hard-coded
        # to one specific book regardless of -title.
        selected_notes = df[df['title']==args.title]['note'].tolist()
        with open(args.dest,'w', encoding='utf-8') as text_file:
            text_file.write('\n\n\n'.join(selected_notes))
                