In [1]:
import pandas as pd
import numpy as np
import os
import sys
import re
import datetime

sys.path.append('../python_scripts/')
sys.path.append('../ML')
from update_lipstick import *

# Read a random entry from the first 10 rows (head) of the lipstick file

In [None]:
# Location of the LIPSTICK file that the cells below load with pd.read_csv.
# NOTE(review): hard-coded, machine-specific absolute path — consider deriving it
# from a configurable base directory so the notebook runs on other machines.
lipstick_path = '/Users/pabloherrero/Documents/ManHatTan/LIPSTICK/Die_Verwandlung.lip'

In [26]:
def set_question(lipstick_path : str, size_head : int = 10):
    """Pick a random question/answer pair from the head of a lipstick file.

    Parameters
    ----------
    lipstick_path : str
        Path to the lipstick CSV; it must contain the columns ``lexeme_id``
        and ``word_id``.
    size_head : int
        Maximum number of leading rows to draw from. Files shorter than
        this are handled: the draw is limited to the rows actually present.

    Returns
    -------
    tuple
        ``(question, answer)`` where ``question`` is the row's ``lexeme_id``
        and ``answer`` is its ``word_id``.

    Raises
    ------
    ValueError
        If no rows could be read (empty file or ``size_head`` of 0).
    """
    lips_head = pd.read_csv(lipstick_path, nrows = size_head)
    n_rows = len(lips_head)
    if n_rows == 0:
        raise ValueError("No entries available in '%s' to build a question" % lipstick_path)

    # Bound the index by the rows actually read, not by the requested head size:
    # read_csv silently returns fewer rows when the file is shorter than size_head.
    rndi = np.random.randint(0, n_rows)
    qentry = lips_head.iloc[rndi]
    question, answer = qentry.lexeme_id, qentry.word_id
    return question, answer

def rnd_options(lipstick_path : str, n_options : int = 3, size_head : int = 0):
    """Pick random distractor answers from a lipstick file.

    Parameters
    ----------
    lipstick_path : str
        Path to the lipstick CSV; it must contain a ``word_id`` column.
    n_options : int
        Number of distinct false answers wanted. If the file holds fewer
        distinct ``word_id`` values, all of them are returned.
    size_head : int
        Number of leading rows to draw from; ``0`` means the whole file.

    Returns
    -------
    dict
        ``{word_id: False}`` for every selected distractor. The caller is
        expected to add the correct answer with value ``True``.
    """
    if size_head == 0:
        lips_head = pd.read_csv(lipstick_path)
    else:
        lips_head = pd.read_csv(lipstick_path, nrows = size_head)

    # Draw from the distinct values that were actually read: this avoids indexing
    # past the end of a short file and stops repeated draws from collapsing into
    # a single dict key (which used to yield fewer than n_options distractors).
    candidates = lips_head['word_id'].drop_duplicates().to_numpy()
    n_pick = min(n_options, len(candidates))
    if n_pick <= 0:
        return {}

    picked = np.random.choice(candidates, size=n_pick, replace=False)
    return {word: False for word in picked}

In [77]:
def shuffle_dic(opts : dict):
    """Return the options as an OrderedDict whose entries are in random order.

    The (key, value) pairs of ``opts`` are copied into a list, shuffled with
    the standard ``random`` module and used to build a new OrderedDict, so the
    input mapping itself is not modified. Intended for laying the answer
    options out in a grid.
    """
    import collections
    import random

    pairs = [(key, value) for key, value in opts.items()]
    random.shuffle(pairs)
    return collections.OrderedDict(pairs)

In [33]:
# Display the full record of the entry whose lexeme_id equals `question`.
# NOTE(review): neither `lipstick` nor `question` is assigned in any cell visible
# above this one (unless the star import provides them) — this cell appears to
# depend on kernel state left over from out-of-order execution.
lipstick.set_index('lexeme_id').loc[question]

p_recall                                            0
timestamp                                  1584895152
delta                                               0
user_id                                         pablo
learning_language                                  de
ui_language                                        en
word_id                                         crash
lexeme_string        lernt/lernen<vblex><pri><p3><sg>
history_seen                                        0
history_correct                                     0
session_seen                                        0
session_correct                                     0
p_pred                                         0.9999
Name: Krach, dtype: object

In [95]:
# Build one multiple-choice round: draw the question, collect the false options,
# mark the right answer as True, then randomise the display order.
qu, answ = set_question(lipstick_path)
opts = {**rnd_options(lipstick_path), answ: True}
opts = shuffle_dic(opts)
opts

OrderedDict([('Dusk', False),
             ('inconvenience', False),
             ('armpits', False),
             ('approved', True)])

# Update practice history and timestamps

In [144]:
# Load the whole lipstick file into memory.
lipstick = pd.read_csv(lipstick_path)
# No-op: a bare expression in the middle of a cell is not displayed.
lipstick

# Draw a random question from the head of the file and print its lexeme_id.
qu, answ = set_question(lipstick_path)
print(qu)
# NOTE(review): `ind` is not used by any later cell visible in this notebook.
ind = lipstick.index

zitterte


In [147]:
# Index the rows by lexeme_id (drop=False keeps it as a regular column too) so an
# entry can be addressed by its word. inplace=True mutates `lipstick` itself.
lipstick.set_index('lexeme_id', inplace=True, drop=False)
# Show the record for the question drawn above.
lipstick.loc[qu]

p_recall                                            1
timestamp                                  1585754167
delta                                          859015
user_id                                         pablo
learning_language                                  de
ui_language                                        en
lexeme_id                                    zitterte
word_id                                      trembled
lexeme_string        lernt/lernen<vblex><pri><p3><sg>
history_seen                                        1
history_correct                                     1
session_seen                                        0
session_correct                                     0
p_pred                                     0.00194083
Name: zitterte, dtype: object

In [133]:
def update_performance(lipstick : pd.DataFrame, iw : str, perf : float):
    """Record one practice attempt for entry ``iw`` and refresh its recall rate.

    Parameters
    ----------
    lipstick : pd.DataFrame
        Word bank indexed by word, with the columns ``history_seen``,
        ``history_correct`` and ``p_recall``. It is modified in place.
    iw : str
        Index label of the practised entry.
    perf : float
        Score of this attempt; it is added to ``history_correct``
        (presumably 1 for a correct answer and 0 for a miss — confirm with callers).

    Returns
    -------
    pd.DataFrame
        The same ``lipstick`` object, returned for convenience.

    Raises
    ------
    KeyError
        If ``iw`` is not present in the index.
    """
    seen = lipstick.loc[iw, 'history_seen'] + 1
    correct = lipstick.loc[iw, 'history_correct'] + perf

    lipstick.loc[iw, 'history_seen'] = seen
    lipstick.loc[iw, 'history_correct'] = correct
    # Recall rate is the fraction of attempts answered correctly (correct / seen).
    # `seen` has just been incremented, so dividing by it cannot hit zero after a
    # first attempt, unlike the inverse ratio.
    lipstick.loc[iw, 'p_recall'] = correct / seen
    return lipstick

def update_timedelta(lipstick : pd.DataFrame, iw : str):
    """Stamp entry ``iw`` as practised now and recompute every row's ``delta``.

    Parameters
    ----------
    lipstick : pd.DataFrame
        Word bank with a ``timestamp`` column (seconds since the epoch).
        It is modified in place.
    iw : str
        Index label of the entry that was just practised.

    Returns
    -------
    pd.DataFrame
        The same ``lipstick`` object, with ``timestamp`` updated for ``iw`` and
        ``delta`` set, for all rows, to the offset from the oldest timestamp
        in the table.
    """
    now = int(datetime.datetime.now().timestamp())
    lipstick.loc[iw, 'timestamp'] = now
    # Item assignment rather than attribute assignment: `lipstick.delta = ...`
    # only sets a plain Python attribute when the column does not exist yet.
    lipstick['delta'] = lipstick['timestamp'] - lipstick['timestamp'].min()
    return lipstick

In [148]:
# Record a correct attempt (perf=1) for the selected entry and show the table.
# update_performance returns the frame it was given, so `newLips` is the same
# object as `lipstick`, which has been modified in place.
# NOTE(review): `question` is not assigned in any visible cell; the word drawn
# above is stored in `qu`. If `question` is a string, `question[0]` is only its
# first character — confirm that `qu` was not intended here.
newLips = update_performance(lipstick, question[0], perf=1)
newLips

Unnamed: 0_level_0,p_recall,timestamp,delta,user_id,learning_language,ui_language,lexeme_id,word_id,lexeme_string,history_seen,history_correct,session_seen,session_correct,p_pred
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
billigte,0.000000,1584895152,0,pablo,de,en,billigte,approved,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
Krach,0.000000,1584895152,0,pablo,de,en,Krach,crash,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
zitterte,1.000000,1585754167,859015,pablo,de,en,zitterte,trembled,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001941
Hausknecht,1.000000,1584895152,0,pablo,de,en,Hausknecht,servant,lernt/lernen<vblex><pri><p3><sg>,2,2.0,1,1,0.999900
Abenddämmerung,1.000000,1586377871,1482719,pablo,de,en,Abenddämmerung,Dusk,lernt/lernen<vblex><pri><p3><sg>,2,2.0,0,0,0.000100
allmählich,1.000000,1585818771,923619,pablo,de,en,allmählich,gradually,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001213
Faust,1.000000,1584895152,0,pablo,de,en,Faust,Faust,lernt/lernen<vblex><pri><p3><sg>,2,2.0,2,2,0.999900
unweigerlich,1.000000,1584895152,0,pablo,de,en,unweigerlich,inevitably,lernt/lernen<vblex><pri><p3><sg>,1,1.0,1,1,0.999900
Sehkraft,1.000000,1584895152,0,pablo,de,en,Sehkraft,eyesight,lernt/lernen<vblex><pri><p3><sg>,2,2.0,2,2,0.999900
anstarrten,1.000000,1586349721,1454569,pablo,de,en,anstarrten,staring,lernt/lernen<vblex><pri><p3><sg>,2,2.0,0,0,0.000100


# Recover word_id column

In [267]:
# Paths of the lipstick file to repair and of the file used to recover the
# translations from.
# NOTE(review): hard-coded, machine-specific absolute paths.
lipstick_path = '/Users/pabloherrero/Documents/ManHatTan/LIPSTICK/Die_Verwandlung.lip'
recovery_lipstick_path = '/Users/pabloherrero/Documents/ManHatTan/GOTAs/Die_Verwandlung.agot'

# Load both files as CSV; the recovery frame is displayed for inspection.
lipstick = pd.read_csv(lipstick_path)
reclipstick = pd.read_csv(recovery_lipstick_path)
reclipstick

Unnamed: 0.1,Unnamed: 0,de,en,creation_time,seen_hist,right_hist
0,0,Ungeziefer,vermin,2020-03-22 17:39:12.011106,0,0.0
1,1,Versteifungen,stiffeners,2020-03-22 17:39:12.011106,2,2.0
2,2,Umfang,scope,2020-03-22 17:39:12.011106,3,1.0
3,3,flimmerten,flickered,2020-03-22 17:39:12.011106,0,0.0
4,4,versehen,isprovided,2020-03-22 17:39:12.011106,0,0.0
5,5,Fensterblech,Windowsheet,2020-03-22 17:39:12.011106,0,0.0
6,6,undurchführbar,impracticable,2020-03-22 17:39:12.011106,1,1.0
7,7,schaukelte,rocked,2020-03-22 17:39:12.011106,1,1.0
8,8,Jucken,itching,2020-03-22 17:39:12.011106,0,0.0
9,9,Pult,panel,2020-03-22 17:39:12.011106,0,0.0


In [268]:
# Work on a copy indexed by lexeme_id (drop=False keeps the column as well).
lips2 = lipstick.set_index('lexeme_id', drop=False)
# Disabled manual repair of one lexeme_id, kept for reference:
# lips2.loc['0.0olgedessen','lexeme_id'] = 'Infolgedessen'
# lips2 = lipstick.set_index('lexeme_id', drop=False)
lips2

Unnamed: 0_level_0,p_recall,timestamp,delta,user_id,learning_language,ui_language,lexeme_id,lexeme_string,history_seen,history_correct,session_seen,session_correct,p_pred
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
billigte,0.000000,1584895152,0,pablo,de,en,billigte,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
Krach,0.000000,1584895152,0,pablo,de,en,Krach,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
zitterte,1.000000,1585754167,859015,pablo,de,en,zitterte,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001941
Hausknecht,1.000000,1584895152,0,pablo,de,en,Hausknecht,lernt/lernen<vblex><pri><p3><sg>,1,1.0,1,1,0.999900
Abenddämmerung,1.000000,1586533953,1638801,pablo,de,en,Abenddämmerung,lernt/lernen<vblex><pri><p3><sg>,3,3.0,0,0,0.000100
allmählich,1.000000,1585818771,923619,pablo,de,en,allmählich,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001213
Faust,1.000000,1586534073,1638921,pablo,de,en,Faust,lernt/lernen<vblex><pri><p3><sg>,3,3.0,2,2,0.999900
unweigerlich,1.000000,1584895152,0,pablo,de,en,unweigerlich,lernt/lernen<vblex><pri><p3><sg>,1,1.0,1,1,0.999900
Sehkraft,1.000000,1584895152,0,pablo,de,en,Sehkraft,lernt/lernen<vblex><pri><p3><sg>,2,2.0,2,2,0.999900
anstarrten,1.000000,1586349721,1454569,pablo,de,en,anstarrten,lernt/lernen<vblex><pri><p3><sg>,2,2.0,0,0,0.000100


In [269]:
# Spot-check: look up a single entry by its lexeme_id label.
lips2.loc['Zuversicht']

p_recall                                            1
timestamp                                  1585755974
delta                                          860822
user_id                                         pablo
learning_language                                  de
ui_language                                        en
lexeme_id                                  Zuversicht
lexeme_string        lernt/lernen<vblex><pri><p3><sg>
history_seen                                        1
history_correct                                     1
session_seen                                        0
session_correct                                     0
p_pred                                      0.0019155
Name: Zuversicht, dtype: object

In [270]:
# Sanity check: the index and the lexeme_id column have the same length.
len(lips2.index.values), len(lips2['lexeme_id'])

(103, 103)

In [271]:
# Index the recovery frame by its 'de' column (kept as a column too) and name
# that index 'lexeme_id', so translations can be looked up by the same labels
# that index lips2. Both calls modify `reclipstick` in place.
reclipstick.set_index('de', drop=False, inplace=True)
reclipstick.index.rename('lexeme_id', inplace=True)
reclipstick

Unnamed: 0_level_0,Unnamed: 0,de,en,creation_time,seen_hist,right_hist
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ungeziefer,0,Ungeziefer,vermin,2020-03-22 17:39:12.011106,0,0.0
Versteifungen,1,Versteifungen,stiffeners,2020-03-22 17:39:12.011106,2,2.0
Umfang,2,Umfang,scope,2020-03-22 17:39:12.011106,3,1.0
flimmerten,3,flimmerten,flickered,2020-03-22 17:39:12.011106,0,0.0
versehen,4,versehen,isprovided,2020-03-22 17:39:12.011106,0,0.0
Fensterblech,5,Fensterblech,Windowsheet,2020-03-22 17:39:12.011106,0,0.0
undurchführbar,6,undurchführbar,impracticable,2020-03-22 17:39:12.011106,1,1.0
schaukelte,7,schaukelte,rocked,2020-03-22 17:39:12.011106,1,1.0
Jucken,8,Jucken,itching,2020-03-22 17:39:12.011106,0,0.0
Pult,9,Pult,panel,2020-03-22 17:39:12.011106,0,0.0


In [272]:
# No-op lookup left over from exploration (mid-cell expressions are not displayed).
reclipstick['en']['Ungeziefer']
# Rebuild the word_id column: for every lexeme_id in lips2, fetch the matching
# 'en' value from the recovery frame, preserving the row order of lips2.
word_id = []
for w in lips2.index:
    # NOTE(review): ANY lexeme missing from the recovery frame is given the
    # translation 'As a result'. That looks intended for a single known entry;
    # other missing words would be silently mistranslated — confirm that only
    # one key can be absent.
    try:
        word_id.append(reclipstick['en'][str(w)])
    except KeyError:
        word_id.append('As a result')
word_id

['approved',
 'crash',
 'trembled',
 'servant',
 'Dusk',
 'gradually',
 'Faust',
 'inevitably',
 'eyesight',
 'staring',
 'armpits',
 'affair',
 'plump',
 'betrayed',
 'itching',
 'cane',
 'peakhairstyle',
 'swayed',
 'failure',
 'taken',
 'sibilance',
 'ruffled',
 'inaccessible',
 'well',
 'scar',
 'rags',
 'lodgers',
 'stay',
 'overabundant',
 'rocked',
 'Heidengeld',
 'impracticable',
 'defend',
 'balls',
 'delicate',
 'refused',
 'confidence',
 'inconvenience',
 'Gosh',
 'actions',
 'lag',
 'final',
 'manifold',
 'grim',
 'froze',
 'Windowsheet',
 'faintingsimilar',
 'indifference',
 'indetail',
 'flickered',
 'sob',
 'encouragement',
 'sobs',
 'edges',
 'disengaged',
 'vermin',
 'As a result',
 'stiffeners',
 'efficient',
 'Kiefer',
 'ceiling',
 'touched',
 'resolve',
 'rationalarguments',
 'cheerfulness',
 'tired',
 'communicate',
 'gracious',
 'deserved',
 'work-shy',
 'harbinger',
 'strongly',
 'jack',
 'sleeves',
 'obstinacy',
 'proficient',
 'Despite',
 'so',
 'corrosive',
 '

In [273]:
# No-op length check (not displayed because it is not the last expression).
len(word_id)
# Attach the recovered translations; the list was built in lips2 row order.
lips2['word_id'] = word_id

In [274]:
# Put the columns in the lipstick layout, with word_id placed just before lexeme_id.
lips2 = lips2[[
    'p_recall',
    'timestamp',
    'delta',
    'user_id',
    'learning_language',
    'ui_language',
    'word_id',
    'lexeme_id',
    'lexeme_string',
    'history_seen',
    'history_correct',
    'session_seen',
    'session_correct',
    'p_pred',
]]
lips2

Unnamed: 0_level_0,p_recall,timestamp,delta,user_id,learning_language,ui_language,word_id,lexeme_id,lexeme_string,history_seen,history_correct,session_seen,session_correct,p_pred
lexeme_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
billigte,0.000000,1584895152,0,pablo,de,en,approved,billigte,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
Krach,0.000000,1584895152,0,pablo,de,en,crash,Krach,lernt/lernen<vblex><pri><p3><sg>,0,0.0,0,0,0.999900
zitterte,1.000000,1585754167,859015,pablo,de,en,trembled,zitterte,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001941
Hausknecht,1.000000,1584895152,0,pablo,de,en,servant,Hausknecht,lernt/lernen<vblex><pri><p3><sg>,1,1.0,1,1,0.999900
Abenddämmerung,1.000000,1586533953,1638801,pablo,de,en,Dusk,Abenddämmerung,lernt/lernen<vblex><pri><p3><sg>,3,3.0,0,0,0.000100
allmählich,1.000000,1585818771,923619,pablo,de,en,gradually,allmählich,lernt/lernen<vblex><pri><p3><sg>,1,1.0,0,0,0.001213
Faust,1.000000,1586534073,1638921,pablo,de,en,Faust,Faust,lernt/lernen<vblex><pri><p3><sg>,3,3.0,2,2,0.999900
unweigerlich,1.000000,1584895152,0,pablo,de,en,inevitably,unweigerlich,lernt/lernen<vblex><pri><p3><sg>,1,1.0,1,1,0.999900
Sehkraft,1.000000,1584895152,0,pablo,de,en,eyesight,Sehkraft,lernt/lernen<vblex><pri><p3><sg>,2,2.0,2,2,0.999900
anstarrten,1.000000,1586349721,1454569,pablo,de,en,staring,anstarrten,lernt/lernen<vblex><pri><p3><sg>,2,2.0,0,0,0.000100


In [275]:
# Overwrite the original lipstick file with the repaired table. index=False is
# safe here because lexeme_id is also kept as a regular column.
# NOTE(review): this replaces the source file in place with no backup.
lips2.to_csv(lipstick_path, index=False)

# Overall book processor

In [2]:
from rashib import *
from krahtos import *
from bulkTranslate import *
from init_lipstick import *

In [3]:
# sys.path.append('~/Documents/ManHatTan/python_scripts/')
#from test_bulkTranslate import *

# GUI select raw kindle/playbooks -> file
# basename = input()
# dest_lang = input()
# src_lang = input()
# filename = blabla

# Input export to process and the language pair handed to the translator.
# NOTE(review): hard-coded, machine-specific absolute path.
filename = '/Users/pabloherrero/Documents/ManHatTan/kindle_raw/Il castello dei destini incrociati - Bloc-notes.html'
dest_lang = 'es'
src_lang = 'it'
# Split into directory and file name; `basename` drives the format detection below.
filepath, basename = os.path.split(filename)

In [4]:
# Choose the extractor from the file extension. Comparing the extension (rather
# than searching for a substring anywhere in the name) keeps a file such as
# "Learning html.docx" from being routed to the Kindle parser.
extension = os.path.splitext(basename)[1].lower()

if extension == '.html':
    print('Amazon Kindle file detected')
    cder_path = krahtos_main(filename)

elif extension == '.docx':
    print('Google Books file detected')
    cder_path = rashib_main(filename)

else:
    # Fail here with a clear message instead of leaving `cder_path` undefined
    # for the cells that follow.
    raise ValueError("Unsupported input file '%s': expected a .html or .docx export" % basename)

Amazon Kindle file detected
Loading file  /Users/pabloherrero/Documents/ManHatTan/kindle_raw/Il castello dei destini incrociati - Bloc-notes.html
Successfully extracted 149 highlighted entries
Created CADERA file /Users/pabloherrero/Documents/ManHatTan/CADERAs/Il_castello_dei_destini_incrociati.cder


In [6]:
# Run the rest of the pipeline on the file produced by the extraction step:
# translate its entries, then initialise the word bank from the translated file.
# The asserts check that each step returned a path with the expected extension.
assert '.cder' in cder_path, "Wrong CADERA extension"
gota_path = bulkTranslate_main(cder_path, dest_lang, src_lang)
assert '.got' in gota_path, "Wrong GOTA extension"
print('Initializing word bank...')
init_lipstick_main(gota_path)
print("Done! You can start practicing")

Entry with more than 3 words detected:  uno di quei sapienti che scrutando in alambicc
Entry with more than 3 words detected:  tentano di strappare alla natura i suoi segreti
Entry with more than 3 words detected:  Vuoi la ricchezza Denari o la forza Spade 
Entry with more than 3 words detected:  udimmo il suono delle trombe già le lance vol
Entry with more than 3 words detected:  LEremita qui rappresentato come un vecchio g
Entry with more than 3 words detected:  su nei campi pallidi della Luna dove uno ster
Entry with more than 3 words detected:  nelle vecchie fantasie dun mondo allincontra
Attempted translation of 128 entries. Check DB for mistranslations.
Created GOTA file /Users/pabloherrero/Documents/ManHatTan/GOTAs/Il_castello_dei_destini_incrociati.got
Initializing word bank...
Created LIPSTICK file /Users/pabloherrero/Documents/ManHatTan/LIPSTICK/Il_castello_dei_destini_incrociati.lip
Done! You can start practicing
