<a href="https://colab.research.google.com/github/mannmoshe/text-recognition/blob/main/text_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# based on:
# https://deeplearningcourses.com/c/data-science-natural-language-processing-in-python 
# https://www.udemy.com/data-science-natural-language-processing-in-python Section 7 Cipher Decryption

# Author: Moshe Mann

import numpy as np
np.seterr(divide = 'ignore') 

import string
import random
import requests

In [4]:
for l in range(1488, 1515):
  print(l - 1488, chr(l))

0 א
1 ב
2 ג
3 ד
4 ה
5 ו
6 ז
7 ח
8 ט
9 י
10 ך
11 כ
12 ל
13 ם
14 מ
15 ן
16 נ
17 ס
18 ע
19 ף
20 פ
21 ץ
22 צ
23 ק
24 ר
25 ש
26 ת


In [5]:
### the language model

# initialize Markov matrix
# MM: matrix for the bi-gram probabilites
M = np.ones((27, 27))

# initial state distribution
# MM: vector for the uni-gram probabilites
# MM: uni-gram probabilites calculated only by first letters of the words
pi = np.zeros(27)

# a function to update the Markov matrix
def update_transition(ch1, ch2):
  # ord('א') = 1488, ord('ב') = 1489, ...
  i = ord(ch1) - 1488 # 'א' will mapped to 0, 'ב' will mapped to 1 and so on
  j = ord(ch2) - 1488
  M[i,j] += 1

# a function to update the initial state distribution
def update_pi(ch):
  i = ord(ch) - 1488
  pi[i] += 1

# get the log-probability of a word / token
def get_word_prob(word):
  # print("word:", word)
  i = ord(word[0]) - 1488
  logp = np.log(pi[i])

  for ch in word[1:]:
    j = ord(ch) - 1488
    logp += np.log(M[i, j]) # update prob
    i = j # update i to j

  return logp

# get the probability of a sequence of words
def get_sequence_prob(words):
  # if input is a string, split into an array of tokens
  if type(words) == str:
    words = words.split()

  logp = 0
  for word in words:
    logp += get_word_prob(word)
  return logp

In [6]:
# with open('torah_text.txt', encoding='ISO-8859-8') as f:
#     torah_text = f.read()
req = requests.get("https://raw.githubusercontent.com/mannmoshe/text-recognition/main/torah_text.txt")
req.encoding = 'ISO-8859-8'
torah_text = req.text

In [7]:
torah_text[:50]

'  בראשית ברא אלהים את השמים ואת הארץ והארץ היתה תה'

מספר הפעמים שהאות א מופיעה שלא בסוף המילה

In [8]:
torah_text.count('א') - torah_text.count('א ')

23502

In [9]:
torah_text.count('אב')

972

In [10]:
tokens = torah_text.split()

for token in tokens:
  # update the model

  # first letter
  ch0 = token[0]
  update_pi(ch0)

  # other letters
  for ch1 in token[1:]:
    update_transition(ch0, ch1)
    ch0 = ch1

pi_initial_count = pi.copy()
M_initial_count = M.copy()

# normalize the probabilities
pi /= pi.sum()
M /= M.sum(axis=1, keepdims=True)

first letter distribution:

In [9]:
pi_initial_count

array([13177.,  6162.,   532.,   549.,  8724., 13776.,   634.,  1073.,
         251.,  5585.,     0.,  3434.,  6859.,     0.,  5383.,     0.,
        1442.,   291.,  3983.,     0.,  1022.,     0.,   515.,   793.,
         857.,  2736.,  2133.])

The probabilties vector of first letters in word:

In [10]:
pi

array([0.16489595, 0.07711079, 0.00665741, 0.00687014, 0.10917145,
       0.17239179, 0.00793383, 0.01342744, 0.00314099, 0.06989025,
       0.        , 0.04297281, 0.08583299, 0.        , 0.06736244,
       0.        , 0.01804508, 0.00364155, 0.04984295, 0.        ,
       0.01278923, 0.        , 0.00644467, 0.00992354, 0.01072443,
       0.03423809, 0.0266922 ])

In [11]:
pi.sum()

1.0

In [12]:
M_initial_count[0]

array([2.000e+00, 9.730e+02, 1.600e+01, 5.360e+02, 9.610e+02, 1.111e+03,
       1.030e+02, 1.098e+03, 4.000e+00, 1.073e+03, 1.010e+02, 4.900e+02,
       4.090e+03, 3.880e+02, 1.688e+03, 1.060e+02, 5.920e+02, 1.140e+02,
       5.500e+01, 5.300e+01, 1.450e+02, 2.000e+00, 5.600e+01, 3.200e+01,
       1.460e+03, 2.737e+03, 5.543e+03])

In [13]:
M_initial_count[0].sum()

23529.0

The probabilties matrix of bi-gram where the first letter is: א

In [14]:
M[0]

array([8.50014875e-05, 4.13532237e-02, 6.80011900e-04, 2.27803987e-02,
       4.08432148e-02, 4.72183263e-02, 4.37757661e-03, 4.66658167e-02,
       1.70002975e-04, 4.56032981e-02, 4.29257512e-03, 2.08253644e-02,
       1.73828042e-01, 1.64902886e-02, 7.17412555e-02, 4.50507884e-03,
       2.51604403e-02, 4.84508479e-03, 2.33754091e-03, 2.25253942e-03,
       6.16260785e-03, 8.50014875e-05, 2.38004165e-03, 1.36002380e-03,
       6.20510859e-02, 1.16324536e-01, 2.35581623e-01])

In [15]:
M.sum(axis=1)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [16]:
get_sequence_prob('וידבר יהוה אל משה לאמר')

-38.291733346990895

In [17]:
letters = [chr(l) for l in range(1488, 1515)]

random_texts = []
random_scores = []
for i in range(1000):
  random_text = ''.join(random.choice(letters) for i in range(5)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(2)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(3)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) 
  #print (random_text)
  random_scores.append(get_sequence_prob(random_text))
  random_texts.append(random_text)

print(len(random_scores))
print(random_texts[:10])
print(random_scores[:10])
print('average_score:', sum(random_scores) / len(random_scores))
print("\n")
fixed_random_scores = [value for value in random_scores if value != float('-inf')]
print(len(fixed_random_scores))
#print(fixed_random_scores)
print('average_score:', sum(fixed_random_scores) / len(fixed_random_scores))

1000
['צנבשא צההל בג ףמו אחיר', 'מלשרן גידצ ןכ תזכ ןהיא', 'ףובלך וחפנ עך תכך חרסע', 'צוזבו סץקן שפ ףצר נאחנ', 'רפאטק אסבג מק ןכר ףסדכ', 'שןגפם פשךפ פש פעח שפזף', 'קגוןף סזיג ךא םתנ ץחףמ', 'טיצגו סבקף הש יצל ינחץ', 'קףךךש בדהס נר רזז כצכם', 'םעירח עץיה מן חכח וגשר']
[-inf, -inf, -inf, -inf, -inf, -82.1269705208731, -inf, -68.97807087590743, -76.48659236601486, -inf]
average_score: -inf


365
average_score: -74.04106404421832


get_sequence_prob('וידבר יהוה אל משה לאמר') is bigger than average score of a sentence with a same structure.

In [18]:
get_sequence_prob('ואהבתם את הגר')

-27.056798620938007

In [19]:
get_sequence_prob('ואהבת לרעך כמוך')

-42.81038222930017

In [20]:
get_sequence_prob('וילך משה ויאמר אל עמו הנה פרעה משלח אתכם ואת טפכם')

-101.2397427430866

In [21]:
letters = [chr(l) for l in range(1488, 1515)]

random_texts = []
random_scores = []
for i in range(1000):
  random_text = ''.join(random.choice(letters) for i in range(4)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(3)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(5)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(2)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(3)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(3)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(3)) + ' ' \
                 + ''.join(random.choice(letters) for i in range(4)) 
  #print (random_text)
  random_scores.append(get_sequence_prob(random_text))
  random_texts.append(random_text)

print(len(random_scores))
print(random_texts[:10])
print(random_scores[:10])
print('average_score:', sum(random_scores) / len(random_scores))
print("\n")
fixed_random_scores = [value for value in random_scores if value != float('-inf')]
print(len(fixed_random_scores))
#print(fixed_random_scores)
print('average_score:', sum(fixed_random_scores) / len(fixed_random_scores))

1000
['ךףונ לזס לצףיט מק ץסז חאא ףוםך תהסב סנבך וףץ אךוף', 'אניע ךחי קמחאש בצ יכל תסט לךנפ רשחה נגסג ףמר גכתצ', 'אסלע נטז ףםכקט ות זםא מטק תגוח זעמש תטסד בזר אכעס', 'סמףך אשף בףגץן לח סץב מדנ סנזל ןצמט עכחק אףז ישבנ', 'גגעט ץדי וכויס סצ הךם כעק חםםע דאץף בםצק לדד יאלו', 'םגםר ייכ תצרןך גג ותנ אךה ץבפז תלשט סךןמ קסם נכתת', 'אץרד םשה בשדיפ כם ךזצ כקל חבםף סןאר ככרר ירס לתקע', 'ביןף ודצ רדגבי סן יםת גתכ לוזף גגצק תשןר ןעף יטזפ', 'מאעי הות ןהלטת נת ואס נזף חעגך פנזא טחתפ הלג לםטכ', 'סץשף נצש בךפדב נש פמק תףמ ןרזץ ץאפט צקףע עעק מאיד']
[-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf]
average_score: -inf


99
average_score: -159.80117064042562


In [22]:
torah_text[:50]

'  בראשית ברא אלהים את השמים ואת הארץ והארץ היתה תה'

In [11]:
torah_text_no_spaces = torah_text.strip().replace(" ", "").replace("  ", "")

In [24]:
print(torah_text_no_spaces[:50]+'\n')
print(torah_text_no_spaces[-50:]+'\n')
print(len(torah_text_no_spaces))

בראשיתבראאלהיםאתהשמיםואתהארץוהארץהיתהתהוובהווחשךעל

רצוולכלהידהחזקהולכלהמוראהגדולאשרעשהמשהלעיניכלישראל

304600


Check if spaces affect the probability:

In [25]:
get_sequence_prob('אבגד הוז חטיכ')

-33.55358404834101

In [27]:
get_sequence_prob('אבג דהו זחטיכ')

-38.653909358008555

In [12]:
torah_skips_2_s0 = ''
for i in range(0, len(torah_text_no_spaces), 2):
  torah_skips_2_s0 += torah_text_no_spaces[i]

In [13]:
torah_skips_2_s1 = ''
for i in range(1, len(torah_text_no_spaces), 2):
  torah_skips_2_s1 += torah_text_no_spaces[i]

In [None]:
torah_skips_2_s0[:100]

'באיבאליאהמםאהרואץיההוהושעפיהםרחלימחתלנהיוארליייווהאריאליאהוכטבידאהםיהוויהשוקאלילויםלשקאיהיירוהברואדי'

In [None]:
torah_skips_2_s1[:100]

'רשתראהםתשיותאץהרהתתובוחךלנתווואהםרפעפימםימאהםהארייווראהםתאריוובלליבןארבןחךיראהםארווחךרללוהעבייקיםחוא'

Trying recognize true text 20,15,10 characters long in skips of 2.

In [None]:
words_probs_20 = {}

for i in range(len(torah_skips_2_s0) - 20):
  word = torah_skips_2_s0[i:i + 20]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_20[word] = [get_sequence_prob(word), 'torah_skips_2_s0', i*2, i*2+20]

words_probs_20 = dict(sorted(words_probs_20.items(), key=lambda item: item[1], reverse=True))
print(len(words_probs_20))

140913


In [None]:
for i in range(len(torah_skips_2_s1) - 20):
  word = torah_skips_2_s1[i:i + 20]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_20[word] = [get_sequence_prob(word), 'torah_skips_2_s1', i*2+1, i*2+21]

words_probs_20 = dict(sorted(words_probs_20.items(), key=lambda item: item[1], reverse=True))
words_probs_20

{'ליהמויההויהיהאלויהיא': [-44.08113883474636,
  'torah_skips_2_s0',
  157446,
  157466],
 'המויההויהיהאלויהיארא': [-44.60101107602181,
  'torah_skips_2_s0',
  157450,
  157470],
 'יהמויההויהיהאלויהיאר': [-45.02072835211838,
  'torah_skips_2_s0',
  157448,
  157468],
 'ליאתויוהולקדבישאליאת': [-45.41074255019094,
  'torah_skips_2_s0',
  188204,
  188224],
 'אהויותתיויעלארבאלניו': [-46.75835038580793,
  'torah_skips_2_s0',
  119716,
  119736],
 'הדליהמויההויהיהאלויה': [-46.88343005717402,
  'torah_skips_2_s0',
  157442,
  157462],
 'אתויוהולקדבישאליאתמן': [-46.95681901063987,
  'torah_skips_2_s0',
  188208,
  188228],
 'יאתויוהולקדבישאליאתמ': [-47.29372863386202,
  'torah_skips_2_s0',
  188206,
  188226],
 'דליהמויההויהיהאלויהי': [-47.31267809831628,
  'torah_skips_2_s0',
  157444,
  157464],
 'אתעואתובתעשהובואתשרו': [-47.39464908050742,
  'torah_skips_2_s1',
  260211,
  260231],
 'אשחליאתויוהולקדבישאל': [-47.403494586730666,
  'torah_skips_2_s0',
  188198,
  188218],
 'ויהעואלארהלכימויהא

In [None]:
print(len(words_probs_20))

280906


In [None]:
words_probs_15 = {}

for i in range(len(torah_skips_2_s0) - 15):
  word = torah_skips_2_s0[i:i + 15]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_15[word] = [get_sequence_prob(word), 'torah_skips_2_s0', i*2, i*2+15]

words_probs_15 = dict(sorted(words_probs_15.items(), key=lambda item: item[1], reverse=True))
print(len(words_probs_15))

140257


In [None]:
for i in range(len(torah_skips_2_s1) - 15):
  word = torah_skips_2_s1[i:i + 15]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_15[word] = [get_sequence_prob(word), 'torah_skips_2_s1', i*2+1, i*2+16]

words_probs_15 = dict(sorted(words_probs_15.items(), key=lambda item: item[1], reverse=True))
words_probs_15

{'ויההויהיהאלויהי': [-31.04310673601242, 'torah_skips_2_s0', 157454, 157469],
 'הויהיהאלויהיארא': [-31.36510829645146, 'torah_skips_2_s0', 157460, 157475],
 'מויההויהיהאלויה': [-32.72746904447306, 'torah_skips_2_s0', 157452, 157467],
 'ליהמויההויהיהאל': [-32.99753544232238, 'torah_skips_2_s0', 157446, 157461],
 'המויההויהיהאלוי': [-33.13815426590379, 'torah_skips_2_s0', 157450, 157465],
 'יההויהיהאלויהיא': [-33.21591068383969, 'torah_skips_2_s0', 157456, 157471],
 'אתויוהולקדבישאל': [-33.3460027137177, 'torah_skips_2_s0', 188208, 188223],
 'ליאתויוהולקדביש': [-33.42425819402061, 'torah_skips_2_s0', 188204, 188219],
 'ההויהיהאלויהיאר': [-33.66457267992939, 'torah_skips_2_s0', 157458, 157473],
 'יהמויההויהיהאלו': [-33.801869556974765, 'torah_skips_2_s0', 157448, 157463],
 'ויותתיויעלארבאל': [-34.15652554227262, 'torah_skips_2_s0', 119720, 119735],
 'יואשרוהיאשושהוי': [-34.167634771120234, 'torah_skips_2_s1', 109211, 109226],
 'אתויאשאשהישביות': [-34.30320275468704, 'torah_skips_2_s0', 58

In [None]:
words_probs_10 = {}

for i in range(len(torah_skips_2_s0) - 10):
  word = torah_skips_2_s0[i:i + 10]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_10[word] = [get_sequence_prob(word), 'torah_skips_2_s0', i*2, i*2+10]

words_probs_10 = dict(sorted(words_probs_10.items(), key=lambda item: item[1], reverse=True))
print(len(words_probs_10))

138039


In [None]:
for i in range(len(torah_skips_2_s1) - 10):
  word = torah_skips_2_s1[i:i + 10]
  if get_sequence_prob(word) != float('-inf'):
    words_probs_10[word] = [get_sequence_prob(word), 'torah_skips_2_s1', i*2+1, i*2+11]

words_probs_10 = dict(sorted(words_probs_10.items(), key=lambda item: item[1], reverse=True))
words_probs_10

{'ויהיהאלויה': [-19.61682541097137, 'torah_skips_2_s0', 157462, 157472],
 'הויהיהאלוי': [-19.90225148633344, 'torah_skips_2_s0', 157460, 157470],
 'ויהוהמלהוא': [-20.482698322500163, 'torah_skips_2_s1', 173195, 173205],
 'יהיהאלויהי': [-20.80136822611412, 'torah_skips_2_s0', 157464, 157474],
 'היויאלהיוי': [-20.831798218967442, 'torah_skips_2_s0', 31956, 31966],
 'להואהאשרים': [-21.04272439102881, 'torah_skips_2_s1', 250857, 250867],
 'ויאתוישיני': [-21.080834132428514, 'torah_skips_2_s1', 23129, 23139],
 'ויההויהיהא': [-21.147195023425287, 'torah_skips_2_s0', 157454, 157464],
 'אברביוהוים': [-21.220433166169137, 'torah_skips_2_s1', 161651, 161661],
 'ליאתויוהול': [-21.275324576872816, 'torah_skips_2_s0', 188204, 188214],
 'אלאתותבנהו': [-21.277120174698, 'torah_skips_2_s1', 180109, 180119],
 'ויויהחתואש': [-21.279383193642765, 'torah_skips_2_s1', 111901, 111911],
 'וימהוהותים': [-21.362314943029144, 'torah_skips_2_s0', 86908, 86918],
 'אתויוהולקד': [-21.381304653172325, 'torah_skips_2

In [None]:
print(len(words_probs_10))

271983


In [20]:
import pandas as pd

In [None]:
df_10 = pd.DataFrame.from_dict(words_probs_10, orient='index').reset_index()

In [None]:
df_10.head()

Unnamed: 0,index,0,1,2,3
0,ויהיהאלויה,-19.616825,torah_skips_2_s0,157462,157472
1,הויהיהאלוי,-19.902251,torah_skips_2_s0,157460,157470
2,ויהוהמלהוא,-20.482698,torah_skips_2_s1,173195,173205
3,יהיהאלויהי,-20.801368,torah_skips_2_s0,157464,157474
4,היויאלהיוי,-20.831798,torah_skips_2_s0,31956,31966


In [None]:
df_10.columns = ['string', 'log_prob', 'skip_start_from', 'i_start', 'i_end']
df_10['skip_start_from'] = df_10['skip_start_from'].map({'torah_skips_2_s0': 's0', 'torah_skips_2_s1': 's1'})
df_10

Unnamed: 0,string,log_prob,skip_start_from,i_start,i_end
0,ויהיהאלויה,-19.616825,s0,157462,157472
1,הויהיהאלוי,-19.902251,s0,157460,157470
2,ויהוהמלהוא,-20.482698,s1,173195,173205
3,יהיהאלויהי,-20.801368,s0,157464,157474
4,היויאלהיוי,-20.831798,s0,31956,31966
...,...,...,...,...,...
271978,אאצכעוחאאי,-56.376110,s0,69652,69662
271979,חאצשעעיעגת,-56.647489,s1,48343,48353
271980,אאהזחאגכמם,-57.750187,s1,174363,174373
271981,אאאץעראאץל,-59.977906,s0,241914,241924


In [None]:
from google.colab import files

df_10.to_csv('df_10.csv', index=False ,encoding = 'utf-8-sig')
files.download('df_10.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
seq_probs_4_3_4 = {}

for i in range(len(torah_skips_2_s0) - 11):
  seq = torah_skips_2_s0[i:i + 4] + ' ' + torah_skips_2_s0[i + 4:i + 7] + ' ' + torah_skips_2_s0[i + 7:i + 11]
  if get_sequence_prob(seq) != float('-inf'):
    seq_probs_4_3_4[seq] = [get_sequence_prob(seq), 'torah_skips_2_s0', i*2, i*2+11]

seq_probs_4_3_4 = dict(sorted(seq_probs_4_3_4.items(), key=lambda item: item[1], reverse=True))
print(len(seq_probs_4_3_4))
#seq_probs_4_3_4

121221


In [18]:
for i in range(len(torah_skips_2_s1) - 11):
  seq = torah_skips_2_s1[i:i + 4] + ' ' + torah_skips_2_s1[i + 4:i + 7] + ' ' + torah_skips_2_s1[i + 7:i + 11]
  if get_sequence_prob(seq) != float('-inf'):
    seq_probs_4_3_4[seq] = [get_sequence_prob(seq), 'torah_skips_2_s0', i*2+1, i*2+12]

seq_probs_4_3_4 = dict(sorted(seq_probs_4_3_4.items(), key=lambda item: item[1], reverse=True))
print(len(seq_probs_4_3_4))
seq_probs_4_3_4

239711


{'ויהי האל ויהי': [-21.009082205986463, 'torah_skips_2_s0', 157462, 157473],
 'ליהמ ויה הויה': [-21.956500510014834, 'torah_skips_2_s0', 157446, 157457],
 'אתוי והו לקדב': [-22.240718945409675, 'torah_skips_2_s0', 188208, 188219],
 'והוי ויא אראר': [-22.684698556895086, 'torah_skips_2_s0', 27833, 27844],
 'ויהי ארא אליב': [-22.826855715617818, 'torah_skips_2_s0', 157476, 157487],
 'ליהי ויה להיא': [-22.917980219118903, 'torah_skips_2_s0', 31544, 31555],
 'אויה והמ להוא': [-22.972284814484585, 'torah_skips_2_s0', 173193, 173204],
 'מויה הוי היהא': [-23.114323712235723, 'torah_skips_2_s0', 157452, 157463],
 'אתעי ואי ואתי': [-23.18974602551204, 'torah_skips_2_s0', 294085, 294096],
 'הויה יהא לויה': [-23.20476626849819, 'torah_skips_2_s0', 157460, 157471],
 'ויהו המל הואש': [-23.224121572471184, 'torah_skips_2_s0', 173195, 173206],
 'ואשר והי אשוש': [-23.31871597444792, 'torah_skips_2_s0', 109213, 109224],
 'היוי אלה יויא': [-23.345948651659718, 'torah_skips_2_s0', 31956, 31967],
 'היני א

In [21]:
df_4_3_4 = pd.DataFrame.from_dict(seq_probs_4_3_4, orient='index').reset_index()

In [22]:
df_4_3_4.columns = ['string', 'log_prob', 'skip_start_from', 'i_start', 'i_end']
df_4_3_4['skip_start_from'] = df_4_3_4['skip_start_from'].map({'torah_skips_2_s0': 's0', 'torah_skips_2_s1': 's1'})
df_4_3_4

Unnamed: 0,string,log_prob,skip_start_from,i_start,i_end
0,ויהי האל ויהי,-21.009082,s0,157462,157473
1,ליהמ ויה הויה,-21.956501,s0,157446,157457
2,אתוי והו לקדב,-22.240719,s0,188208,188219
3,והוי ויא אראר,-22.684699,s0,27833,27844
4,ויהי ארא אליב,-22.826856,s0,157476,157487
...,...,...,...,...,...
239706,אארל מכת אאאץ,-59.632999,s0,163027,163038
239707,דאפם שען פעחא,-59.826772,s0,129291,129302
239708,ללשץ שץל אץאא,-59.863689,s0,157172,157183
239709,רלטב דיי קאאץ,-60.007558,s0,303832,303843


In [23]:
from google.colab import files

df_4_3_4.to_csv('df_4_3_4.csv', index=False ,encoding = 'utf-8-sig')
files.download('df_4_3_4.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>