In [1]:
from corpustool.models import *

Удалим тексты-дубликаты из папки "best works"

In [4]:
# Let the database count the rows instead of loading every matching Document into memory.
Document.objects.filter(title__startswith='best_works').count()

34

In [6]:
# Destructive: removes every Document whose title starts with 'best_works'.
# The tuple shown below is (total rows deleted, {model label: rows deleted}); it shows that
# Occurence rows were removed together with the documents.
Document.objects.filter(title__startswith='best_works').delete()

(9674,
 {'corpustool.DepRel': 0,
  'corpustool.Occurence': 9640,
  'corpustool.Document': 34})

In [2]:
import os

In [3]:
import json

In [4]:
import time

In [5]:
import re

In [6]:
import functools

In [7]:
from django.db import DataError

Самописная кэш-функция: возвращает None, если такой набор аргументов уже есть в кэше. Кэш хранится в списке, а не в словаре, так как экземпляры моделей без первичного ключа нехэшируемы (Model instances without primary key value are unhashable).

In [8]:
def cached(func, *args, **kwargs):  # correct signature is not known
    """Decorator that lets ``func`` really run only once per distinct argument set.

    The first call with a given ``(args, kwargs)`` combination is forwarded to
    ``func`` and its result is returned. Every later call with an equal
    combination returns ``None`` without calling ``func`` again.

    Seen argument sets are kept in a list and compared with ``==``, so the
    arguments do not have to be hashable.

    Each decorated function gets its own list. Previously the list was stored on
    ``cached`` itself, so two decorated functions called with equal arguments
    suppressed each other, and clearing one function's cache cleared all of them.

    The returned wrapper exposes:
    cache - the list of ``(args, kwargs)`` pairs seen so far
    cache_clear() - forget everything seen so far

    The extra ``*args, **kwargs`` of the decorator itself are accepted but unused.
    """
    seen_calls = []

    @functools.wraps(func)
    def find_in_cache(*args, **kwargs):
        call = (args, kwargs)
        if call in seen_calls:
            return None
        seen_calls.append(call)
        return func(*args, **kwargs)

    find_in_cache.cache = seen_calls
    # Clear in place so that ``find_in_cache.cache`` keeps pointing at the live list.
    find_in_cache.cache_clear = seen_calls.clear
    return find_in_cache

In [9]:
def parse_rus_date(s):
    """Convert a ``DD.MM.YYYY`` date string into ISO ``YYYY-MM-DD`` form.

    Arguments:
    s - the date as a string, e.g. '29.03.2017'

    Returns the ISO-formatted string, or None when ``s`` is not a string, does
    not consist of exactly 2 + 2 + 4 digits separated by dots, or does not name
    a real calendar date (e.g. '31.02.2017').
    """
    # Imported locally: no visible cell of this notebook imports ``datetime``, and the
    # previous bare ``except`` would have hidden the resulting NameError as a None result.
    import datetime

    if not isinstance(s, str):
        return None
    parts = s.split('.')
    if [len(part) for part in parts] != [2, 2, 4]:
        return None
    if not all(part.isdigit() for part in parts):
        return None
    day, month, year = parts
    try:
        return datetime.date(int(year), int(month), int(day)).isoformat()
    except ValueError:
        # Digits only, but not a valid calendar date.
        return None

In [10]:
def check_int(s):
    """Tell whether ``int(s)`` succeeds.

    Arguments:
    s - any value (string, number, None, ...)

    Returns True when ``s`` can be converted with ``int()``, False otherwise.
    Only the conversion errors raised by ``int()`` are treated as "not an int";
    unrelated exceptions such as KeyboardInterrupt are no longer swallowed.
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float('inf')).
        return False
    return True

In [11]:
parse_rus_date('29.03.2017')

In [12]:
import spacy

In [13]:
from stattool.settings import TEXT_FILE_PATH

In [14]:
nlp = spacy.load("en_core_web_sm")

In [15]:
def escapepath(fp):
    """Flatten a path into a single name by turning each separator into '%'.

    Both the platform separator (``os.sep``) and a literal '/' are replaced, so
    mixed-style paths are flattened as well.
    """
    flattened = fp
    for separator in (os.sep, '/'):
        flattened = flattened.replace(separator, '%')
    return flattened

In [16]:
def extension_split(x):
    """Return ``x`` with everything from its last '.' onwards removed.

    A string without any '.' is returned unchanged.
    """
    dot = x.rfind('.')
    # rfind() gives -1 when there is no dot; slicing with -1 would drop the last character.
    return x if dot == -1 else x[:dot]

In [17]:
@cached
def try_to_get(Model, *args, **kwargs):
    '''
    Returns the Model instance matching the given properties if it exists;
    if it doesn't, builds a new unsaved one (its pk is None until it is saved).

    Because of the @cached decorator, only the first call with a given set of
    arguments reaches this body; repeated calls with equal arguments return None
    until try_to_get.cache_clear() is called.

    Arguments:
    Model - a class inherited from django models.Model
    *args, **kwargs - unnamed and named properties of needed object
    '''
    try:
        # Previously the fetched object was dropped and None was returned for existing rows.
        return Model.objects.get(*args, **kwargs)
    except Model.DoesNotExist:
        return Model(*args, **kwargs)

TODO: сделать нормальную проверку всех полей метаданных (пола, даты, оценки) на валидность.

In [18]:
class TextBaseFiller(object):
    """Loads ``.txt`` files from a folder into the corpus database, one file at a time.

    For every text file a Document is created (optionally filled from a sibling
    ``.json`` metadata file), the text is run through ``model`` and the result is
    stored as Lemma, Token and Occurence rows.

    Arguments:
    model - callable applied to the file contents; iterating over its result must
        yield items with ``text``, ``lemma_`` and ``pos_`` attributes
    folder - directory scanned for ``.txt`` files; a document title is the file path
        with this prefix cut off and path separators escaped
    recursive - if true, subfolders are scanned as well
    include_metadata - if true, ``<name>.json`` next to ``<name>.txt`` is read for
        sex / date / mark / study_year / department

    Attributes:
    text_filenames - paths of all files to process
    current_text_id - index in ``text_filenames`` of the next file to process
    textobj - the Document of the file processed most recently
    """

    def __init__(self, model=None, folder='.', recursive=True, include_metadata=False):
        self.parser = model

        self.include_metadata = include_metadata
        self.folder = folder
        if recursive:
            self.text_filenames = [os.path.join(root, name)
                                   for root, _dirs, files in os.walk(self.folder)
                                   for name in files if name.endswith('.txt')]
        else:
            self.text_filenames = [os.path.join(self.folder, name)
                                   for name in os.listdir(self.folder) if name.endswith('.txt')]
        self.current_text_id = 0


    def process_all(self, show_titles=False, show_time = False):
        """Process every remaining file, starting at ``current_text_id``.

        show_titles - print the path of each file before processing it
        show_time - print the seconds elapsed since this call started before each file
        """
        self._process_until(len(self.text_filenames), show_titles, show_time)


    def process_next_n(self, n, show_titles=False, show_time = False):
        """Process at most ``n`` further files; flags work as in process_all."""
        limit = min(self.current_text_id + n, len(self.text_filenames))
        self._process_until(limit, show_titles, show_time)


    def _process_until(self, limit, show_titles, show_time):
        """Call process_next() until ``current_text_id`` reaches ``limit``."""
        st_time = time.time()
        while self.current_text_id < limit:
            if show_titles:
                print(self.text_filenames[self.current_text_id])
            if show_time:
                print(time.time() - st_time)
            self.process_next()


    def process_next(self):
        """Import the file at ``current_text_id`` and advance to the next one.

        A file that cannot be read, parsed or stored is reported and skipped, and
        nothing of it is left in the database. KeyboardInterrupt and other
        non-Exception errors propagate without advancing, so the same file is
        retried on the next call.
        """
        if self.current_text_id >= len(self.text_filenames):
            print('Specified folder is fully processed')
            return

        fn = self.text_filenames[self.current_text_id]
        try:
            self._store_file(fn)
        except Exception as exc:
            # Report instead of skipping silently, so lost files can be investigated.
            print('Skipped {}: {!r}'.format(fn, exc))
        self.current_text_id += 1


    def _store_file(self, fn):
        """Parse one text file and write its Document, Lemmas, Tokens and Occurences."""
        # django is already a dependency of this notebook; imported here to keep the class self-contained.
        from django.db import transaction

        # Read and parse before touching the database, so an unreadable file leaves no Document behind.
        with open(fn, 'r', encoding='utf-8') as f:
            parsed = self.parser(f.read().replace('\ufeff', ''))

        self.textobj = Document(title=escapepath(fn[len(self.folder):]))
        if self.include_metadata:
            self.process_metadata()

        # Per-file memo of database lookups: the same get() used to be repeated for every token.
        lemma_memo = {}
        token_memo = {}

        def lemma_of(token):
            key = (token.lemma_.lower(), token.pos_.upper())
            if key not in lemma_memo:
                lemma_memo[key] = Lemma.objects.get(text=key[0], pos=key[1])
            return lemma_memo[key]

        def token_of(token):
            lemma = lemma_of(token)
            key = (token.text.lower(), lemma.pk)
            if key not in token_memo:
                token_memo[key] = Token.objects.get(text=key[0], lemma=lemma)
            return token_memo[key]

        # All-or-nothing: any failure below rolls back the Document and everything created for it.
        with transaction.atomic():
            self.textobj.save()

            try_to_get.cache_clear()

            ##saving lemmas to db:
            lemmas = [try_to_get(Lemma, text=token.lemma_.lower(), pos=token.pos_.upper()) for token in parsed]
            Lemma.objects.bulk_create([lemma for lemma in lemmas if lemma is not None and lemma.pk is None])

            ##saving tokens to db:
            tokens = [try_to_get(Token, text=token.text.lower(), lemma=lemma_of(token)) for token in parsed]
            Token.objects.bulk_create([token for token in tokens if token is not None and token.pk is None])

            ##saving occurences to db:
            Occurence.objects.bulk_create([Occurence(document=self.textobj, token=token_of(token), index=i)
                                           for i, token in enumerate(parsed)])


    def process_metadata(self):
        """Copy valid fields from the current file's ``.json`` companion onto ``self.textobj``.

        A missing file, undecodable JSON or a non-object JSON value leaves the
        document untouched; each individual field is set only if it looks valid.
        """
        fn = extension_split(self.text_filenames[self.current_text_id]) + '.json'
        if not os.path.exists(fn):
            return
        with open(fn, 'r', encoding='utf-8') as inp:
            try:
                meta = json.load(inp)
            except ValueError:
                # Covers both malformed JSON and bytes that are not valid UTF-8.
                return
        if not isinstance(meta, dict):
            return

        if meta.get('sex') in ('m', 'f'):
            self.textobj.sex = meta['sex']
        if isinstance(meta.get('date'), str):
            parsed_date = parse_rus_date(meta['date'])
            if parsed_date is not None:
                self.textobj.date = parsed_date
        if 'mark' in meta and check_int(meta['mark']):
            self.textobj.mark = meta['mark']
        if 'study_year' in meta and check_int(meta['study_year']):
            self.textobj.study_year = meta['study_year']
        department = meta.get('department')
        # A non-string department (null, number) used to crash on len().
        if isinstance(department, str) and len(department) < 30:
            self.textobj.department = department
            

In [19]:
new_path = os.path.join(TEXT_FILE_PATH, r'data/exam/exam2016')

In [20]:
base_filler = TextBaseFiller(model=nlp, folder=new_path+'/', include_metadata=True)

In [21]:
len(base_filler.text_filenames)

1364

In [22]:
base_filler.current_text_id

0

In [100]:
# Manual resume: move the cursor so the next process_* call starts at index 106 of text_filenames.
base_filler.current_text_id = 106

In [23]:
base_filler.process_all(show_titles=True)

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_100_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_100_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_101_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_101_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_105_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_105_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_108_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_108_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_53_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_54_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_54_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_55_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_55_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_5_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_5_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/EKu_60_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_136_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_136_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_137_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_137_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_138_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_138_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_139_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_139_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_201_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_201_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_202_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_202_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_203_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_203_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_204_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_204_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_266_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_266_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_267_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_267_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_268_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_268_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_269_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_269_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_59_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_5_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_5_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_60_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_60_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_61_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_61_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/JSl_62_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_33_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_33_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_34_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_34_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_35_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_35_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_36_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/MTsy_36_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_124_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_125_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_125_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_126_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_126_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_127_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_127_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_128_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_68_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_68_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_69_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_69_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_6_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_6_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_70_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_70_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/OR_71_

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_44_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_44_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_45_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_45_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_46_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_46_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_47_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2016/ZEv_47_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

Сделаем функцию для быстрого конвертирования папки в бд:

In [20]:
base_filler = None

In [21]:
def insert_to_db(folder):
    """Import every text under ``folder`` (relative to TEXT_FILE_PATH) into the database.

    Builds a TextBaseFiller over that folder with the notebook-level ``nlp``
    parser and metadata import switched on, then processes all files, printing
    each file path as it goes.
    """
    target_dir = os.path.join(TEXT_FILE_PATH, folder) + '/'
    filler = TextBaseFiller(model=nlp, include_metadata=True, folder=target_dir)
    filler.process_all(show_titles=True)

In [22]:
insert_to_db(r'data/exam/exam2015')

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_1_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_1_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_2_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_2_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_3_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_3_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_4_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_4_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2015/APL_

In [23]:
insert_to_db(r'data/exam/exam2014')

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_10_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_10_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_11_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_11_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_12_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_12_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_13_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_13_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_9_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AAl_9_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_10_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_10_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_11_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_11_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_12_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ADe_12_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_19_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_1_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_1_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_20_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_20_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_21_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_21_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/AMe_22_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_23_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_24_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_24_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_25_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_25_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_26_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_26_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ASt_27_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_17_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_18_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_18_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_19_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_19_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_1_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_1_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DAr_20_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_10_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_11_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_11_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_12_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_12_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_13_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_13_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_14_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_5_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_6_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_6_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_7_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_7_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_8_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_8_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_9_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/DZu_

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_39_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_3_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_3_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_40_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_40_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_4_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_4_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EEm_5_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/E

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_35_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_36_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_36_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_37_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_37_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_38_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_38_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_39_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_68_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_69_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_69_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_6_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_6_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_70_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_70_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EPa_71_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_23_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_24_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_24_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_25_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_25_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_26_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_26_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_27_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_56_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_56_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_57_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_57_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_58_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_58_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_59_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ESha_59_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_24_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_25_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_25_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_26_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_26_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_27_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_27_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_28_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_58_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_58_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_59_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_59_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_5_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_5_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_60_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_60_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_90_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_90_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_91_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_91_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_92_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_92_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_93_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/EZa_93_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_10_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_10_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_11_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_11_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_12_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_12_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_13_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/LPo_13_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_24_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_24_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_2_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_2_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_3_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_3_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_4.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_4_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MBi_

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_4_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_4_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_5_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_5_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_6_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_6_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_7_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_7_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MGr_

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_24_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_25_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_25_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_26_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_26_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_27_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_27_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/MTsy_28_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_15_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_16_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_16_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_17_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_17_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_18_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_18_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/TSha_19_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/ex

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_28_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_29_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_29_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_2_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_2_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_30_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_30_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VKo_31_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_26_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_26_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_27_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_27_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_28_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_28_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_29_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_29_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_9_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/VPe_9_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_10_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_10_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_11_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_11_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_12_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_12_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam201

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_8_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_8_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_9_1.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/ZEv_9_2.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00251.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00252.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00253.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00254.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\real

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00313.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00314.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00315.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00316.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00317.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00318.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00319.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00378.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00379.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00380.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00381.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00382.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00383.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00384.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00443.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00444.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00445.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00446.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00447.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00449.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00450.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_2\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00511.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00512.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00513.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00514.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00515.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00516.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00517.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00576.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00577.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00578.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00579.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00580.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00581.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00582.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00641.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00642.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00643.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00644.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00645.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00646.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00647.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00706.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00707.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00708.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00709.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00710.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00711.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00712.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_3\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00774.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00775.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00776.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00777.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00778.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00779.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00780.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00839.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00840.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00841.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00842.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00843.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00844.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00845.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00904.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00905.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00906.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00907.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00908.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00909.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00910.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00969.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00970.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00971.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00972.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00973.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00974.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00975.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_4\esl_00

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01034.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01035.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01036.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01037.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01038.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01039.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01040.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01099.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01100.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01101.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01102.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01103.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01104.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01105.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01164.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01165.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01166.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01167.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01168.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01169.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01170.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01229.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01230.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01231.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01232.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01233.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01234.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01235.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_5\esl_01

C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01294.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01295.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01296.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01297.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01298.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01299.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01300.txt
C:\HP_PC\hp_pc\Studies\3rd-year-thesis\Data\realec_dump_2019_2_10_16_7_20_6_41_0\data/exam/exam2014/2012-2014_6\esl_01

In [None]:
# Run the importer on the data/exam/2012-2014 folder.
# NOTE(review): insert_to_db is defined outside this excerpt — presumably it
# loads every text under the given path into the database; confirm there.
# The cell has no execution count, i.e. it was not run in the saved session.
insert_to_db(r'data/exam/2012-2014')

Здесь всё сломалось из-за странной даты:

In [48]:
# Wipe all documents, tokens and lemmas.
# Only the result of the last statement is echoed by the notebook, which is
# why the recorded output lists the Lemma deletion count only.
Document.objects.all().delete()
Token.objects.all().delete()
Lemma.objects.all().delete()

(15675, {'corpustool.Lemma': 15675})

In [44]:
# Inspect the importer's current text counter.
# NOTE(review): base_filler is defined outside this excerpt — presumably this
# is the index of the text the import had reached when it stopped; confirm.
base_filler.current_text_id

1448

In [1]:
import datetime

In [6]:
# Sanity check: date.fromisoformat parses a 'YYYY-MM-DD' string and the
# month is returned as an int.
datetime.date.fromisoformat('2008-06-03').month

6

In [86]:
# Peek at the lemma rows as plain dicts; the QuerySet repr cuts the listing
# short ("...(remaining elements truncated)...").
Lemma.objects.values()

<QuerySet [{'id': 3585, 'text': 'This', 'pos': 'DET'}, {'id': 3586, 'text': 'chart', 'pos': 'NOUN'}, {'id': 3587, 'text': 'show', 'pos': 'VERB'}, {'id': 3588, 'text': '-PRON-', 'pos': 'PRON'}, {'id': 3589, 'text': 'the', 'pos': 'DET'}, {'id': 3590, 'text': 'datum', 'pos': 'NOUN'}, {'id': 3591, 'text': 'about', 'pos': 'ADP'}, {'id': 3592, 'text': 'the', 'pos': 'DET'}, {'id': 3593, 'text': 'unemployment', 'pos': 'NOUN'}, {'id': 3594, 'text': 'in', 'pos': 'ADP'}, {'id': 3595, 'text': 'a', 'pos': 'DET'}, {'id': 3596, 'text': 'few', 'pos': 'ADJ'}, {'id': 3597, 'text': 'world', 'pos': 'NOUN'}, {'id': 3598, 'text': 'area', 'pos': 'NOUN'}, {'id': 3599, 'text': '.', 'pos': 'PUNCT'}, {'id': 3600, 'text': 'the', 'pos': 'DET'}, {'id': 3601, 'text': 'datum', 'pos': 'NOUN'}, {'id': 3602, 'text': 'be', 'pos': 'VERB'}, {'id': 3603, 'text': 'divide', 'pos': 'VERB'}, {'id': 3604, 'text': 'into', 'pos': 'ADP'}, '...(remaining elements truncated)...']>

In [91]:
# Scratch document used for the equality / hash experiment in the next cells.
d = Document(title='heh')

In [92]:
# Persist the scratch document so that it can be fetched back by title.
d.save()

In [96]:
# First of two independent fetches of the same row.
a = Document.objects.get(title='heh')

In [97]:
# Second, separate fetch of the same row, to compare against `a`.
b = Document.objects.get(title='heh')

In [98]:
# Two separately loaded instances of the same row compare equal.
a == b

True

In [103]:
# Hash of a saved model instance (Django derives it from the primary key);
# the same value is expected for `b`.
hash(a)

141

In [104]:
# Hash of the second instance of the same row; matches hash(a).
hash(b)

141

In [106]:
# For comparison: str hashes are salted per interpreter process by default,
# so this number changes after a kernel restart.
hash('abc')

-1583079279397609165

In [107]:
# Show the documentation of the standard-library memoizer. It is referenced
# through the functools module (imported at the top of the notebook) so the
# cell does not depend on a bare `lru_cache` name being in the kernel namespace.
help(functools.lru_cache)

Help on function lru_cache in module functools:

lru_cache(maxsize=128, typed=False)
    Least-recently-used cache decorator.
    
    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.
    
    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.
    
    Arguments to the cached function must be hashable.
    
    View the cache statistics named tuple (hits, misses, maxsize, currsize)
    with f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.
    
    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used



In [114]:
# List membership compares with ==, so unhashable dicts can still be looked
# up in a list (the property the list-based cache of `cached` relies on).
{'a':1, 'b':2} in [{'a':1, 'b':2}]

True

In [201]:
# Number of stored (text, pos) rows; len() evaluates the QuerySet itself.
len(Lemma.objects.values('text', 'pos'))

89

In [200]:
# Number of distinct (text, pos) pairs, deduplicated on the Python side
# (exact string equality) rather than by the database.
len({(row['text'], row['pos']) for row in Lemma.objects.values('text', 'pos')})

89

In [46]:
# Every stored (text, pos) combination, as a lazy QuerySet of dicts.
vals = Lemma.objects.values('text', 'pos')

In [47]:
# Report every (text, pos) pair that does not resolve to exactly one Lemma row.
# Only the two lookup failures raised by get() are handled, so database errors
# and KeyboardInterrupt are no longer silently swallowed.
# NOTE(review): the pairs printed by the saved run (e.g. 'tugen' / 'tügen')
# suggest the database collation treats such strings as equal — confirm.
for val in vals:
    try:
        Lemma.objects.get(**val)
    except (Lemma.DoesNotExist, Lemma.MultipleObjectsReturned):
        print(val, Lemma.objects.filter(**val))

{'text': '\u200b1995\u200b', 'pos': 'PROPN'} <QuerySet [<Lemma: Lemma object (26525)>, <Lemma: Lemma object (26544)>]>
{'text': '\u200b1995', 'pos': 'PROPN'} <QuerySet [<Lemma: Lemma object (26525)>, <Lemma: Lemma object (26544)>]>
{'text': 'tugen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30953)>, <Lemma: Lemma object (30954)>]>
{'text': 'tügen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30953)>, <Lemma: Lemma object (30954)>]>
{'text': 'gunnen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30956)>, <Lemma: Lemma object (30957)>]>
{'text': 'günnen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30956)>, <Lemma: Lemma object (30957)>]>
{'text': 'kunnen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30959)>, <Lemma: Lemma object (30960)>]>
{'text': 'künnen', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30959)>, <Lemma: Lemma object (30960)>]>
{'text': 'turren', 'pos': 'NOUN'} <QuerySet [<Lemma: Lemma object (30968)>, <Lemma: Lemma object (30969)>]>
{'text': 'türr

Пример запроса — сколько словоупотреблений в текстах девушек:

In [295]:
# Count in the database (SELECT COUNT) instead of loading every row for len().
Occurence.objects.filter(document__sex='f').count()

51749

Сколько словоупотреблений в текстах парней:

In [296]:
# Count in the database (SELECT COUNT) instead of loading every row for len().
Occurence.objects.filter(document__sex='m').count()

39698

Средняя длина текста у девушек:

In [299]:
# Mean number of occurrences per document for sex='f'; both totals are
# counted in the database rather than by materialising the rows.
Occurence.objects.filter(document__sex='f').count() / Document.objects.filter(sex='f').count()

265.37948717948717

У парней:

In [300]:
# Mean number of occurrences per document for sex='m'; both totals are
# counted in the database rather than by materialising the rows.
Occurence.objects.filter(document__sex='m').count() / Document.objects.filter(sex='m').count()

277.6083916083916

In [53]:
# str.strip() does not remove a leading U+FEFF (byte-order mark): the
# character is not whitespace for Python, so it survives the call.
'\ufeffabsurdity'.strip()

'\ufeffabsurdity'

In [70]:
# Take the document at index 3 of the default listing and collect its occurrences.
sample_document = Document.objects.all()[3]
Occurences = Occurence.objects.filter(document=sample_document)

Попробуем восстановить текст, чтобы проверить, что всё нормально:

In [72]:
# Print the token texts of the selected document separated by spaces.
# select_related('token') fetches the tokens in the same query instead of
# issuing one extra query per occurrence when `occ.token` is accessed.
# NOTE(review): assumes `token` is a foreign key on Occurence — confirm.
for occ in Occurences.select_related('token'):
    print(occ.token.text, end=' ')

  the course of time , the means of getting from one place to another   becom more and more sophisticated . two hundred years ago no one could even imagine that it would be possible to travel flying above the earth without any difficulty , but now it 's quite    to get on a plane and to fly to another city , country or even continent . so let 's imagine how the world of transport can improve in the future . 

 firstly , some people used to joke that the only way to solve the problem of traffic congestions in moscow is to    learn how to teleport or to fly . for me , the idea of teleportation sounds quite unreal ( but not    unreal ) , but flying from one place to another looks quite nice . as far as i know , some means of   flying have already been invented - for instance , there 're some rocket packs . they look like a quite large sack with small jet engines inside .    who uses this thing should put it on their back and , having switched it on , take off . the most advanced kind of s

In [73]:
# Tokens whose lemma is NULL; the saved run returned an empty QuerySet.
Token.objects.filter(lemma__isnull=True)

<QuerySet []>

In [None]:
# Show the raw bytes of a word that starts with U+FEFF; 'utf' is an alias of
# the UTF-8 codec, where the mark becomes the prefix b'\xef\xbb\xbf'.
'\ufeffwhen'.encode('utf')