In [1]:
!pip install pandas==1.3.4
!pip install transformers==4.12.5
!pip install datasets==1.15.1

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting pandas==1.3.4
  Downloading pandas-1.3.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
[K     |████████████████████████████████| 11.5 MB 13.8 MB/s eta 0:00:01
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.5.3
    Uninstalling pandas-1.5.3:
      Successfully uninstalled pandas-1.5.3
Successfully installed pandas-1.3.4
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting transformers==4.12.5
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 19.4 MB/s eta 0:00:01
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 54.4 MB/s eta 0:00:01
Installing collected packag

In [2]:
!pip install ipywidgets
!pip install IProgress

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [3]:
import os
import re
import pickle

import pandas as pd

import numpy as np
import torch

import transformers
from transformers import BertTokenizer

import datasets
from datasets import concatenate_datasets
from datasets import Dataset
from datasets import ClassLabel
from datasets import DatasetDict

In [4]:
print('pandas:\t\t', pd.__version__)
print('transformers:\t', transformers.__version__)
print('datasets:\t', datasets.__version__)

pandas:		 1.3.4
transformers:	 4.12.5
datasets:	 1.15.1


## Load Data

In [5]:
dataset_df = pd.read_pickle("/notebooks/KURI-BERT/notebooks/full_formula_w_fts/df_pe_for_kuri_model")

In [6]:
dataset_df

Unnamed: 0,paragraph,paragraph_components_list,paragraph_labels_list,paragraph_markers_list,split,essay_nr,structural_fts_as_text_combined,paragraph_labels_numeric
0,It is always said that competition can effecti...,[[we should attach more importance to cooperat...,[[MajorClaim]],[[from this point of view]],TRAIN,essay001,[Topic: Should students be taught to compete o...,[0]
1,"First of all, through cooperation, children ca...","[[through cooperation, children can learn abou...","[[Claim], [Premise], [Premise], [Premise]]","[[first of all], [], [], []]",TRAIN,essay001,[Topic: Should students be taught to compete o...,"[1, 2, 2, 2]"
2,"On the other hand, the significance of competi...",[[the significance of competition is that how ...,"[[Premise], [Claim], [Premise], [Premise], [Cl...","[[on the other hand], [hence], [however], [], []]",TRAIN,essay001,[Topic: Should students be taught to compete o...,"[2, 1, 2, 2, 1]"
3,"Consequently, no matter from the view of indiv...",[[a more cooperative attitudes towards life is...,[[MajorClaim]],[[consequently]],TRAIN,essay001,[Topic: Should students be taught to compete o...,[0]
4,The last 50 years have seen an increasing numb...,[[they are able to sustain their cultural iden...,[[MajorClaim]],[[however]],TRAIN,essay002,[Topic: More people are migrating to other cou...,[0]
...,...,...,...,...,...,...,...,...
1714,"In conclusion, fatherhood should be as present...",[[fatherhood should be as present as motherhoo...,[[MajorClaim]],[[in conclusion]],TRAIN,essay401,[Topic: Fatherhood should be as present as mot...,[0]
1715,Some people believe that studying hard is esse...,[[both of studying hard and playing sports are...,[[MajorClaim]],[[in my point of view]],TRAIN,essay402,[Topic: Children should studying hard or playi...,[0]
1716,"On the other hand, studying hard will give chi...",[[studying hard will give children a better fu...,"[[Claim], [Premise], [Premise], [Premise], [Pr...","[[on the other hand], [], [for instance], [], []]",TRAIN,essay402,[Topic: Children should studying hard or playi...,"[1, 2, 2, 2, 2]"
1717,"At the same time, playing sports will give goo...",[[playing sports will give good effects on chi...,"[[Claim], [Premise], [Premise], [Premise], [Pr...","[[], [], [firstly], [], [because], [], [so], [...",TRAIN,essay402,[Topic: Children should studying hard or playi...,"[1, 2, 2, 2, 2, 2, 2, 2]"


In [7]:
def clean_structural_features(x):
    
    clean_features_list = []
    
    strct_fts_list = x.structural_fts_as_text_combined
    
    for strct_fts in strct_fts_list:
        
        nr_idx = strct_fts.index("Structural Features:")
        clean_fts = strct_fts[nr_idx+len("Structural Features:"):]
        clean_features_list.append(clean_fts.lstrip())
        
    return clean_features_list

In [8]:
dataset_df['structural_fts_as_text_combined'] = dataset_df.apply(lambda x: clean_structural_features(x), axis=1)

In [9]:
def flatten_list(l):
    
    return [item for sublist in l for item in sublist]

In [10]:
# Get paragraphs with fts inserted

In [11]:
def get_paragraphs_w_fts(x):
    
    paragraph = x.paragraph
    # paragraph = '[CLS] ' + paragraph
    para_fts_as_txt = x.structural_fts_as_text_combined
    para_acs = x.paragraph_components_list
    para_ams = x.paragraph_markers_list
    
    for am, ac, ac_fts_as_txt in zip(para_ams, para_acs, para_fts_as_txt):
        
        end_of_component_idx = paragraph.index(ac[0]) + len(ac[0])
        end_of_component_char = paragraph[end_of_component_idx]
        
        if end_of_component_char != " ":            
            new_ac = ac[0] + end_of_component_char
        
        else:            
            new_ac = ac[0]

        fts_as_txt_new = ' Start ' + ac_fts_as_txt + ' Start'         
        paragraph = paragraph.replace(ac[0] + end_of_component_char, new_ac + fts_as_txt_new, 1)
    
    paragraph = 'Start ' + paragraph.rstrip()
    
    if paragraph[-5:] == 'Start':
        
        paragraph = paragraph[:-6] 

    return paragraph

In [12]:
get_paragraphs_w_fts(dataset_df.iloc[1])

'Start First of all, through cooperation, children can learn about interpersonal skills which are significant in the future life of all students. Start 2, Yes, No, No, No Start What we acquired from team work is not only how to achieve the same goal with others but more importantly, how to get along with others. Start 2, No, No, No, No Start During the process of cooperation, children can learn about how to listen to opinions of others, how to communicate with others, how to think comprehensively, and even how to compromise with other team members when conflicts occurred. Start 2, No, No, No, No Start All of these skills help them to get on well with other people and will benefit them for the whole life. Start 2, No, Yes, No, No'

In [13]:
dataset_df['full_paragraph_w_cls'] = dataset_df.apply(lambda x: get_paragraphs_w_fts(x), axis=1)

In [14]:
dataset_df['full_paragraph_w_cls'][786]

'Start Nowadays, there is a prevailing opinion that human needs for farmland, housing and industry are more important Start 1, Yes, Yes, Yes, No StartStart 1, Yes, Yes, Yes, No Startthan saving land for endangered animals. People who disagree with the point dispute that the decreasing of land for endangered animals will bring damage to ecological balance. As far as I am concerned, I agree with the opinion human needs for farmland, housing and industry are more important. The reasons are based on the following aspects.'

In [15]:
dataset_df['full_paragraph_w_cls'][786] = 'Start Nowadays, there is a prevailing opinion that human needs for farmland, housing and industry are more important Start 1, Yes, Yes, Yes, No Start than saving land for endangered animals. People who disagree with the point dispute that the decreasing of land for endangered animals will bring damage to ecological balance. As far as I am concerned, I agree with the opinion human needs for farmland, housing and industry are more important. Start 1, No, Yes, Yes, No Start The reasons are based on the following aspects.'

In [16]:
dataset_df.columns

Index(['paragraph', 'paragraph_components_list', 'paragraph_labels_list',
       'paragraph_markers_list', 'split', 'essay_nr',
       'structural_fts_as_text_combined', 'paragraph_labels_numeric',
       'full_paragraph_w_cls'],
      dtype='object')

In [17]:
dataset_df['paragraph_labels_list'][1470] = [['Premise'], ['Premise'], ['Claim']]

In [18]:
### Get CLS indices

In [19]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [20]:
def get_CLS_indices(x):
    
    cls_indices = []
    
    full_paragraph = x.full_paragraph_w_cls
    tokenized_paragraph = tokenizer.tokenize(full_paragraph)
    components_list = x.paragraph_components_list
    
    for idx, token in enumerate(tokenized_paragraph):
        
        if token == 'start':
            
            cls_indices.append(idx)
    
    component_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 0]
    features_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 1]
    
    if len(component_cls_indices) == len(components_list) + 1:
        
        component_cls_indices = component_cls_indices[:-1]    
    
    
    return component_cls_indices, features_cls_indices     

In [21]:
# def get_CLS_indices(x):
    
#     cls_indices = []
    
#     full_paragraph = x.full_paragraph_w_cls
#     tokenized_paragraph = tokenizer.tokenize(full_paragraph)
    
#     for idx, token in enumerate(tokenized_paragraph):
        
#         if token == '[CLS]':
            
#             cls_indices.append(idx)
    
#     component_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 0]
#     features_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 1]
    
#     last_cls_idx = full_paragraph.rfind('[CLS]')
    
#     end_of_last_cls = last_cls_idx + 5
    
#     if '[CLS]' not in full_paragraph[end_of_last_cls:]:
        
#         component_cls_indices = component_cls_indices[:-1]
    
# #     if len(full_paragraph) != last_cls_idx + 4:
        
# #             component_cls_indices = component_cls_indices[:-1]
            
    
#     return component_cls_indices, features_cls_indices     

In [22]:
dataset_df['full_paragraph_w_cls'][11]

'Start To conclude, as far as I am concerned, international tourism has both triggered economic development and maintained cultural and environment values of the tourist countries. Start 4, Yes, No, No, Yes Start In addition, the authorities should adequately support these sustainable developments.'

In [23]:
get_CLS_indices(dataset_df.iloc[11])

([0], [29])

In [24]:
# def get_CLS_indices(x):
    
#     cls_indices = []
#     component_cls_indices = []
    
#     full_paragraph = x.full_paragraph_w_cls
#     tokenized_paragraph = tokenizer.tokenize(full_paragraph)
#     components_list = x.paragraph_components_list
    
#     for idx, token in enumerate(tokenized_paragraph):
        
#         if token == '[CLS]':
            
#             cls_indices.append(idx)
            
#     for idx_enum, cls_idx in enumerate(cls_indices):
        
#         if idx_enum == len(cls_indices) - 1:
        
#             remaining_para = full_paragraph[cls_idx:]

#             for component in components_list:

#                 if idx_enum % 2 == 0 and component[0] not in remaining_para:

#                     component_cls_indices.append(cls_idx)
        
    
#     # component_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 0]
#     features_cls_indices = [i for j,i in enumerate(cls_indices) if j % 2 == 1]
    
#     return component_cls_indices, features_cls_indices     

In [25]:
dataset_df['component_cls_indices'] = dataset_df.apply(lambda x: get_CLS_indices(x)[0], axis=1)

In [26]:
dataset_df['feature_cls_indices'] = dataset_df.apply(lambda x: get_CLS_indices(x)[1], axis=1)

In [27]:
def sanity(x):
    
    return len(x.component_cls_indices) == len(x.paragraph_components_list)

In [28]:
dataset_df['sanity_1'] = dataset_df.apply(lambda x: sanity(x), axis=1)

In [29]:
dataset_df['sanity_1'].value_counts()

True     1717
False       2
Name: sanity_1, dtype: int64

In [30]:
whatever = dataset_df[dataset_df.sanity_1 == False]

In [31]:
whatever

Unnamed: 0,paragraph,paragraph_components_list,paragraph_labels_list,paragraph_markers_list,split,essay_nr,structural_fts_as_text_combined,paragraph_labels_numeric,full_paragraph_w_cls,component_cls_indices,feature_cls_indices,sanity_1
1424,Learning a foreign language is necessary for c...,[[It seems to me that students should start le...,"[[MajorClaim], [Claim]]","[[], []]",TRAIN,essay327,"[1, Yes, No, Yes, No, 1, No, Yes, Yes, No]","[0, 1]",Start Learning a foreign language is necessary...,"[0, 26, 48, 61]","[19, 43, 51, 85]",False
1552,There is no doubt that some behaviors of the t...,[[some behaviors of the tourists are not welco...,"[[Claim], [Premise], [Premise], [Premise], [Pr...","[[], [firstly], [therefore,], [so], [moreover]...",TEST,essay359,"[2, Yes, No, No, No, 2, No, No, No, No, 2, No,...","[1, 2, 2, 2, 2, 2, 2, 2]",Start There is no doubt that some behaviors of...,"[0, 29, 52, 90, 117, 137, 164]","[19, 42, 64, 100, 127, 154, 186]",False


In [32]:
whatever['full_paragraph_w_cls'][1424]

'Start Learning a foreign language is necessary for children to strengthen their ability to learn. Whether children should start learning a foreign language as they start school or not might be a controversial issue. It seems to me that students should start learning that as they start school. Start 1, Yes, No, Yes, No Start Learning that not only help them to improve their minds and memories, but can extend their visions about the other countries. Start 1, No, Yes, Yes, No'

In [33]:
whatever['paragraph_components_list'][1424]

[['It seems to me that students should start learning that as they start school'],
 ['Learning that not only help them to improve their minds and memories, but can extend their visions about the other countries']]

In [34]:
whatever['component_cls_indices'][1424]

[0, 26, 48, 61]

In [35]:
whatever['feature_cls_indices'][1424]

[19, 43, 51, 85]

In [36]:
for i,x in enumerate(tokenizer.tokenize(whatever['full_paragraph_w_cls'][1424])):
    print(i, x)

0 start
1 learning
2 a
3 foreign
4 language
5 is
6 necessary
7 for
8 children
9 to
10 strengthen
11 their
12 ability
13 to
14 learn
15 .
16 whether
17 children
18 should
19 start
20 learning
21 a
22 foreign
23 language
24 as
25 they
26 start
27 school
28 or
29 not
30 might
31 be
32 a
33 controversial
34 issue
35 .
36 it
37 seems
38 to
39 me
40 that
41 students
42 should
43 start
44 learning
45 that
46 as
47 they
48 start
49 school
50 .
51 start
52 1
53 ,
54 yes
55 ,
56 no
57 ,
58 yes
59 ,
60 no
61 start
62 learning
63 that
64 not
65 only
66 help
67 them
68 to
69 improve
70 their
71 minds
72 and
73 memories
74 ,
75 but
76 can
77 extend
78 their
79 visions
80 about
81 the
82 other
83 countries
84 .
85 start
86 1
87 ,
88 no
89 ,
90 yes
91 ,
92 yes
93 ,
94 no


In [37]:
z = dataset_df['full_paragraph_w_cls'][1564]

In [38]:
dataset_df['paragraph_components_list'][1564]

[['their unresponsiveness can be attributed to their attitude towards life'],
 ['that they are aware of their deteriorating health and lack of enthusiasm comes with their taste for monotony and safe ideology'],
 ["This, by all means, is outweigh by youngsters' adventurousness and bravery"],
 ['Blatantly seen, success of most companies stems from young people, whose enterprising ideas that catch up with the world is regarded as wind of refreshment']]

In [39]:
dataset_df['feature_cls_indices'][1564]

[18, 57, 87, 132]

In [40]:
z

"Start Secondly, their unresponsiveness can be attributed to their attitude towards life. Start 3, Yes, No, No, No Start In other words, that they are aware of their deteriorating health and lack of enthusiasm comes with their taste for monotony and safe ideology. Start 3, No, No, No, No Start This, by all means, is outweigh by youngsters' adventurousness and bravery. Start 3, No, No, No, No Start Blatantly seen, success of most companies stems from young people, whose enterprising ideas that catch up with the world is regarded as wind of refreshment. Start 3, No, Yes, No, No"

In [41]:
for i,x in enumerate(tokenizer.tokenize(z)):
    print(i, x)

0 start
1 secondly
2 ,
3 their
4 un
5 ##res
6 ##pon
7 ##sive
8 ##ness
9 can
10 be
11 attributed
12 to
13 their
14 attitude
15 towards
16 life
17 .
18 start
19 3
20 ,
21 yes
22 ,
23 no
24 ,
25 no
26 ,
27 no
28 start
29 in
30 other
31 words
32 ,
33 that
34 they
35 are
36 aware
37 of
38 their
39 deteriorating
40 health
41 and
42 lack
43 of
44 enthusiasm
45 comes
46 with
47 their
48 taste
49 for
50 mono
51 ##ton
52 ##y
53 and
54 safe
55 ideology
56 .
57 start
58 3
59 ,
60 no
61 ,
62 no
63 ,
64 no
65 ,
66 no
67 start
68 this
69 ,
70 by
71 all
72 means
73 ,
74 is
75 out
76 ##weig
77 ##h
78 by
79 young
80 ##sters
81 '
82 adventurous
83 ##ness
84 and
85 bravery
86 .
87 start
88 3
89 ,
90 no
91 ,
92 no
93 ,
94 no
95 ,
96 no
97 start
98 b
99 ##lat
100 ##antly
101 seen
102 ,
103 success
104 of
105 most
106 companies
107 stems
108 from
109 young
110 people
111 ,
112 whose
113 enter
114 ##pr
115 ##ising
116 ideas
117 that
118 catch
119 up
120 with
121 the
122 world
123 is
124 regarded
125 as
126 wi

In [42]:
dataset_df['paragraph_components_list'][311]

[['it is easy to realize that languages, especially oral language, are a quite major part of culture of a country'],
 ['It helps people distinguish between nations and even regions in a country'],
 ['The clearance of minority of language means that a national traditional, customs and habitants do not exist'],
 ['Governments should spend money on keeping and preserving these things for the rich diversity of cultures which make our world more interesting'],
 ['it is a pride of the country with a rage of culture which is easy to educate people to love their country more']]

## Spans Computation

In [43]:
dataset_df

Unnamed: 0,paragraph,paragraph_components_list,paragraph_labels_list,paragraph_markers_list,split,essay_nr,structural_fts_as_text_combined,paragraph_labels_numeric,full_paragraph_w_cls,component_cls_indices,feature_cls_indices,sanity_1
0,It is always said that competition can effecti...,[[we should attach more importance to cooperat...,[[MajorClaim]],[[from this point of view]],TRAIN,essay001,"[1, Yes, Yes, Yes, No]",[0],Start It is always said that competition can e...,[0],[99],True
1,"First of all, through cooperation, children ca...","[[through cooperation, children can learn abou...","[[Claim], [Premise], [Premise], [Premise]]","[[first of all], [], [], []]",TRAIN,essay001,"[2, Yes, No, No, No, 2, No, No, No, No, 2, No,...","[1, 2, 2, 2]","Start First of all, through cooperation, child...","[0, 36, 75, 129]","[26, 65, 119, 152]",True
2,"On the other hand, the significance of competi...",[[the significance of competition is that how ...,"[[Premise], [Claim], [Premise], [Premise], [Cl...","[[on the other hand], [hence], [however], [], []]",TRAIN,essay001,"[3, Yes, No, No, No, 3, No, No, No, No, 3, No,...","[2, 1, 2, 2, 1]","Start On the other hand, the significance of c...","[0, 32, 56, 91, 177]","[22, 46, 81, 167, 205]",True
3,"Consequently, no matter from the view of indiv...",[[a more cooperative attitudes towards life is...,[[MajorClaim]],[[consequently]],TRAIN,essay001,"[4, Yes, Yes, No, Yes]",[0],"Start Consequently, no matter from the view of...",[0],[40],True
4,The last 50 years have seen an increasing numb...,[[they are able to sustain their cultural iden...,[[MajorClaim]],[[however]],TRAIN,essay002,"[1, Yes, Yes, Yes, No]",[0],Start The last 50 years have seen an increasin...,[0],[78],True
...,...,...,...,...,...,...,...,...,...,...,...,...
1714,"In conclusion, fatherhood should be as present...",[[fatherhood should be as present as motherhoo...,[[MajorClaim]],[[in conclusion]],TRAIN,essay401,"[4, Yes, Yes, No, Yes]",[0],"Start In conclusion, fatherhood should be as p...",[0],[36],True
1715,Some people believe that studying hard is esse...,[[both of studying hard and playing sports are...,[[MajorClaim]],[[in my point of view]],TRAIN,essay402,"[1, Yes, Yes, Yes, No]",[0],Start Some people believe that studying hard i...,[0],[45],True
1716,"On the other hand, studying hard will give chi...",[[studying hard will give children a better fu...,"[[Claim], [Premise], [Premise], [Premise], [Pr...","[[on the other hand], [], [for instance], [], []]",TRAIN,essay402,"[2, Yes, No, No, No, 2, No, No, No, No, 2, No,...","[1, 2, 2, 2, 2]","Start On the other hand, studying hard will gi...","[0, 25, 51, 110, 137]","[15, 41, 100, 127, 154]",True
1717,"At the same time, playing sports will give goo...",[[playing sports will give good effects on chi...,"[[Claim], [Premise], [Premise], [Premise], [Pr...","[[], [], [firstly], [], [because], [], [so], [...",TRAIN,essay402,"[3, Yes, No, No, No, 3, No, No, No, No, 3, No,...","[1, 2, 2, 2, 2, 2, 2, 2]","Start At the same time, playing sports will gi...","[0, 25, 63, 99, 117, 143, 163, 182]","[15, 53, 89, 107, 133, 153, 172, 200]",True


### dataset

In [44]:
train_df = dataset_df[dataset_df.split=='TRAIN'].reset_index(drop=True)
test_df = dataset_df[dataset_df.split=='TEST'].reset_index(drop=True)

In [45]:
dataset_train = Dataset.from_pandas(train_df)
dataset_test = Dataset.from_pandas(test_df)

In [46]:
train_val_datasets = dataset_train.train_test_split(train_size=0.8)
dataset_train = train_val_datasets['train']
dataset_val = train_val_datasets['test']



In [47]:
dataset = DatasetDict({"train": dataset_train, "test": dataset_test, "validation": dataset_val})

In [48]:
dataset

DatasetDict({
    train: Dataset({
        features: ['paragraph', 'paragraph_components_list', 'paragraph_labels_list', 'paragraph_markers_list', 'split', 'essay_nr', 'structural_fts_as_text_combined', 'paragraph_labels_numeric', 'full_paragraph_w_cls', 'component_cls_indices', 'feature_cls_indices', 'sanity_1'],
        num_rows: 1088
    })
    test: Dataset({
        features: ['paragraph', 'paragraph_components_list', 'paragraph_labels_list', 'paragraph_markers_list', 'split', 'essay_nr', 'structural_fts_as_text_combined', 'paragraph_labels_numeric', 'full_paragraph_w_cls', 'component_cls_indices', 'feature_cls_indices', 'sanity_1'],
        num_rows: 358
    })
    validation: Dataset({
        features: ['paragraph', 'paragraph_components_list', 'paragraph_labels_list', 'paragraph_markers_list', 'split', 'essay_nr', 'structural_fts_as_text_combined', 'paragraph_labels_numeric', 'full_paragraph_w_cls', 'component_cls_indices', 'feature_cls_indices', 'sanity_1'],
        num_rows:

In [49]:
# torch.save(dataset, os.path.join("/notebooks/KURI-BERT/notebooks/full_formula_w_fts/CLS_work", 'pe_dataset_for_cls_implementation.pt'))

In [50]:
torch.save(dataset, os.path.join("/notebooks/KURI-BERT/notebooks/full_formula_w_fts/CLS_work", 'pe_dataset_for_start_implementation.pt'))

In [51]:
### Check Area. Or maybe Poland area hehehe