In [2]:
import pandas as pd
import numpy as np
import torch
from collections import defaultdict
from datasets import Dataset

In [27]:
# Load entities (5500)
entities = pd.read_csv('../data/entities/SingleToken/entities_languageAgnostic.csv')

# Load Relations
relations = pd.read_json('../data/knowledge/properties_w_aliases_full_cleaned.json')

**Check if still single tokens**

In [24]:
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

In [25]:
languages = ['zh', 'ja']

df = pd.read_csv('../data/entities/SingleToken/multilingual/zh_ja.csv')
df

Unnamed: 0,id,zh,ja
0,Q1059055,鸩,鴆
1,Q30335559,麴,麹
2,Q645110,檁,桁
3,Q2217308,锤,錘
4,Q823101,阙,闕
...,...,...,...
214,Q11472,纸,紙
215,Q5185279,诗,詩
216,Q12280,桥,橋
217,Q3957,镇,街


In [26]:
for idx, entity in df.iterrows():
    wordTokens = [tokenizer.tokenize(entity[k]) for k in languages]
    if not (all(len(l) == 1 for l in wordTokens)):
        df = df.drop([idx])
        
df

Unnamed: 0,id,zh,ja
139,Q20021676,왕,王


In [21]:
df.to_csv('../data/entities/SingleToken/xlm/zh_ja.csv', index=False)

##### Prepare Data

In [28]:
def gen_index_pairs(n, max_size=np.Inf, limit=np.Inf):
    pairs = set()
    ind = list()

    while len(pairs) < max_size:
        # return number between 0 and n (exclude)
        x, y = np.random.randint(n), np.random.randint(n)

        while ind.count(x) >= limit or ind.count(y) >= limit:
            x, y = np.random.randint(n), np.random.randint(n)

        i = 0
        while (x, y) in pairs or (y, x) in pairs or x == y:
            if i > 10:
                return
            x, y = np.random.randint(n), np.random.randint(n)
            i += 1

        ind.append(x)
        ind.append(y)

        pairs.add((x, y))
        yield x, y


# n: how many I have
# num_indices: how many I need
# generates max_size random unique indices (for indexing in what n is refering to)
def generate_unique_indices(n, num_indices):

    # if we can't generate unique indices because the data is too small
    if n < num_indices:
        # Generate indices with as few reusing as possible
        return generate_all_indices(n, num_indices)
    else:
        return generate_indices(n, num_indices, 1)


# Generates indices with as few reuing as possible
def generate_all_indices(n, num_indices):
    taken = []

    # Take all indices
    times = math.floor(num_indices / n)
    for i in range(times):
        taken += list(range(n))

    # Increase length by rest indices
    taken += list(range(num_indices - len(taken)))

    return taken


# Can be used to limit occurrence of subjects within a relation
def generate_indices(n, num_indices, reuse_count=1, used_indices=None, max_instance_excluded=np.Inf, last_indices=None):
    if used_indices is None:
        used_indices = []
    taken = []

    if last_indices is not None:
        # Reuse last_indices if not already used too much
        if all(used_indices.count(x) < max_instance_excluded for x in last_indices):
            return last_indices

    while len(taken) < num_indices:
        # return number between 0 and n (exclude)
        x = np.random.randint(n)

        i = 0
        # if x is already taken or excluded, I need to get another one
        while x in taken or used_indices.count(x) == max_instance_excluded:
            if i > n/2:
                logger.warning(f'Index generation failed to get {num_indices} indices!')
                return
            x = np.random.randint(n)
            i += 1

        for _ in range(reuse_count):
            if len(taken) == num_indices:
                break
            taken.append(x)

    return taken


def generate_index_pairs(n, index_list, max_size=np.Inf):
    pairs = set()
    k = 0

    while len(pairs) < max_size:
        # return number between 0 and n (exclude)
        x = index_list[k]
        y = np.random.randint(n)

        i = 0
        while (x, y) in pairs or (y, x) in pairs or x == y:
            if i > 10:
                return
            y = np.random.randint(n)
            i += 1

        pairs.add((x, y))
        k += 1

        yield x, y


def contains_all(lst, elements):
    return all(x in lst for x in elements)



In [40]:
# Takes {'entity1 relation': ['entity2_1', 'entity2_2', ...], ...}
# to ['entity1 relation entity2_1', 'entity1 relation entity2_2', ...]
def dict_to_list(d):
    dict_list = []
    for key in d:
        for e2 in d[key]:
            dict_list.append(key + ' ' + e2)
    return dict_list

In [41]:
def generate_knowledge(entities, relations, source_lang=None, target_lang=None, n_relations=10, n_facts=1000,
                       use_alias=True, verify_model=False, multilingual=False, n_shot=0, train_w_alias=False):
    train = []

    # Create a dictionary of languages {'ex': [test_ex]}
    test = defaultdict(lambda: dict())

    # Sample relations
    relations_sampled = relations.sample(n_relations)

    # Generate n_facts entity1s, which we repeat for every relation but with different entity2
    entities1 = generate_unique_indices(entities.shape[0], n_facts)

    for index, relation in relations_sampled.iterrows():
        # Print Relation being used
        seen = set()

        # Create Test
        for lang in target_lang:
            test[lang][relation[lang]] = dict()
            test[lang][relation[lang]]['relation'] = defaultdict(list)

            # ----
            if use_alias:
                test[lang][relation[lang]]['alias'] = dict()
                for alias in relation[lang + '_alias'] or []:
                    test[lang][relation[lang]]['alias'][alias] = defaultdict(list)

                test[lang][relation[lang]]['translate'] = dict()
                for ts in relation[lang + '_translate_alias'] or []:
                    test[lang][relation[lang]]['translate'][ts] = defaultdict(list)

                test[lang][relation[lang]]['subword'] = dict()
                for word in relation[lang + '_subword_alias'] or []:
                    test[lang][relation[lang]]['subword'][word] = defaultdict(list)

        # Generate n_facts entity2s
        entity_generator = generate_index_pairs(entities.shape[0], entities1, n_facts)

        for e_id, f_id in entity_generator:
            # Sanity Check for uniqueness of pairs.
            if e_id == f_id or (e_id, f_id) in seen or (f_id, e_id) in seen:
                logger.warning("WARNING: Pair!")

            # Add pair to the list of seen pairs for this relation, so we don't get duplicates.
            seen.add((e_id, f_id))

            # Append facts in source lang to training set and target lang to test set.
            for source in source_lang:
                # Get labels of entities
                if multilingual:
                    e_train = entities[source][e_id].capitalize()
                    f_train = entities[source][f_id]
                else:
                    e_train = entities['label'][e_id].capitalize()
                    f_train = entities['label'][f_id]

                train.append(e_train + ' ' + relation[source] + ' ' + f_train)

                if train_w_alias:
                    # Add all aliases (or not if it is None)
                    for alias in relation[source + '_alias'] or []:
                        train.append(e_train + ' ' + alias + ' ' + f_train)

                    # Add all translations
                    for ts in relation[source + '_translate_alias'] or []:
                        train.append(e_train + ' ' + ts + ' ' + f_train)

                    # Add all subwords
                    for subword in relation[source + '_subword_alias'] or []:
                        train.append(e_train + ' ' + subword + ' ' + f_train)

            # Iterate over target languages and add to test
            for target in target_lang:
                if multilingual:
                    e_test = entities[target][e_id].capitalize()
                    f_test = entities[target][f_id]
                else:
                    e_test = entities['label'][e_id].capitalize()
                    f_test = entities['label'][f_id]

                test[target][relation[target]]['relation'][e_test + ' ' + relation[target]].append(f_test)

                if use_alias:
                    # Add all aliases (or not if it is None)
                    for alias in relation[target + '_alias'] or []:
                        test[target][relation[target]]['alias'][alias][e_test + ' ' + alias].append(f_test)

                    # Add all translations
                    for ts in relation[target + '_translate_alias'] or []:
                        test[target][relation[target]]['translate'][ts][e_test + ' ' + ts].append(f_test)

                    # Add all subwords
                    for subword in relation[target + '_subword_alias'] or []:
                        test[target][relation[target]]['subword'][subword][e_test + ' ' + subword].append(f_test)

    # Dictionary of Key: Subject+Relation, Value: Number of Objects (for precision@k)
    precision_k = defaultdict(int)
    for lang in test:
        for relation in test[lang]:
            for subj_rel in test[lang][relation]['relation']:
                precision_k[subj_rel] = len(test[lang][relation]['relation'][subj_rel])

    # Has to bemultilingual since we want to see its impact on multilingual entities
    if multilingual and n_shot > 0:
        # For every relation, take n_shot target facts and remove them from test and add them to training
        for target in target_lang:
            for relation in test[target]:
                data = test[target][relation]['relation']
                data_keys = list(data.keys())

                # In case of having multiple target, we only take the first
                for i in range(n_shot):
                    train.append(data_keys[i] + ' ' + data[data_keys[i]][0])

                    # Remove it from test data
                    del test[target][relation]['relation'][data_keys[i]][0]

                    if not test[target][relation]['relation'][data_keys[i]]:
                        del test[target][relation]['relation'][data_keys[i]]

    # Create Validation Set - 90% test, 10% validation.
    validation = defaultdict(list)
    validation_langs = target_lang
    n_valid = int(0.1 * n_facts)

    # Iterate over relations in validation language
    for validation_lang in validation_langs:
        for relation in test[validation_lang]:
            data = test[validation_lang][relation]['relation']

            if len(data.keys()) <= n_valid:

                # Take 10% of facts
                # Amount of facts to take per key to get 10%
                facts_per_key = int(n_valid / len(data.keys()))

                for key in data:
                    # This might happen if we do n_shot because not all keys have the same amount of facts
                    if len(data[key]) < facts_per_key:
                        # Instead count facts already taken and take more at the end?
                        raise ValueError('Key doesnt have enough facts!')

                    validation[key] += data[key][:facts_per_key]

                    # Remove them from the key
                    del test[validation_lang][relation]['relation'][key][:facts_per_key]
            else:
                # Just take a fact per key of the first 0.1*n_facts keys
                for key in list(data.keys())[:n_valid]:
                    validation[key].append(data[key][0])

                    # Remove them from the key
                    del test[validation_lang][relation]['relation'][key][0]

                    # If the key is now empty, remove it
                    if not test[validation_lang][relation]['relation'][key]:
                        del test[validation_lang][relation]['relation'][key]

    return train, validation, test, relations_sampled, precision_k

In [42]:
source_language = ['en']
target_language = ['de']
n_relations = 10
n_facts = 1000

In [43]:
train, validation, test, relations, precision_k = generate_knowledge(entities,
                                                                     relations,
                                                                     source_language,
                                                                     target_language,
                                                                     n_relations,
                                                                     n_facts)


In [44]:
train_dict = {'sample': train}
validation_list = dict_to_list(validation)
validation_dict = {'sample': validation_list}

### Preprocessing

First, we pad text so they are a uniform length. While it is possible to padtext in the tokenizer function by setting padding=True, it is more efficient to only pad the text to the length of the longest element in its batch. This is known as dynamic padding. You can do this with the DataCollatorWithPadding function:

##### Convert to datasets

In [47]:
from datasets import load_dataset, Dataset

In [48]:
train_ds = Dataset.from_dict(train_dict)
test_ds = Dataset.from_dict(test_dict)

In [49]:
train_ds

Dataset({
    features: ['sample'],
    num_rows: 16000
})

##### Load Model

In [4]:
from transformers import BertModel, BertTokenizerFast, TrainingArguments, Trainer, DataCollatorWithPadding, BertForMaskedLM
from transformers import AutoTokenizer, AutoModelForMaskedLM

In [5]:
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

In [6]:
model = AutoModelForMaskedLM.from_pretrained("xlm-roberta-base")

Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

In [9]:
tokenizer.mask_token_id

250001

##### Tokenize

In [48]:
def tokenize(tokenizer, dataset):
    def tokenize_fn(examples):
        result = tokenizer(examples["sample"])
        return result

    # Use batched=True to activate fast multithreading!
    tokenized_ds = dataset.map(
        tokenize_fn, batched=True, remove_columns=["sample"]
    )

    return tokenized_ds

In [49]:
train_ds = Dataset.from_dict(train_dict)
validation_ds = Dataset.from_dict(validation_dict)

tokenized_train = tokenize(tokenizer, train_ds)  # Train is shuffled by Huggingface
tokenized_validation = tokenize(tokenizer, validation_ds)

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

### Finetuning

In [50]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [56]:
from custom_trainer import CustomTrainer
from datasets import load_metric
from transformers import TrainingArguments, DataCollatorForLanguageModeling, IntervalStrategy

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
eval_data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [57]:
# Metric for Precision@1
def precision_at_one(eval_pred):
    metric = load_metric("accuracy")
    relation_logits, relation_labels = eval_pred

    # Relation Accuracy
    indices = np.where(relation_labels != -100)  # Select only the ones that are masked
    correct_predictions = relation_logits[indices] == relation_labels[indices]
    relation_precision = metric.compute(predictions=relation_logits[indices],
                                        references=relation_labels[indices])['accuracy']
    return {'eval_accuracy': relation_precision, 'correct_predictions': correct_predictions}

In [58]:
training_args = TrainingArguments(
        output_dir='./output/',
        num_train_epochs=200,
        per_device_train_batch_size=128,
        per_device_eval_batch_size=128,
        learning_rate=5e-5,
        logging_strategy=IntervalStrategy.NO,
        evaluation_strategy=IntervalStrategy.EPOCH,
        save_strategy=IntervalStrategy.NO,
        seed=42
    )

trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_validation,
            tokenizer=tokenizer,
            data_collator=data_collator,
            eval_data_collator=eval_data_collator,
            compute_metrics=precision_at_one
        )

Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [59]:
trainer.train()

***** Running training *****
  Num examples = 10000
  Num Epochs = 200
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 15800


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [29]:
trainer.evaluate(eval_dataset=tokenized_test_ds)



{'eval_accuracy': 0.4131, 'eval_loss': 2.8397233486175537}

#### Testing

In [50]:
model.to('cpu')
model.eval()

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=T

In [38]:
relations_sampled

Unnamed: 0,id,en,de,es,fr
720,P102,member of political party,Parteizugehörigkeit,miembro del partido político,parti politique
83,P452,industry,Branche,industria,secteur d'activité
741,P7501,audio system,Audiosystem,sistema de audio,système audio
623,P6195,funding scheme,Fördertopf,método de financiamiento,plan de financement
561,P2392,teaching method,Lehrmethode,método de enseñanza,méthode pédagogique
514,P57,director,Regisseur,director,réalisateur ou metteur en scène
214,P3274,content deliverer,Serviceprovider,proveedor de contenido,fournisseur du contenu
136,P1142,political ideology,politische Weltanschauung,ideología política,idéologie politique
642,P4151,game mechanics,Spielmechanik,sistema de juego,système de jeu
800,P1750,name day,Namenstag,onomástico,fête du prénom


In [44]:
# For every relation check how high accuracy is
i = 0

for _, relation in relations_sampled.iterrows():
    print('RELATION: ' + relation[source_lang] + ', ' + relation[target_lang])
    
    # Get set of relation facts
    relation_test = test[i*n_facts:(i+1)*n_facts]

    # Tokenize
    relation_test_ds = Dataset.from_dict({'sample': relation_test})
    tokenized_relation_ds = relation_test_ds.map(tokenize_function, batched=True, remove_columns=["sample"])
    
    # Evaluate
    print(trainer.evaluate(eval_dataset=tokenized_relation_ds))
    
    i += 1

RELATION: member of political party, Parteizugehörigkeit


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.356, 'eval_loss': 2.6923699378967285}
RELATION: industry, Branche


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.08, 'eval_loss': 5.250670909881592}
RELATION: audio system, Audiosystem


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.995, 'eval_loss': 0.027172649279236794}
RELATION: funding scheme, Fördertopf


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.077, 'eval_loss': 5.592531681060791}
RELATION: teaching method, Lehrmethode


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.088, 'eval_loss': 5.110952377319336}
RELATION: director, Regisseur


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.645, 'eval_loss': 1.4310952425003052}
RELATION: content deliverer, Serviceprovider


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.701, 'eval_loss': 0.9867790341377258}
RELATION: political ideology, politische Weltanschauung


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.317, 'eval_loss': 2.876711130142212}
RELATION: game mechanics, Spielmechanik


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.371, 'eval_loss': 2.611694812774658}
RELATION: name day, Namenstag


  0%|          | 0/1 [00:00<?, ?ba/s]



{'eval_accuracy': 0.501, 'eval_loss': 1.8172553777694702}


In [98]:
k = 1
relation_test = train[k*n_facts:(k+1)*n_facts]
relation_test

['Wizard industry Astrid',
 'Pie industry Lebens',
 'Dresdner industry Lloyd',
 'Counter industry Gruppe',
 'Hause industry Emergency',
 'Elton industry Grâce',
 'Os industry co',
 'Spider industry Ekim',
 'Aragón industry Montréal',
 'Figaro industry Monitor',
 'Reilly industry Garrett',
 'Worth industry Davenport',
 'Carnaval industry Genoa',
 'Mer industry Classics',
 'Hollywood industry Römer',
 'Cécile industry Stream',
 'Ardèche industry Baker',
 'Angel industry Lord',
 'Palestina industry Sulla',
 'ao industry Oper',
 'Passo industry Bug',
 'Agora industry Palatinat',
 'Rees industry Freie',
 'Application industry Résumé',
 'Visconti industry Borough',
 'Nantes industry Cassini',
 'Lucas industry Steen',
 'Brock industry India',
 'Humphrey industry View',
 'Hitchcock industry Prato',
 'Bara industry Titus',
 'Churchill industry Gordon',
 'Ver industry Norman',
 'Nos industry Lago',
 'Tibet industry Rally',
 'Frida industry Science',
 'Tag industry Hague',
 'Haas industry Poitou'



#### Manual Testing

In [15]:
# Load Tokenizer and Model if not given
tokenizer = BertTokenizerFast.from_pretrained('bert-base-multilingual-cased')
model = BertForMaskedLM.from_pretrained("bert-base-multilingual-cased")

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
# Replace entity2 by [MASK]
fact = 'Harry is Tim'
word_list = fact.split()
entity2 = word_list[-1]
query = fact.replace(entity2, '') + '[MASK]'

In [23]:
# Get Top 5 Tokens
encoded_input = tokenizer(query, return_tensors='pt')
token_logits = model(**encoded_input).logits

mask_token_index = torch.where(encoded_input["input_ids"] == tokenizer.mask_token_id)[1]
mask_token_logits = token_logits[0, mask_token_index, :]

# Pick the [MASK] candidates with the highest logits
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

for chunk in top_5_tokens:
    if entity2 in tokenizer.decode(chunk):
        return False

SyntaxError: 'return' outside function (2948525193.py, line 13)

In [None]:
train_dict['sample']

In [None]:
test_dict['sample']

In [94]:
text = "Wizard Industrie [MASK]"
encoded_input = tokenizer(text, return_tensors='pt')
token_logits = model(**encoded_input).logits

mask_token_index = torch.where(encoded_input["input_ids"] == tokenizer.mask_token_id)[1]
mask_token_logits = token_logits[0, mask_token_index, :]

# Pick the [MASK] candidates with the highest logits
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

for chunk in top_5_tokens:
    print(f"\n'>>> {tokenizer.decode(chunk)}'")


'>>> Ames'

'>>> Morrow'

'>>> Henderson'

'>>> Astrid'

'>>> Stewart'


In [97]:
for t in train_dict['sample']:
    if 'Dresdner' in t:
        print(t)

Dresdner member of political party Pela
Dresdner industry Lloyd
Dresdner audio system Llobregat
Dresdner funding scheme Damm
Stat funding scheme Dresdner
Dresdner teaching method Remote
Dresdner director WK
Dresdner content deliverer NT
Dresdner political ideology Neckar
Dresdner game mechanics Calder
Dresdner name day Mariana
Cécile name day Dresdner


In [11]:
basemodel = BertForMaskedLM.from_pretrained("bert-base-multilingual-cased")

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [20]:
text = "Dresner [MASK]"
encoded_input = tokenizer(text, return_tensors='pt')
token_logits = basemodel(**encoded_input).logits

mask_token_index = torch.where(encoded_input["input_ids"] == tokenizer.mask_token_id)[1]
mask_token_logits = token_logits[0, mask_token_index, :]

# Pick the [MASK] candidates with the highest logits
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()

for chunk in top_5_tokens:
    print(f"\n'>>> {tokenizer.decode(chunk)}'")


'>>> .'

'>>> ,'

'>>> Land'

'>>> :'

'>>> ;'
