In [1]:
### Imports and configuration

# setup variables

import os
import json
import tqdm
from s2orc.config import CURRENT_VERSION

# jsonlines https://jsonlines.readthedocs.io/en/latest/#api
import jsonlines
import gzip
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hiplot

LOCAL_S2ORC_DIR = 's2orc-data'

psychology_paper_dir = os.path.join(LOCAL_S2ORC_DIR, CURRENT_VERSION, 'psychology')
psychology_paper_suffix = 'psych.text.jsonl'

links_dir = os.path.join(LOCAL_S2ORC_DIR, CURRENT_VERSION, 'psych_links')
links_suffix = 'psych.text.link.jsonl'


In [2]:
## Get corpus into memory

start = 0
span = 100 # all: 1700

links = []

links_files = sorted(os.listdir(links_dir), key=lambda f: int(f.split('.')[0]))[start:(start+span)]
for link_file in tqdm.tqdm(links_files):
    with gzip.open(os.path.join(links_dir, link_file), 'rb') as f_in:
        batch_links = list(jsonlines.Reader(f_in))
        for link in batch_links:
            if link['citing_paper']['grobid_parse'].get('body_text') is not None and link['cited_paper']['grobid_parse'].get('body_text') is not None:
                links.append(link)

np.random.seed(2134234)
links = np.array(links)
np.random.shuffle(links)

100%|██████████| 100/100 [00:01<00:00, 58.22it/s]


# work on the data

In [3]:
from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings, TransformerDocumentEmbeddings
from flair.data import Sentence

In [153]:
import regex
from syntok.segmenter import split
from syntok.tokenizer import Tokenizer

token_filter_re = r'^\p{L}(\p{L}|\.|[0-9])+$'
science_blacklist = {'et', 'al', 'al.'} # remove words specific for scientific papers without any importance for the task

def filter_tokens(tokens):
    # Filter out non-common-word/non-real-world-entity tokens
    return [
        token for token in tokens if 
            regex.match(token_filter_re, token.text) 
            and token.text not in science_blacklist
    ]

author_re = r'(((de|De|van|Van|von|Von)\s+(\p{Ll}+\s+)?)?\p{Lu}(\p{Ll}|-)+)'

citation_re = (
               r'([(;,]|\s)'
            + author_re +
               r'(\s?((([&,]|and|\s)+\s?' + author_re + r')|(et al\.?)))?' # alternative second author
               r'('
                 r'([;,]|\s)+'
                 r'\s*[0-9]{4}\p{Ll}?\s*' # year
               r')+'
               r'([);,]|\s)'
              )

in_text_citation_re = (
    author_re + r'\s*\((\s*[0-9]{4}\p{Ll}?\s*,?)\)'
)
def filter_citations(text):
    n_subs_made = 1
    while n_subs_made > 0:
        text, n_subs_made = regex.subn(citation_re, ';', text)
        text, n_subs_made2 = regex.subn(citation_re, ';', text)
        n_subs_made += n_subs_made2
    return text

def process_section_to_chunks(text):
    chunksize = 1
    tokenized_sents = list(split(Tokenizer().tokenize(text)))
    sents = [' '.join(str(token) for token in sent) for sent in tokenized_sents]
    sents = [' '.join(sents[i:i+chunksize]) for i in range(len(sents)-chunksize)]
    sentences = []
    for sent in sents:
        sent = text_to_sentence(sent)
        if sent is not None:
            sentences.append(sent)
    return sentences

def text_to_sentence(text):
    text = filter_citations(text) 
    s = Sentence(text, use_tokenizer=True)
    s.tokens = filter_tokens(s.tokens)
    if len(s.tokens) > 0:
        return s
    else:
        return None

In [5]:
# extract the important information from the original S2ORC corpus format
def process_link(link, process_section):
    context = link['citation_context']
    citing_paper = link['citing_paper']
    cited_paper = link['cited_paper']
    
    parts = []
    for text_chunk in cited_paper['grobid_parse']['body_text']:
        text = text_chunk.get('text')
        if text is not None:
            chunk_parts = process_section(text)
            parts.extend(chunk_parts)
    citing_string = ''.join([context['pre_context'], context['context_string'], context['post_context']])
    return {
        'citing_str': context['context_string'],
        'citing_context': citing_string,
        'citing_context_part': text_to_sentence(citing_string),
        'cited_text_parts': parts,
    }


In [6]:
def calc_embedding_scores(link, metrics, embedding_name, embedding):
    all_sim = {}
    
    s = link['citing_context_part']
    embedding.embed(s)
    citation_embedding = s.embedding.detach().numpy()

    sentences = link['cited_text_parts']
    for sentence in sentences:
        embedding.embed(sentence)
        all_sim[sentence.to_plain_string()] = {}
        for name, metric in metrics.items():
            sim = 1 - metric(sentence.embedding.detach().numpy(), citation_embedding)
            full_name = '_'.join([embedding_name, name])
            all_sim[sentence.to_plain_string()][full_name] = sim
        sentence.clear_embeddings()
    s.clear_embeddings()
    
    return pd.DataFrame(all_sim).T


### Bag of words (unweighted)

In [7]:
def calc_bow_occurrence_score(link, name):
    scores = []
    s = Sentence(link['citing_context'], use_tokenizer=True)
    context_token_set = set(token.text for token in s.tokens)
    sentences = link['cited_text_parts']
    for sentence in sentences:
        sent_token_set = set(token.text for token in sentence.tokens)
        score = len(sent_token_set.intersection(context_token_set)) / len(sent_token_set.union(context_token_set))            
        scores.append(score)
    return pd.DataFrame({
        name: scores
    }, index=[sent.to_plain_string() for sent in sentences])

### Bag of words weighted by Inverse Term Frequency (with lemmatization)

In [156]:
from wordfreq import word_frequency
from collections import defaultdict
from nltk.stem import WordNetLemmatizer 
import skfuzzy as fuzz

def get_length_penalty(sent_len):
    # 0.5 value for len=2
    # quickly raising to 1 at ~10 words
    return fuzz.sigmf(sent_len, 2, 0.5)

def get_word_itf(string_tokens):
    min_val = 10e-7
    return [1./max(min_val, word_frequency(token, 'en')) for token in string_tokens]

def transform_tokens(string_tokens, lemmatizer):
    return [lemmatizer.lemmatize(token.lower()) for token in string_tokens]

def calc_bow_itf_score(link, name):
    """
    Score using term frequency
    """
    lemmatizer = WordNetLemmatizer()
    scores = []
    s = link['citing_context_part']
    s_tokens = [token.text for token in s.tokens]
    context_token_set = set(transform_tokens(s_tokens, lemmatizer=lemmatizer))
    sentences = link['cited_text_parts']
    for sentence in sentences:
        sent_tokens = [token.text for token in sentence.tokens]
        sent_token_set = set(transform_tokens(sent_tokens, lemmatizer))
        if len(sent_token_set) == 0:
            scores.append(0)
            continue
        matching_tokens = sent_token_set.intersection(context_token_set)
        all_tokens = sent_token_set.union(context_token_set)
        raw_score = np.sum(get_word_itf(matching_tokens)) / np.sum(get_word_itf(all_tokens))
        # Eliminate artifacts by penalizing extremely short matches
        raw_score *= get_length_penalty(len(sent_token_set))
        if np.isclose(raw_score, 0):
            score = 0.
        else:
            score = 1./-np.log(raw_score)
        scores.append(score)
    return pd.DataFrame({
        name: scores
    }, index=[sent.to_plain_string() for sent in sentences])


###  TS SS distance metric

In [9]:
from scipy.spatial import distance
from scipy.linalg import norm
from Vector_Similarity import *

def TS_SS(vec1, vec2) :
    val = Triangle(vec1, vec2) * Sector(vec1, vec2)
    return min(1, val/3)

def Triangle(vec1, vec2) :
    theta = math.radians(Theta(vec1,vec2))
    return (norm(vec1) * norm(vec2) * math.sin(theta)) / 2

def Theta(vec1, vec2) :
    return math.acos(1 - distance.cosine(vec1, vec2)) + math.radians(10)

def Magnitude_Difference(vec1, vec2) :
    return abs(norm(vec1) - norm(vec2))

def Sector(vec1, vec2) :
    ED = distance.euclidean(vec1, vec2)
    MD = Magnitude_Difference(vec1, vec2)
    theta = Theta(vec1, vec2)
    return math.pi * math.pow((ED+MD),2) * theta/360

2.23606797749979
0.9999999999999998
0.0004639582566999478


### Prepare data

In [10]:
bert_embedding = TransformerDocumentEmbeddings('bert-base-uncased', fine_tune=False)
roberta_embedding = TransformerDocumentEmbeddings('roberta-base', fine_tune=False)
glove_embedding = DocumentPoolEmbeddings([WordEmbeddings('glove')])

In [11]:
n_train_links = int(0.5 * len(links))
n_validation_links = int(0.2 * len(links))
n_test_links = len(links) - n_train_links - n_validation_links
train_links = links[:15]
validation_links = links[15:45]
test_links = links[-n_test_links:]
#links = None

In [12]:
import copy
def prepare_link_and_val(link, val_links):
    result = {"original" : process_link(link, process_section_to_chunks)}
    for i in range(len(val_links)):
        val_link = copy.deepcopy(val_links[i])
        val_link2 = copy.deepcopy(link)
        val_link2['citation_context'] = val_link['citation_context']
        val_link['citation_context'] = link['citation_context']
        result.update({
            "val_orig_context_"+str(i) : process_link(val_link, process_section_to_chunks),
            "val_orig_paper_"+str(i) : process_link(val_link2, process_section_to_chunks)
        })
    return result

### Score pipeline

In [147]:
from plot_text_sim import plot_text_sim 
from scipy.spatial import distance
import itertools
metrics = {"cos" : distance.cosine, "ts_ss" : TS_SS}

# do it in a function to prevent memory leaks
def calculate_similarities(train_links, validation_links):
    results = []
    for link, val_links in tqdm.tqdm(list(itertools.zip_longest(train_links, validation_links))):
        
        preprocessed = prepare_link_and_val(link, val_links or [])
        bow_itf = {name: calc_bow_itf_score(link, name='bow_itf_'+name) for name, link in preprocessed.items()}
        
        bert = {name: calc_embedding_scores(link, metrics, embedding_name='bert_'+name, embedding = bert_embedding) for name, link in preprocessed.items()}
        
        glove = {name: calc_embedding_scores(link, metrics, embedding_name='glove_'+name, embedding = glove_embedding) for name, link in preprocessed.items()}
        roberta = {name: calc_embedding_scores(link, metrics, embedding_name='roberta_'+name, embedding = roberta_embedding) for name, link in preprocessed.items()}
        bow_occur = {name: calc_bow_occurrence_score(link, name='bow_occurrence_'+name) for name, link in preprocessed.items()}
        
        data_emb = {name : pd.merge(pd.merge(glove[name], bert[name], left_index = True, right_index = True), roberta[name], left_index = True, right_index = True) for name, link in preprocessed.items()}
        data_bow = {name : pd.merge(bow_occur[name], bow_itf[name], left_index = True, right_index = True) for name, link in preprocessed.items()}
        data = {name : pd.merge(data_emb[name], data_bow[name], left_index = True, right_index = True) for name, link in preprocessed.items()}
#         data = {name : bow_itf[name] for name, link in preprocessed.items()}
        results.append({
            'citing_str': preprocessed['original']['citing_str'],
            'citing_context': preprocessed['original']['citing_context'],
            'data': data,
        })

    return results, preprocessed


In [None]:

start = 12
span = 1
val_span = 2
val = np.split(validation_links[start:start+val_span*span], span)
train = train_links[start:start+span]
results, preprocessed = calculate_similarities(train, val)

In [14]:
processed_results = copy.deepcopy(results)

for result in processed_results:
    df = result['data']['original']
    df['bert_original_cos_ampl'] = df['bert_original_cos']**6
    df['comb_bow_itf_bert_cos'] = np.sqrt(df['bow_itf_original'] * df['bert_original_cos_ampl'])
    df['comb_bow_itf_bert_ts_ss'] = np.sqrt(df['bow_itf_original'] * df['bert_original_ts_ss'])
    # derive weighted moving average
    for column in df.columns:
        df[f'{column}_rolling'] = df[column].rolling(4, center=True, win_type='gaussian').mean(std=2)


NameError: name 'results' is not defined

In [None]:
processed_results = copy.deepcopy(results)

for result in processed_results:
    df = result['data']['original']
    df['comb_bow_itf_bert'] = np.sqrt(df['bow_itf_original'] * (1 - df['bert_original_cos'])**4)
    df['comb_bow_itf_glove'] = np.sqrt(df['bow_itf_original'] * (1 - df['glove_original_cos'])**4)
    df['comb_bow_itf_roberta'] = np.sqrt(df['bow_itf_original'] * (1 - df['roberta_original_cos'])**4)
    # derive weighted moving average
    for column in df.columns:
        df[f'{column}_rolling'] = df[column].rolling(4, center=True, win_type='gaussian').mean(std=2)

In [None]:
import pickle
with open("results_emb_bow.p", "wb") as file:
    pickle.dump(processed_results, file)

In [None]:
with open("preprocessed_.p", "wb") as file:
    pickle.dump(preprocessed, file)

In [None]:
def mean_results(results):
    return{
        name : {
            column : np.mean(df[column]) for column in df.columns if df[column].dtype == float
        } for name, df in results.items()
    }

In [None]:
mean_res = mean_results(results[0]["data"])

In [None]:
with open("results_mean.p", "wb") as file:
    pickle.dump(mean_res, file)

In [None]:
process_link(train_links[start], process_section_to_chunks)['citing_str'] 

In [15]:
import hiplot as hip
result_df = results[0]['data']['original'].assign(text_beginning = lambda df: df.index.str[:10])
exp = hip.Experiment.from_dataframe(result_df)
displayed_exp = exp.display()

NameError: name 'results' is not defined

In [157]:
_test_results, _test_preprocessed = calculate_similarities([test[2]], [])


  0%|          | 0/1 [00:00<?, ?it/s]

{'opposed', 'item', 'diesendruck', 'our', 'that', 'hammer', 'are', 'from', 'can', 'be', 'category', 'a', 'the', 'is', 'one', 'improved', 'to', 'available', 'researcher', 'time', 'learning', 'two', 'studying', 'motivated', 'gentner', 'data', 'weinshall', 'this', 'presented', 'article', 'of', 'hypothesis', 'publicly', 'have', 'osf.io', 'in', 'at', 'vh7pn', 'demonstrated', 'study', 'by', 'same', 'simultaneously', 'http'}
637
{'the', 'that', 'are', 'from', 'by', 'to', 'of'}
{'in', 'one', 'is', 'to', 'this', 'of'}
{'be', 'the', 'that', 'is', 'one', 'in', 'from', 'by', 'to', 'of'}
{'the'}
{'the', 'two', 'at', 'by', 'same', 'of'}
{'to', 'be', 'of', 'the'}
{'be', 'the', 'learning', 'in', 'from', 'by', 'of'}
{'by', 'is', 'of', 'the'}
{'to', 'study', 'that', 'the'}
{'to', 'are', 'the'}
{'of', 'in', 'from', 'the'}
{'to', 'of', 'our', 'the'}
{'a'}
{'a', 'the', 'that', 'is', 'by', 'to', 'of'}
{'the', 'in', 'is', 'that', 'to'}
{'the', 'have', 'study', 'to', 'of'}
{'the', 'gentner', 'have', 'that', '

{'be', 'the', 'two', 'that', 'in', 'to', 'this', 'of'}
{'the', 'in', 'at', 'that', 'are'}
{'to', 'have', 'our', 'the'}
{'the', 'in', 'one', 'to', 'of'}
{'be', 'the', 'are', 'to', 'of'}
{'a', 'the', 'are', 'to', 'of'}
{'a', 'the', 'one', 'this', 'of'}
{'be', 'the', 'that', 'at', 'are', 'from', 'to', 'can'}
{'that', 'at', 'one', 'to', 'of'}
{'a', 'the', 'our', 'is', 'in', 'to', 'can', 'of'}
{'of', 'be', 'that', 'at'}
{'this', 'hypothesis', 'study', 'the'}
{'be', 'a', 'the', 'to', 'of'}
{'the', 'in', 'at', 'to', 'this', 'of'}
{'be', 'our', 'that', 'from', 'to', 'this'}
{'is', 'the'}
{'be', 'a', 'the', 'our', 'that', 'is', 'in', 'from', 'by', 'same', 'to'}
{'in', 'a', 'the'}
{'the', 'in', 'is', 'to', 'of'}
{'be', 'in', 'that', 'the'}
{'of', 'in', 'the'}
{'the', 'learning', 'that', 'is', 'in', 'are', 'to', 'of'}
{'be', 'a', 'the', 'from', 'to', 'of'}
{'be', 'the', 'in', 'is', 'by', 'to', 'of'}
{'have'}
{'to', 'be', 'of'}
{'to', 'of', 'is'}
{'to', 'a', 'the'}
{'be', 'the', 'our', 'is', 'by',

  0%|          | 0/1 [00:11<?, ?it/s]


KeyboardInterrupt: 

### Slightly wider manual comparison

In [160]:
start = 0
span = 10
test = test_links[start:start+span]

test_results, test_preprocessed = calculate_similarities(test, [])

processed_test_results = copy.deepcopy(test_results)

for result in processed_test_results:
    df = result['data']['original']
    df['bert_original_cos_ampl'] = df['bert_original_cos']**6
    df['comb_bow_itf_bert_cos'] = np.sqrt(df['bow_itf_original'] * df['bert_original_cos_ampl'])
    df['comb_bow_itf_bert_ts_ss'] = np.sqrt(df['bow_itf_original'] * df['bert_original_ts_ss'])
    # derive weighted moving average
    for column in df.columns:
        df[f'{column}_rolling'] = df[column].rolling(4, center=True, win_type='gaussian').mean(std=2)


  0%|          | 0/10 [00:00<?, ?it/s]

{'equivalent', 'usually', 'on', 'rohrer', 'found', 'massed', 'last', 'and', 'that', 'spacing', 'repeated', 'without', 'vul', 'due', 'the', 'failed', 'pashler', 'improves', 'is', 'effort', 'report', 'only', 'to', 'well', 'refers', 'researcher', 'rely', 'been', 'documented', 'practice', 'learning', 'not', 'review', 'experiment', 'comparison', 'century', 'wixted', 'spaced', 'retention', 'significant', 'achieve', 'presentation', 'statistically', 'of', 'literature.over', 'information', 'see', 'mental', 'have', 'in', 'evaluated', 'meaningful', 'cepeda', 'by', 'effect', 'which'}
444
{'researcher', 'practice', 'the', 'in', 'to'}
set()
{'practice', 'the', 'learning', 'and', 'of'}
{'found', 'practice', 'learning', 'and', 'that', 'is', 'review'}
{'the', 'practice', 'is', 'effect', 'of'}
{'the', 'practice', 'failed', 'and', 'that', 'review', 'in', 'spacing', 'retention', 'to', 'of'}
{'information', 'practice', 'the', 'and', 'review', 'not', 'retention', 'effect', 'to', 'which', 'of'}
{'to', 'ceped

 10%|█         | 1/10 [01:28<13:18, 88.70s/it]

{'on', 'thus', 'effect', 'and', 'training', 'primate', 'present', 'rodent', 'also', 'a', 'the', 'between', 'analog', 'moreover', 'is', 'some', 'used', 'for', 'discrimination', 'we', 'ha', 'cocaine', 'been', 'mechanism', 'comparison', 'stimulus', 'allowing', 'primarily', 'examining', 'dopaminergic', 'of', 'addition', 'methamphetamine', 'mediated', 'amphetamine', 'drug', 'in', 'study', 'cathinone', 'determined', 'by', 'direct', 'examined', 'translational'}
128
{'been', 'a', 'the', 'and', 'in'}
{'on', 'the', 'in', 'is', 'cathinone', 'used', 'of'}
{'effect', 'and', 'been'}
{'and', 'of'}
{'cocaine', 'amphetamine', 'and', 'stimulus', 'effect', 'for', 'of'}
{'amphetamine', 'cocaine', 'and', 'in', 'methamphetamine'}
{'and', 'in'}
{'been', 'a', 'drug', 'and', 'of'}
{'ha', 'been', 'and', 'in', 'of'}
{'been', 'the', 'and', 'in', 'of'}
{'of', 'is'}
{'of', 'ha', 'and', 'been'}
{'on', 'mechanism', 'drug', 'and', 'is', 'effect', 'of'}
{'of', 'a', 'the', 'and', 'present', 'methamphetamine'}
{'on', 'th

 20%|██        | 2/10 [01:53<09:16, 69.60s/it]

{'opposed', 'item', 'diesendruck', 'our', 'that', 'hammer', 'are', 'from', 'can', 'be', 'category', 'a', 'the', 'is', 'one', 'improved', 'to', 'available', 'researcher', 'time', 'learning', 'two', 'studying', 'motivated', 'gentner', 'data', 'weinshall', 'this', 'presented', 'article', 'of', 'hypothesis', 'publicly', 'have', 'osf.io', 'in', 'at', 'vh7pn', 'demonstrated', 'study', 'by', 'same', 'simultaneously', 'http'}
637
{'the', 'that', 'are', 'from', 'by', 'to', 'of'}
{'in', 'one', 'is', 'to', 'this', 'of'}
{'be', 'the', 'that', 'is', 'one', 'in', 'from', 'by', 'to', 'of'}
{'the'}
{'the', 'two', 'at', 'by', 'same', 'of'}
{'to', 'be', 'of', 'the'}
{'be', 'the', 'learning', 'in', 'from', 'by', 'of'}
{'by', 'is', 'of', 'the'}
{'to', 'study', 'that', 'the'}
{'to', 'are', 'the'}
{'of', 'in', 'from', 'the'}
{'to', 'of', 'our', 'the'}
{'a'}
{'a', 'the', 'that', 'is', 'by', 'to', 'of'}
{'the', 'in', 'is', 'that', 'to'}
{'the', 'have', 'study', 'to', 'of'}
{'the', 'gentner', 'have', 'that', '

{'by', 'from', 'the'}
{'by', 'of', 'in', 'the'}
{'to', 'the'}
{'to', 'the'}
{'by', 'that', 'the'}
{'the', 'at', 'one', 'to', 'of'}
{'that', 'of', 'the'}
{'the', 'that', 'from', 'to', 'of'}
{'the'}
{'this', 'the'}
{'to', 'the'}
{'the'}
{'to', 'of', 'the'}
{'the'}
{'time', 'the', 'that', 'at', 'same', 'to'}
{'this', 'in', 'the'}
{'in', 'the'}
{'the'}
{'to', 'the'}
{'to', 'in'}
{'be', 'the'}
{'of', 'in', 'the'}
{'the', 'that', 'by', 'to', 'of'}
{'that', 'at'}
{'be', 'by', 'the'}
{'the'}
{'the'}
{'to', 'of', 'the'}
{'simultaneously', 'of', 'the'}
{'of', 'the'}
{'to', 'the'}
{'in'}
{'of', 'that', 'the'}
{'to', 'in'}
{'of', 'be', 'that', 'the'}
{'to', 'at', 'available', 'the'}
{'to', 'the'}
{'to', 'available', 'that', 'the'}
{'to', 'be', 'have'}
{'to'}
{'of', 'available', 'in', 'the'}
{'from', 'at', 'of', 'the'}
{'of', 'this', 'in', 'the'}
{'that', 'the'}
{'in', 'one', 'of', 'the'}
{'of', 'a', 'the'}
{'that', 'the'}
{'available', 'of', 'the'}
{'time', 'be', 'the', 'at', 'same', 'of'}
{'to'}


 30%|███       | 3/10 [03:53<09:52, 84.71s/it]

{'chamber', 'subject', 'here', 'each', 'decision', 'are', 'cpp', 'rodent', 'applied', 'conditioning', 'explore', 'least', 'the', 'comparing', 'allowed', 'subsequent', 'spent', 'first', 'conditioned', 'consisting', 'model', 'commonly', 'reward', 'to', 'used', 'paradigm', 'for', 'ha', 'similar', 'time', 'been', 'two', 'i.e.', 'session', 'typically', 'abuse', 'review', 'widely', 'apparatus', 'place', 'distinct', 'of', 'context', 'see', 'preference', 'drug', 'value', 'an', 'in', 'animal', 'at', 'initial', 'making', 'study', 'addiction', 'by', 'interconnected', 'different', 'measure', 'task'}
238
{'preference', 'the', 'drug', 'an', 'review', 'in', 'animal', 'conditioned', 'cpp', 'reward', 'to', 'place', 'for', 'of'}
{'the', 'drug', 'in', 'cpp', 'reward', 'to', 'for'}
{'the', 'typically', 'reward', 'to', 'of'}
{'the', 'drug', 'are', 'cpp', 'reward', 'to', 'conditioning', 'of'}
{'the', 'drug', 'reward', 'to', 'an', 'for', 'of'}
{'of', 'in', 'the'}
{'cpp', 'used'}
{'each', 'of'}
{'the', 'revie

 40%|████      | 4/10 [04:41<07:21, 73.57s/it]

{'adolescent', 'were', 'frequency', 'and', 'alcoholism', 'shown', 'disorder', 'theta', 'further', 'lower', 'also', 'gene', 'between', 'wa', 'adult', 'tf', 'group', 'alcohol', 'offspring', 'to', 'revealed', 'for', 'power', 'erp', 'related', 'these', 'control', 'delta', 'dependence', 'band', 'significant', 'implicated', 'of', 'range', 'information', 'discriminate', 'example', 'alcoholic', 'activity', 'have', 'in', 'study', 'association', 'measure', 'unique', 'an', 'provide', 'recent'}
230
{'in', 'study', 'to', 'measure', 'of'}
{'also', 'and', 'in', 'an', 'of'}
{'also', 'and', 'in', 'to', 'an', 'for', 'of'}
{'of', 'related', 'and', 'alcoholism', 'in', 'disorder', 'study', 'to', 'an', 'erp'}
{'related', 'between', 'and', 'to', 'significant'}
{'to', 'related'}
{'alcoholic', 'wa', 'in', 'alcohol', 'to', 'of'}
{'alcoholism', 'of'}
{'and', 'in', 'alcohol', 'offspring', 'of'}
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
{'also', 'of', 'alcoholic', 'in'}
{'alcoholism', 'have


invalid value encountered in float_scalars

 50%|█████     | 5/10 [05:29<05:30, 66.06s/it]

{'change', 'suffer', 'found', 'and', 'such', 'ptg', 'facial', 'are', 'fatigue', 'treatment', 'from', 'appearance', 'systematic', 'life', 'a', 'comparable', 'with', 'level', 'clinical', 'moderate', 'negatively', 'to', 'cancer', 'other', 'survivor', 'these', 'been', 'it', 'over', 'population', 'including', 'review', 'quality', 'high', 'problem', 'swallowing', 'this', 'distress', 'of', 'however', 'their', 'speech', 'have', 'pain', 'in', 'hnc', 'consistently', 'effect', 'affect', 'which'}
124
{'and', 'in', 'to', 'this', 'cancer', 'of'}
{'and', 'cancer', 'of', 'are'}
{'and', 'have', 'in', 'treatment', 'to', 'cancer', 'of'}
{'survivor', 'suffer', 'a', 'and', 'such', 'from', 'problem', 'cancer'}
{'life', 'it', 'and', 'with', 'quality', 'treatment', 'to', 'this', 'of'}
{'of', 'cancer', 'have', 'in'}
{'however', 'and', 'in', 'cancer', 'of'}
{'and', 'with', 'population', 'including', 'treatment', 'to', 'this', 'cancer', 'of'}
{'and', 'in', 'to', 'cancer', 'of'}
{'to', 'in', 'a', 'of'}
{'of', 'ca

 60%|██████    | 6/10 [05:56<03:37, 54.35s/it]

{'represent', 'and', 'such', 'sensory', 'that', 'van', 'signal', 'amygdala', 'dangerous', 'all', 'can', 'also', 'response', 'den', 'a', 'the', 'ancestral', 'moreover', 'carlsson', 'structure', 'induce', 'activate', 'to', 'specific', 'other', 'spider', 'related', 'encoding', 'been', 'these', 'tested', 'de', 'typically', 'stimulus', 'danger', 'potentially', 'of', 'snake', 'have', 'psychophysiological', 'an'}
335
{'and', 'danger'}
{'to', 'of', 'the'}
{'a', 'the', 'and', 'such', 'danger', 'to', 'of'}
{'an', 'of', 'the'}
{'and', 'have'}
{'and', 'a', 'the'}
{'stimulus', 'and'}
{'a', 'and', 'such', 'stimulus', 'to'}
{'stimulus', 'and', 'that'}
{'the', 'and', 'that', 'to', 'of'}
{'to', 'and', 'an', 'that'}
{'stimulus', 'and', 'of', 'the'}
{'and', 'the'}
{'and'}
{'response', 'a', 'the', 'sensory', 'stimulus', 'psychophysiological', 'of'}
{'a', 'the', 'such', 'to', 'other', 'of'}
{'to', 'stimulus', 'typically'}
{'the', 'and', 'that', 'stimulus', 'to', 'of'}
{'the', 'such', 'that', 'stimulus', 'o

 70%|███████   | 7/10 [07:06<02:56, 58.96s/it]

{'existence', 'indirectly', 'necessary', 'such', 'that', 'wheelchair', 'referred', 'attunement', 'demonstrating', 'the', 'a', 'variable', 'with', 'deal', 'optic', 'or', 'locomotion', 'is', 'attributing', 'unfamiliarity', 'fixation', 'to', 'optical', 'use', 'pattern', 'specific', 'rate', 'ha', 'been', 'great', 'practice', 'finding', 'condition', 'control', 'angle', 'supported', 'under', 'expansion', 'this', 'of', 'perceptual', 'effectively', 'in', 'judging', 'approaching', 'demonstrated', 'motor', 'by', 'an', 'task'}
509
{'of', 'the', 'in', 'an', 'task'}
{'the', 'in', 'that', 'to', 'task'}
{'the', 'in', 'that', 'by', 'of'}
{'this', 'in', 'the'}
{'the', 'or', 'that', 'to', 'an', 'of'}
{'the', 'such', 'to', 'an', 'of'}
{'the', 'with', 'or', 'that', 'to', 'of'}
{'a', 'the', 'or', 'such', 'with', 'that', 'is', 'in', 'to', 'of'}
set()
{'of', 'the'}
{'the', 'is', 'by', 'optical', 'of'}
{'variable'}
{'the', 'or', 'is', 'by', 'to', 'of'}
{'the', 'in', 'is', 'to', 'an', 'of'}
{'by', 'in', 'of', 

 80%|████████  | 8/10 [08:47<02:22, 71.41s/it]

{'regarding', 'current', 'research', 'leading', 'and', 'dechurch', 'le', 'our', 'leaf', 'important', 'performance', 'author', 'a', 'the', 'wa', 'practitioner', 'with', 'is', 'pearsall', 'tmms', 'order', 'to', 'therefore', 'much', 'about', 'many', 'for', 'human', 'tmm', 'mark', 'team', 'absence', 'foster', 'this', 'of', 'organization', 'information', 'how', 'little', 'suggest', 'area', 'improve', 'their', 'purpose', 'resource', 'yet', 'e.g.', 'in', 'future', 'known', 'antecedent', 'literature', 'mesmermagnus', 'an'}
166
{'area', 'and', 'team', 'for', 'of', 'organization'}
{'the', 'and', 'team', 'to', 'of', 'organization'}
{'their', 'and', 'team', 'in', 'order', 'to', 'of'}
{'information', 'the', 'their', 'and', 'team', 'to', 'of'}
{'literature', 'and', 'team', 'the'}
{'the', 'their', 'and', 'team', 'to', 'about', 'of'}
{'information', 'the', 'and', 'with', 'team', 'is', 'in', 'to', 'for', 'of'}
{'performance', 'research', 'a', 'the', 'their', 'and', 'mark', 'team', 'in', 'yet', 'anteced

 90%|█████████ | 9/10 [09:25<01:01, 61.41s/it]

{'asked', 'current', 'were', 'and', 'are', 'circumstance', 'from', 'answer', 'learner', 'respond', 'the', 'with', 'knowledge', 'main', 'holyoak', 'to', 'eye', 'two', 'analogous', 'problem', 'extension', 'meaning', 'presented', 'of', 'explain', 'how', 'reader', 'adapted', 'blind', 'test', 'analogical', 'convergence', 'gick', 'through', 'derived', 'story', 'participant', 'his', 'character'}
637
{'the', 'are', 'from', 'to', 'of'}
{'to', 'and', 'problem', 'of'}
{'the', 'and', 'analogical', 'knowledge', 'from', 'to', 'of'}
{'analogical', 'the'}
{'two', 'and', 'of', 'the'}
{'to', 'and', 'of', 'the'}
{'of', 'and', 'from', 'the'}
{'and', 'of', 'the'}
{'to', 'analogical', 'problem', 'the'}
{'how', 'the', 'and', 'are', 'problem', 'to'}
{'the', 'and', 'problem', 'from', 'of'}
{'the', 'and', 'analogical', 'problem', 'to', 'of'}
{'analogical'}
{'the', 'and', 'analogical', 'to', 'of'}
{'to', 'problem', 'the'}
{'to', 'of', 'problem', 'the'}
{'the', 'and', 'problem', 'to', 'of'}
{'the', 'are', 'proble

{'the', 'and', 'analogical', 'problem', 'to', 'of'}
{'to', 'of', 'the'}
{'the', 'two', 'problem', 'from', 'of'}
{'the', 'test', 'story', 'to', 'character', 'of'}
set()
{'to', 'of', 'the'}
{'to', 'and', 'the'}
{'and', 'the'}
{'to', 'the'}
{'the'}
{'to', 'and', 'were', 'the'}
{'the'}
{'and', 'with', 'of', 'the'}
set()
{'two', 'were', 'from', 'the'}
{'two', 'to', 'of'}
{'to', 'of', 'the'}
{'to', 'were', 'the'}
{'to', 'of'}
{'to'}
{'to', 'and', 'of', 'the'}
{'and', 'of', 'the'}
{'to', 'and', 'of', 'the'}
{'and', 'the'}
{'two', 'and', 'from', 'the'}
{'to', 'the'}
{'and'}
{'from', 'the'}
{'and', 'of', 'the'}
{'through', 'to', 'the'}
{'to', 'the'}
{'his', 'the'}
{'his', 'to', 'of', 'the'}
{'of', 'the'}
{'were', 'the', 'and', 'from', 'his', 'to', 'of'}
{'the'}
{'the'}
{'to', 'the'}
{'the'}
{'the', 'and', 'his', 'to', 'of'}
{'and', 'the'}
{'to', 'the'}
{'and', 'the'}
{'the'}
{'and', 'the'}
{'his', 'to', 'and', 'the'}
{'to'}
{'the'}
{'of', 'the'}
{'to', 'of', 'the'}
{'and'}
{'the'}
{'the'}
{'the

100%|██████████| 10/10 [11:28<00:00, 68.87s/it]


In [164]:
import pickle
with open("results_emb_bow_test.p", "wb") as file:
    pickle.dump(processed_test_results, file)

In [103]:
from scipy import stats
stats.spearmanr(df['bert_original_cos_ampl'],df['bert_original_ts_ss'] )

SpearmanrResult(correlation=0.9898346133962346, pvalue=0.0)

In [161]:

def display_best(df, column):
    print(f'\n-- {column.upper()} --')
    print(*[f'{i+1}: "{chunk}"' 
            for i, chunk in enumerate(df[column].sort_values(ascending=False).index.values.tolist()[:5])
           ],
          sep='\n'
         )
    
for result in processed_test_results:
    result['citing_str']
    result['citing_context']
    df = result['data']['original']
    display_best(df, 'bow_itf_original')
    display_best(df, 'bert_original_ts_ss')
    display_best(df, 'comb_bow_itf_bert_ts_ss')


'Cepeda, Pashler, Vul, Wixted, & Rohrer (2006)'

'rely on the effects of mental effort, comparisons have not been well documented in the literature.Over the last century learning researchers have found that spaced practice improves information retention (see review by Dempster, 1988) . In a report by Cepeda, Pashler, Vul, Wixted, & Rohrer (2006) of 271 spaced practice experiments, only 4.4% failed to achieve meaningful and statistically significant spacing effects. Information retention due to spaced practice is usually evaluated in comparison to massed practice, which refers to repeated presentation of equivalent information without'


-- BOW_ITF_ORIGINAL --
1: "The term spacing effect refers to enhanced learning during spaced as compared to massed study episodes for given item"
2: "Only of comparisons of massed and spaced performance showed no effect or negative effect from spacing making the spacing effect quite robust"
3: "The interaction between magnitude of the spacing effect and retention interval was examined by calculating the difference in performance between massed and spaced presentations and collapsing over each of seven retention interval ranges"
4: "The spacing effect hinges upon comparison of massed and spaced presentations of to be learned item"
5: "Because these and other theories are able to make differential predictions for spaced vs massed presentations as well as for changes in lag our theoretical discussion applies to both spacing and lag effects"

-- BERT_ORIGINAL_TS_SS --
1: "In spite of abundant evidence for distributed practice benefits number of empirical studies e.g and recent review of t

'(Gatch et al. 2015;'

'used in the present study is primarily mediated by dopaminergic mechanisms. Moreover, cocaine has also been used as the training stimulus in rodent drug discrimination studies examining some of the amphetamine and cathinone analogs examined in the present study (Gatch et al. 2015; Gatch et al. 2013; Glennon and Young 1984b) , thus allowing for a direct translational comparison between rodents and non-human primates. For comparison, we also determined the effects of a 3,4-methylenedioxy addition on the cocaine-like effects of methamphetamine (3,4-methylenedioxymethamphetamine;'


-- BOW_ITF_ORIGINAL --
1: "Similarly methamphetamine amphetamine and cocaine substitute in methcathinone trained rats"
2: "Other cathinone compounds including mephedrone methylone butylone mephedrone methylenedioxypyrovalerone MDPV and FMC have been reported to produce increases in locomotor activity see review by and fully substitute for the discriminative stimulus effects of cocaine amphetamine and methamphetamine Dal Cason Gatch"
3: "These findings are in agreement with earlier findings that several abused cathinones fully substituted for the discriminative stimulus effects of cocaine amphetamine and methamphetamine Dal Cason Gatch and that methamphetamine and MDMA fully substitute in rats trained to discriminate MDPV"
4: "Table Curve was used to estimate the peak ambulation following administration of each cathinone analog"
5: "All of the compounds had comparable potencies when tested in methamphetamine or in cocaine trained rats"

-- BERT_ORIGINAL_TS_SS --
1: "These findings are 

'Gick & Holyoak, 1983;'

'that motivated our hypothesis is that researchers have demonstrated that learning can be improved by studying two items of the same category simultaneously, as opposed to one at a time (Catrambone & Holyoak, 1989; Gentner, Loewenstein, & Thompson, 2003; Gick & Holyoak, 1983; Hammer, Diesendruck, Weinshall, & The data from the studies presented in this article are publicly available from: https://osf.io/vh7pn.'


-- BOW_ITF_ORIGINAL --
1: "For example Gentner and Gentner in press have demonstrated that alternative analogies known by subjects prior to the experiment produce systematically varying patterns of difficulty among types of electricity problems"
2: "However the present study provides evidence against this hypothesis"
3: "More generally the function of an analogy is to derive new solution hypothesis or prediction this is done by finding an initial partial mapping between the two analogs and then extending the mapping by retrieving or creating additional knowledge about the analog that was initially less well understood"
4: "This pattern is in accord with our hypothesis regarding analog similarity while dissimilar analogs have greater potential to yield optimal schemas they are also more likely to fail to produce any useful schema"
5: "Experiment thus yielded no support for the hypothesis that augmenting the story analog with verbal principle would increase analogical transfer"

-- BERT

'Bardo and Bevins, 2000;'

'decision making task, similar to the conditioned place preference (CPP) paradigm widely applied to the study of addiction in animal models. CPP has been commonly used to measure the reward value of different drugs of abuse (for reviews, see Bardo and Bevins, 2000; Tzschentke, 2007) . Here, drug-free subjects (typically rodents), are first allowed to explore an apparatus consisting of at least two distinct interconnected chambers to measure initial preference (i.e., by comparing time spent in each context). In subsequent conditioning sessions,'


-- BOW_ITF_ORIGINAL --
1: "The purpose of this review is to provide an evaluation of conditioned place preference CPP as an experimental protocol for measuring drug reward in laboratory animals"
2: "Another limitation of CPP stems from the tendency of animals to prefer one of the two distinct contexts of the apparatus before conditioning occurs"
3: "Since it is not clear what class of behaviors are reinforced during CPP conditioning trials the term reward seems more appropriate to describe drug induced CPP"
4: "Despite this advantage it has been argued that CPP is not particularly sensitive to changes in drug dose"
5: "Since the early there has been some disagreement about whether drug CPP and self administration represent two alternative methods for measuring common reward process"

-- BERT_ORIGINAL_TS_SS --
1: "Although methodological details differ among laboratories typical CPP experiment includes differentially pairing two distinct sets of environmental contextual cues with the s

'Rangaswamy et al., 2007)'

'TF measures of ERP activity. Recent studies, for example, have revealed an association between P3-related delta and theta and alcoholism. Power in these frequency ranges was lower in adult alcoholics and in high-risk adolescent and adult offspring of alcoholics Rangaswamy et al., 2007) . Delta and theta bands were also shown to provide unique information to discriminate between alcoholic and control groups . Further, significant associations between event-related activity in these bands and genes implicated in alcohol dependence and related disorders have'


-- BOW_ITF_ORIGINAL --
1: "This study was designed to evaluate the neural oscillatory activity in the theta and delta frequency ranges within the temporal window of the P300 response to the visual target in adolescent offspring of alcoholics who are considered at high risk for developing alcoholism"
2: "The primary purpose of the present study was to assess if the low theta and delta band power seen previously in the alcohol dependent subjects were trait markers for alcoholism"
3: "The present study strongly indicates adolescent offspring of alcoholics HR appear to have weaker or possibly less organized theta system than the control LR subjects and reduced post stimulus theta power may be strong endophenotype for alcoholism and related disorders"
4: "To summarize the theta and delta post stimulus oscillations are remarkably reduced in adolescent offspring of alcoholics"
5: "We have previously demonstrated that power in theta and delta bands in the ms window were significantly lower in

'[7]'

'its treatment, such as pain, fatigue, problems with speech and swallowing, and changes in facial appearance. These effects negatively affect their health-related quality of life [6] , and over 25% of HNC survivors suffer from clinical levels of distress [7] . However, in this population, moderate to high levels of PTG have consistently been found [8] [9] [10] , which are comparable to levels of PTG in other cancer populations [11] [12] [13] . In a systematic review including'


-- BOW_ITF_ORIGINAL --
1: "Many cancer patients and survivors suffer from psychological problems such as depression"
2: "Depression can easily be overlooked because symptoms of cancer and its treatment resemble neurovegetative symptoms of depression such as fatigue loss of appetite and sleep disturbance"
3: "Other prospective studies showed that there are distinct patterns regarding the course of psychological distress ranging from resilience no distress before or after treatment recovery elevated distress followed by return to normal delayed recovery and persisting distress"
4: "Conversely in patients with symptoms of depression assessment by diagnostic interviews may lead to underrecognition of unmet needs for psychological support as some oncologists may be insufficiently skilled to identify psychological distress and perceived social support in patients"
5: "value of indicates no observed heterogeneity and larger values show increasing heterogeneity with as low as moderate and as 

'(Öhman et al., 2001;'

'al., 2009 Tamietto et al., , 2015 de Gelder et al., 2010 de Gelder et al., , 2012 Van den Stock et al., 2011a , 2015 . Moreover, stimuli that represent an ancestral danger, such as snakes and spiders (Öhman et al., 2001; Öhman, 2009; Troiani and Schultz, 2013) have also been tested. Typically, all these stimuli can induce specific psychophysiological responses and can activate the amygdala and other structures related to the sensory encoding of potentially dangerous signals (Carlsson et al.,'


-- BOW_ITF_ORIGINAL --
1: "This model was based on series of studies demonstrating psychophysiological responses to fear stimuli snakes spiders and angry faces that because of backward masking were blocked from conscious processing see and for reviews"
2: "Because it has been demonstrated that small animal stimuli can control psychophysiological responses from mere preattentive level of information processing e.g it is interesting to examine whether the coupling of automatic emotional activation and efficient capture of attention that holds for angry faces also is true for another class of evolutionary fear relevant stimuli snakes and spiders"
3: "This stimulus driven call for processing resources is associated with phasic psychophysiological activation manifested as orienting responses which facilitates further sensory processing of the stimulus"
4: "Furthermore there is good evidence to support that angry but not happy faces can activate psychophysiological responses even though pre

'(Michaels and de Vries, 1998;'

'locomotion. Attributing the specific patterns of fixation under the wheelchair condition to unfamiliarity with wheelchair use is indirectly supported by the findings demonstrating that a great deal of practice is necessary to effectively use optical variables in motor control (Michaels and de Vries, 1998; Jacobs et al., 2001; Fajen and Devaney, 2006) . This is referred to as perceptual attunement. The existence of perceptual attunement has been demonstrated with perceptual-motor tasks, such as judging optic angles or the expansion rate of an approaching'


-- BOW_ITF_ORIGINAL --
1: "If the goal of perceptual learning is the discovery of perceptual variable that is sufficient to satisfy the task demands on judgments or actions in that ecology our results may provide some insight into the process by which access is gained to such variables"
2: "Perceptual theories in turn differ in their claims about how such information might or might not be exploited three theories are of interest in this article"
3: "Educating attention also can involve the development of smart perceptual device"
4: "As causal support we include information available for detection and smart perceptual devices suitable for the detection of that information"
5: "Behind the direct and directed perception versions are carefully articulated arguments about the meaning of the term specification the nature of perceptual information and recognition of if not emphasis on role for perceptual learning"

-- BERT_ORIGINAL_TS_SS --
1: "However even when the kinematics specify the ki

'Pearsall, Ellis, & Bell, 2010)'

'Yet much less is known about TMM antecedents, leading many authors to suggest this as an important area for future research (e.g., DeChurch & MesmerMagnus, 2010; Lim & Klein, 2006; Marks, Zaccaro, & Mathieu, 2000; Mohammed et al., 2010; Pearsall, Ellis, & Bell, 2010) . This absence of information in the literature leaves organizations and human resource practitioners with little information regarding how to foster TMMs in order to improve the performance of their teams. Therefore, our purpose in the current research was'


-- BOW_ITF_ORIGINAL --
1: "Although significant research has focused on exploring the potential benefits of these two cognitions on outcomes such as team performance and satisfaction e.g Edwards Day as well as identifying their potential antecedents e.g Marks Marks Sabella the processes and developmental phases underlying their emergence in newly formed teams have yet to be explored e.g Ilgen"
2: "Next teams performed an additional minute experimental task during which team performance was assessed"
3: "While they were aware that the vehicles and knowledge about the tracks was divided among the team members they did not initially know which area of responsibility and expertise the other members possessed"
4: "Each team member was given sheet that illustrated his or her own specific role which they were able to keep during the experimental task"
5: "The individual members responses were averaged to create team score which was standardized for comparison"

-- BERT_ORIGINAL_TS_SS --
1: "

'Gick and Holyoak (1983)'

'The knowledge extension test presented learners with a convergence problem adapted from Gick and Holyoak (1983) . Through the eyes of the main character two analogical stories are presented to the reader. Participants were asked to explain how the character derived meaning from the analogous stories to respond to his current circumstance. Answers were blind'


-- BOW_ITF_ORIGINAL --
1: "Gick and Holyoak tested variety of such control groups which received either an irrelevant story that was not analogous to the radiation problem or no story at all no more than of such subjects ever produced the convergence solution"
2: "We should note however that Gick and Holyoak used both between subject and within subject procedures in different experiments and obtained essentially the same estimates of the percentage of subjects who could apply the analogy compare their Experiments II and"
3: "As noted above Gick and Holyoak presented an initial story analogy to subjects in the guise of recall experiment"
4: "The basic procedure used to separate spontaneous noticing of an analogy from its application after hint was adapted from Gick and Holyoak and was used in all of the present experiments"
5: "We will discuss the Gick and Holyoak study in more detail since it led directly to the present investigation"

-- BERT_ORIGINAL_TS_SS --
1: "Subjects were told 

In [162]:



for result in processed_test_results:
    result['citing_str']
    result['citing_context']
    df = result['data']['original']
    _ = plot_text_sim(df['bow_itf_original'], df.index, title='bow_itf')
    _ = plot_text_sim(df['bert_original_cos_ampl'], df.index, title='bert_cos')
    _ = plot_text_sim(df['bert_original_ts_ss'], df.index, title='bert_ts_ss')
    _ = plot_text_sim(df['comb_bow_itf_bert_cos'], df.index, title='comb_bow_itf_bert_cos')
    _ = plot_text_sim(df['comb_bow_itf_bert_ts_ss'], df.index, title='comb_bow_itf_bert_ts_ss')



'Cepeda, Pashler, Vul, Wixted, & Rohrer (2006)'

'rely on the effects of mental effort, comparisons have not been well documented in the literature.Over the last century learning researchers have found that spaced practice improves information retention (see review by Dempster, 1988) . In a report by Cepeda, Pashler, Vul, Wixted, & Rohrer (2006) of 271 spaced practice experiments, only 4.4% failed to achieve meaningful and statistically significant spacing effects. Information retention due to spaced practice is usually evaluated in comparison to massed practice, which refers to repeated presentation of equivalent information without'

'(Gatch et al. 2015;'

'used in the present study is primarily mediated by dopaminergic mechanisms. Moreover, cocaine has also been used as the training stimulus in rodent drug discrimination studies examining some of the amphetamine and cathinone analogs examined in the present study (Gatch et al. 2015; Gatch et al. 2013; Glennon and Young 1984b) , thus allowing for a direct translational comparison between rodents and non-human primates. For comparison, we also determined the effects of a 3,4-methylenedioxy addition on the cocaine-like effects of methamphetamine (3,4-methylenedioxymethamphetamine;'

'Gick & Holyoak, 1983;'

'that motivated our hypothesis is that researchers have demonstrated that learning can be improved by studying two items of the same category simultaneously, as opposed to one at a time (Catrambone & Holyoak, 1989; Gentner, Loewenstein, & Thompson, 2003; Gick & Holyoak, 1983; Hammer, Diesendruck, Weinshall, & The data from the studies presented in this article are publicly available from: https://osf.io/vh7pn.'

'Bardo and Bevins, 2000;'

'decision making task, similar to the conditioned place preference (CPP) paradigm widely applied to the study of addiction in animal models. CPP has been commonly used to measure the reward value of different drugs of abuse (for reviews, see Bardo and Bevins, 2000; Tzschentke, 2007) . Here, drug-free subjects (typically rodents), are first allowed to explore an apparatus consisting of at least two distinct interconnected chambers to measure initial preference (i.e., by comparing time spent in each context). In subsequent conditioning sessions,'

'Rangaswamy et al., 2007)'

'TF measures of ERP activity. Recent studies, for example, have revealed an association between P3-related delta and theta and alcoholism. Power in these frequency ranges was lower in adult alcoholics and in high-risk adolescent and adult offspring of alcoholics Rangaswamy et al., 2007) . Delta and theta bands were also shown to provide unique information to discriminate between alcoholic and control groups . Further, significant associations between event-related activity in these bands and genes implicated in alcohol dependence and related disorders have'

'[7]'

'its treatment, such as pain, fatigue, problems with speech and swallowing, and changes in facial appearance. These effects negatively affect their health-related quality of life [6] , and over 25% of HNC survivors suffer from clinical levels of distress [7] . However, in this population, moderate to high levels of PTG have consistently been found [8] [9] [10] , which are comparable to levels of PTG in other cancer populations [11] [12] [13] . In a systematic review including'

'(Öhman et al., 2001;'

'al., 2009 Tamietto et al., , 2015 de Gelder et al., 2010 de Gelder et al., , 2012 Van den Stock et al., 2011a , 2015 . Moreover, stimuli that represent an ancestral danger, such as snakes and spiders (Öhman et al., 2001; Öhman, 2009; Troiani and Schultz, 2013) have also been tested. Typically, all these stimuli can induce specific psychophysiological responses and can activate the amygdala and other structures related to the sensory encoding of potentially dangerous signals (Carlsson et al.,'

'(Michaels and de Vries, 1998;'

'locomotion. Attributing the specific patterns of fixation under the wheelchair condition to unfamiliarity with wheelchair use is indirectly supported by the findings demonstrating that a great deal of practice is necessary to effectively use optical variables in motor control (Michaels and de Vries, 1998; Jacobs et al., 2001; Fajen and Devaney, 2006) . This is referred to as perceptual attunement. The existence of perceptual attunement has been demonstrated with perceptual-motor tasks, such as judging optic angles or the expansion rate of an approaching'

'Pearsall, Ellis, & Bell, 2010)'

'Yet much less is known about TMM antecedents, leading many authors to suggest this as an important area for future research (e.g., DeChurch & MesmerMagnus, 2010; Lim & Klein, 2006; Marks, Zaccaro, & Mathieu, 2000; Mohammed et al., 2010; Pearsall, Ellis, & Bell, 2010) . This absence of information in the literature leaves organizations and human resource practitioners with little information regarding how to foster TMMs in order to improve the performance of their teams. Therefore, our purpose in the current research was'

'Gick and Holyoak (1983)'

'The knowledge extension test presented learners with a convergence problem adapted from Gick and Holyoak (1983) . Through the eyes of the main character two analogical stories are presented to the reader. Participants were asked to explain how the character derived meaning from the analogous stories to respond to his current circumstance. Answers were blind'