In [1]:
import os
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoConfig
from sentence_transformers import SentenceTransformer
from trainer.loss import ContrastiveLoss, ArcFace, MultipleNegativeRankingLoss

load_dotenv()

def login_to_huggingface() -> None:
    login(os.environ.get("HUGGINGFACE_API_KEY"))
    return

login_to_huggingface()

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/qcqced/.cache/huggingface/token
Login successful


In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv('dataset_class/datafolder/arxiv_qa/total/metric_learning_total_paper_chunk.csv')
df

  df = pd.read_csv('dataset_class/datafolder/arxiv_qa/total/metric_learning_total_paper_chunk.csv')


Unnamed: 0,paper_id,doc_id,title,doc,question,label
0,2307.12976,2307.12976_0,evaluating the ripple effects of knowledge edi...,evaluating the ripple effects of knowledge edi...,,0
1,2307.12976,2307.12976_2,evaluating the ripple effects of knowledge edi...,modern language models capture a large body of...,,1
2,2307.12976,2307.12976_3,evaluating the ripple effects of knowledge edi...,"construct rippleedits, a diagnos- tic benchmar...",,2
3,2307.12976,2307.12976_4,evaluating the ripple effects of knowledge edi...,figure 1: illustration of the evaluation scope...,,3
4,2307.12976,2307.12976_5,evaluating the ripple effects of knowledge edi...,introduction\nmodel may be incorrect or become...,,4
...,...,...,...,...,...,...
214028,2303.11749,2303.11749_58,detecting everything in the open world: toward...,<table><caption>table 5. comparison with exist...,,214028
214029,2303.11749,2303.11749_59,detecting everything in the open world: toward...,<table><caption>table 6. ablation study on reg...,,214029
214030,2303.11749,2303.11749_60,detecting everything in the open world: toward...,<table><thead><th>decouple</th><th>proposal ge...,,214030
214031,2303.11749,2303.11749_61,detecting everything in the open world: toward...,"pij = 1 1 + \text{exp}_{}(−zt ijej/τ ) /πγ j ,...",,214031


In [3]:
""" do sampling, get 2 unique doc_id from each paper_id """

df_sample = df.groupby('paper_id').apply(lambda x: x.sample(2)).reset_index(drop=True)
df_sample

Unnamed: 0,paper_id,doc_id,title,doc,question,label
0,909.09100,0909.0910_182,chaos in partial differential equations.pdf,9.1. a lax pair for 2d euler equation\nthe 2d ...,,55302
1,909.09100,0909.0910_398,chaos in partial differential equations.pdf,\text{2b}_{}(i) qceiπe∓ \text{i2}_{}(ϑ1− ϑ2) ....,,55433
2,1011.52700,1011.5270_10,classifying clustering schemes.pdf,remark 1.1. the reader may not be familiar wit...,,152452
3,1011.52700,1011.5270_86,classifying clustering schemes.pdf,classifying clustering schemes\n7.3. functoria...,,152527
4,1206.55380,1206.5538_68,representation learning: a review and new pers...,6.1 directed graphical models directed latent ...,,208706
...,...,...,...,...,...,...
5199,2312.09243,2312.09243_89,occnerf: advancing 3d occupancy prediction in ...,"p = [f −1 x (xpc), f −1 y (ypc), f −1 z (xpc)]...",,210245
5200,2312.09244,2312.09244_75,helping or herding? reward model ensembles mit...,<table><thead><th>scale</th><th>ensemble</th><...,,179398
5201,2312.09244,2312.09244_25,helping or herding? reward model ensembles mit...,"the story changes, however, when we move to ou...",,179349
5202,2312.09254,2312.09254_27,revisiting depth completion from a stereo matc...,ddad [16]. the ddad dataset captures driving s...,,195376


In [4]:
df_sample.to_csv('dataset_class/datafolder/arxiv_qa/total/sampling_metric_learning_total_paper_chunk.csv', index=False)

In [23]:
""" Get total list of paper """

paper_list = os.listdir('api/arxiv/train/')
pid_list = [pid.split('_')[0] for pid in paper_list]

In [29]:
""" Get the list from currently not in db yet """

df_paper = df['paper_id'].astype(str).unique().tolist()
not_in_db = [pid for pid in tqdm(pid_list) if pid not in df_paper]

print(f"Total number of papers: {len(not_in_db)}")

  0%|          | 0/4738 [00:00<?, ?it/s]

Total number of papers: 2311


In [31]:
import pickle

def save_pkl(input_dict, filename: str) -> None:
    """ Save pickle file
    """
    with open(f'{filename}.pkl', 'wb') as file:
        pickle.dump(input_dict, file)

save_pkl(df_paper, 'api/arxiv/exist_list.pkl')

In [30]:
import os

def delete_files(directory, pid):
    """
    특정 조건에 맞는 파일을 삭제하는 함수

    Args:
    directory (str): 파일을 검색할 디렉토리 경로
    condition (function): 파일명이 주어졌을 때 True를 반환하면 파일을 삭제하는 조건 함수
    """
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if os.path.isfile(file_path) and filename.startswith(pid):
            print(f"Deleting {file_path}")
            os.remove(file_path)

directory_path = 'api/arxiv/train/'
for pid in pid_list:
    delete_files(directory_path, pid)


Deleting api/arxiv/train/2311.15722_GLIME: General, Stable and Local LIME Explanation.pdf
Deleting api/arxiv/train/2305.14236_REC-MV: REconstructing 3D Dynamic Cloth from Monocular Videos.pdf
Deleting api/arxiv/train/2312.06091_Learning Unknown Intervention Targets in Structural Causal Models from Heterogeneous Data.pdf
Deleting api/arxiv/train/2306.05637_On the Importance of Feature Decorrelation for Unsupervised Representation Learning in Reinforcement Learning.pdf
Deleting api/arxiv/train/2306.17492_Preference Ranking Optimization for Human Alignment.pdf
Deleting api/arxiv/train/2201.11990_Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model.pdf
Deleting api/arxiv/train/2311.14468_Efficient Gradient Estimation via Adaptive Sampling and Importance Sampling.pdf
Deleting api/arxiv/train/2308.12202_Curriculum Learning with Adam: The Devil Is in the Wrong Details.pdf
Deleting api/arxiv/train/2308.11184_ReFit: Recurrent Fitting Network fo

In [2]:
""" test for calculating loss with batch instance """

query_h = torch.randn(4, 8)  # batch size, hidden state
context_h = torch.randn(4, 8)  # batch size, hidden state
cl_label = torch.tensor([1, 0, 1, 0])
arcface_label = torch.tensor([0,1,2,3])

print(f"query's hidden state is: {query_h}")
print(f"context's hidden state is: {context_h}")

cl = ContrastiveLoss()
cl_loss = cl(query_h, context_h, cl_label)

print(f"contrastive loss is: {cl_loss}")

mnrl = MultipleNegativeRankingLoss()  # make instance in ram
mnrl_loss = mnrl(query_h, context_h)

print(f"multiple negative ranking loss is: {mnrl_loss}")  # so large than contrastive loss, because of scaler constant value

arcface = ArcFace(dim_model=8, num_classes=4)
ce_loss = nn.CrossEntropyLoss(reduction='mean')
arc_query, arc_context = arcface(query_h, arcface_label), arcface(context_h, arcface_label)
arc_loss = ce_loss(arc_query, arcface_label) + ce_loss(arc_context, arcface_label)

print(f"arcface loss is: {arc_loss}")

query's hidden state is: tensor([[ 0.9093,  0.4960,  0.5848,  1.1696, -1.1214,  0.7545, -0.7490,  0.3394],
        [ 0.8048, -0.3955,  1.3547, -2.1583,  0.0388,  0.3670, -1.3757, -0.2926],
        [-0.7121,  0.2876,  0.1503,  0.0779, -0.3757, -0.8671, -0.3234, -0.1743],
        [-0.6929, -0.6078,  0.4177, -0.3774,  0.0104,  1.0541, -2.5757, -0.0502]])
context's hidden state is: tensor([[-0.9991,  0.1006, -1.2891, -0.9167,  0.0075,  1.4130,  1.4518, -0.9321],
        [ 0.2754, -1.3103,  0.5436,  0.9745, -0.6077, -1.1893, -0.0947,  1.4513],
        [ 0.1037,  1.4833,  0.6821,  0.4362, -0.7200, -0.1383,  0.2761,  0.0455],
        [ 1.1397, -1.0453,  0.2057, -2.4381, -0.8149,  0.4375, -1.7154, -0.7276]])
contrastive loss is: 0.3615054190158844
multiple negative ranking loss is: 10.132601737976074


AssertionError: Torch not compiled with CUDA enabled

In [3]:
test_model = torch.load('./saved/arxiv_clm_4096_llama2_7b_hf_state_dict.pth')
for key in test_model.keys():
    print(key)

In [6]:
list(test_model.keys())[-1]

In [None]:
model = AutoModelForCausalLM.from_config(AutoConfig.from_pretrained('meta-llama/Llama-2-7b-hf'))

for k in model.state_dict().keys():
    print(k)

In [None]:
""" rename model attr name  """

In [13]:
import re

def convert_to_latex(text: str) -> str:
    pattern = r'(\b\w+\b)(\d*)(?=\()'
    return re.sub(pattern, r'\\text{\1}_{\2}', text)

example_text = """FFN(x) = max(0, xW1 + b1)W2 + b2 (2)"""

latex_lines = convert_to_latex(example_text.strip())
print(latex_lines)

In [2]:
import os
import pandas as pd

In [5]:
base_path = "dataset_class/datafolder/arxiv_qa/partition/"
df_list = os.listdir(base_path)

df = pd.DataFrame(columns=['paper_id', 'doc_id', 'title', 'doc'])
for sub_url in df_list:
    sub_df = pd.read_csv(base_path + sub_url)
    df = pd.concat([df, sub_df])

df

Unnamed: 0,paper_id,doc_id,title,doc,question
0,2307.12976,2307.12976_0,Evaluating the Ripple Effects of Knowledge Edi...,Evaluating the Ripple Effects of Knowledge Edi...,
1,2307.12976,2307.12976_1,Evaluating the Ripple Effects of Knowledge Edi...,Abstract,
2,2307.12976,2307.12976_2,Evaluating the Ripple Effects of Knowledge Edi...,Modern language models capture a large body of...,
3,2307.12976,2307.12976_3,Evaluating the Ripple Effects of Knowledge Edi...,"construct RIPPLEEDITS, a diagnos- tic benchmar...",
4,2307.12976,2307.12976_4,Evaluating the Ripple Effects of Knowledge Edi...,Figure 1: Illustration of the evaluation scope...,
...,...,...,...,...,...
58,2303.11749,2303.11749_58,Detecting Everything in the Open World: Toward...,<table><caption>Table 5. Comparison with exist...,
59,2303.11749,2303.11749_59,Detecting Everything in the Open World: Toward...,<table><caption>Table 6. Ablation study on reg...,
60,2303.11749,2303.11749_60,Detecting Everything in the Open World: Toward...,<table><thead><th>decouple</th><th>proposal ge...,
61,2303.11749,2303.11749_61,Detecting Everything in the Open World: Toward...,"pij = 1 1 + \text{exp}_{}(−zT ijej/τ ) /πγ j ,...",


In [6]:
output_path = 'dataset_class/datafolder/arxiv_qa/total/total_paper_chunk.csv'
df.to_csv(output_path, index=False)

In [5]:
""" load dataset for testing make prompt to generate question-document pairs dataset """
import os
import numpy as np
import pandas as pd
import google.generativeai as genai

from tqdm.auto import tqdm
from generate_question.generate_question import google_gemini_api


df = pd.read_csv('dataset_class/datafolder/arxiv_qa/total/metric_learning_total_paper_chunk.csv')
df

  df = pd.read_csv('dataset_class/datafolder/arxiv_qa/total/metric_learning_total_paper_chunk.csv')


Unnamed: 0,paper_id,doc_id,title,doc,question,label
0,2307.12976,2307.12976_0,evaluating the ripple effects of knowledge edi...,evaluating the ripple effects of knowledge edi...,,0
1,2307.12976,2307.12976_2,evaluating the ripple effects of knowledge edi...,modern language models capture a large body of...,,1
2,2307.12976,2307.12976_3,evaluating the ripple effects of knowledge edi...,"construct rippleedits, a diagnos- tic benchmar...",,2
3,2307.12976,2307.12976_4,evaluating the ripple effects of knowledge edi...,figure 1: illustration of the evaluation scope...,,3
4,2307.12976,2307.12976_5,evaluating the ripple effects of knowledge edi...,introduction\nmodel may be incorrect or become...,,4
...,...,...,...,...,...,...
214028,2303.11749,2303.11749_58,detecting everything in the open world: toward...,<table><caption>table 5. comparison with exist...,,214028
214029,2303.11749,2303.11749_59,detecting everything in the open world: toward...,<table><caption>table 6. ablation study on reg...,,214029
214030,2303.11749,2303.11749_60,detecting everything in the open world: toward...,<table><thead><th>decouple</th><th>proposal ge...,,214030
214031,2303.11749,2303.11749_61,detecting everything in the open world: toward...,"pij = 1 1 + \text{exp}_{}(−zt ijej/τ ) /πγ j ,...",,214031


In [3]:
""" remove .pdf in the title """

# df['title'] = df['title'].apply(lambda x: x.replace('.pdf', ''))
# df

Unnamed: 0,paper_id,doc_id,title,doc,question,label
0,2307.12976,2307.12976_0,evaluating the ripple effects of knowledge edi...,evaluating the ripple effects of knowledge edi...,,0
1,2307.12976,2307.12976_2,evaluating the ripple effects of knowledge edi...,modern language models capture a large body of...,,1
2,2307.12976,2307.12976_3,evaluating the ripple effects of knowledge edi...,"construct rippleedits, a diagnos- tic benchmar...",,2
3,2307.12976,2307.12976_4,evaluating the ripple effects of knowledge edi...,figure 1: illustration of the evaluation scope...,,3
4,2307.12976,2307.12976_5,evaluating the ripple effects of knowledge edi...,introduction\nmodel may be incorrect or become...,,4
...,...,...,...,...,...,...
214028,2303.11749,2303.11749_58,detecting everything in the open world: toward...,<table><caption>table 5. comparison with exist...,,214028
214029,2303.11749,2303.11749_59,detecting everything in the open world: toward...,<table><caption>table 6. ablation study on reg...,,214029
214030,2303.11749,2303.11749_60,detecting everything in the open world: toward...,<table><thead><th>decouple</th><th>proposal ge...,,214030
214031,2303.11749,2303.11749_61,detecting everything in the open world: toward...,"pij = 1 1 + \text{exp}_{}(−zt ijej/τ ) /πγ j ,...",,214031


In [7]:
df = df[1500:1600]
question = [google_gemini_api(title=row['title'], context=row['doc'], foundation_model='gemini-1.5-flash') for i, row in tqdm(df.iterrows(), total=len(df))]
df['question'] = question
df

  0%|          | 0/100 [00:00<?, ?it/s]

500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting
500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting
500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['question'] = question


Unnamed: 0,paper_id,doc_id,title,doc,question,label
1500,2308.13989,2308.13989_75,ldl: line distance functions for panoramic loc...,line transformer-based approach based on yoon ...,How does the LDL approach utilize line transfo...,1500
1501,2308.13989,2308.13989_76,ldl: line distance functions for panoramic loc...,d. details on experimental setup\nin this sect...,How does the LDL method handle illumination va...,1501
1502,2308.13989,2308.13989_77,ldl: line distance functions for panoramic loc...,0.7 0 o\ncandidate pose search evaluation we c...,How does LDL compare to NetVLAD in terms of ca...,1502
1503,2308.13989,2308.13989_78,ldl: line distance functions for panoramic loc...,"for other regions. for the inversion network, ...",How does the LDL approach differ from previous...,1503
1504,2308.13989,2308.13989_79,ldl: line distance functions for panoramic loc...,image in the stanford 2d-3d-s dataset [4] and ...,How does the line distance function (LDL) appr...,1504
...,...,...,...,...,...,...
1595,2303.01999,2303.01999_71,unsupervised 3d shape reconstruction by part r...,<table><caption>table 3. ablation for phase co...,"What is the purpose of the ""borrow"" function i...",1595
1596,2303.01999,2303.01999_72,unsupervised 3d shape reconstruction by part r...,<table><caption>table 8. training target numbe...,What is the impact of varying the number of tr...,1596
1597,2303.01999,2303.01999_74,unsupervised 3d shape reconstruction by part r...,<table><thead><th>method</th><th>scd|</th><th>...,How do the different phases of the unsupervise...,1597
1598,2303.01999,2303.01999_75,unsupervised 3d shape reconstruction by part r...,"<table><tr><td colspan=""2"">1:</td></tr><tr><td...",What is the purpose of the `swap` procedure in...,1598


In [6]:
output_path = 'dataset_class/datafolder/arxiv_qa/total/1500_1600_test.csv'
df.to_csv(output_path, index=False)

In [8]:
def google_gemini_api(
    prompt: str,
    foundation_model: str = 'gemini-pro',
    temperature: float = 0
) -> str:
    """ make Arxiv Questioning & Answering dataset function with Google AI Gemini API

    As you run this function before, you must set up the your own Google API key for the Gemini API.
    you can use the gemini-pro-api for free with the Google API key.

    we will use the Zero-Shot Learning for generating the QA dataset from the given paper link.
    Args:
        prompt: str, input prompt for generating the question
        foundation_model (str): The foundation model for extracting food ingredients from the given text,
                                default is 'gemini-pro'
        temperature (float): default 0.0, the temperature value for the diversity of the output text
                             (if you set T < 1.0, the output text will be more deterministic, sharpening softmax dist)
                             (if you set T > 1.0, the output text will be more diverse, flattening softmax dist)

    References:
        https://colab.research.google.com/github/google/generative-ai-docs/blob/main/site/en/tutorials/quickstart_colab.ipynb?hl=ko#scrollTo=HTiaTu6O1LRC
        https://ai.google.dev/gemini-api/docs/models/gemini?hl=ko
        https://ai.google.dev/gemini-api/docs/get-started/python?hl=ko&_gl=1*7ufqxk*_up*MQ..*_ga*MTk2ODk3NDQyNi4xNzE0OTIwMjcw*_ga_P1DBVKWT6V*MTcxNDkyMDI2OS4xLjAuMTcxNDkyMDI2OS4wLjAuOTQwNDMwMTE.
        https://ai.google.dev/gemini-api/docs/quickstart?hl=ko&_gl=1*12k4ofq*_up*MQ..*_ga*MTk2ODk3NDQyNi4xNzE0OTIwMjcw*_ga_P1DBVKWT6V*MTcxNDkyMDI2OS4xLjAuMTcxNDkyMDI2OS4wLjAuOTQwNDMwMTE.
        https://ai.google.dev/api/python/google/generativeai/GenerativeModel?_gl=1*1ajz3qu*_up*MQ..*_ga*MTk2ODk3NDQyNi4xNzE0OTIwMjcw*_ga_P1DBVKWT6V*MTcxNDkyNDAyOC4yLjAuMTcxNDkyNDAyOC4wLjAuMTkwOTQyMjU0#generate_content
    """
    GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
    genai.configure(api_key=GOOGLE_API_KEY)

    model = genai.GenerativeModel(foundation_model)
    generation_config = genai.types.GenerationConfig(
        candidate_count=1,
        temperature=temperature
    )

    datasets = ''
    try:
        response = model.generate_content(
            contents=prompt,
            generation_config=generation_config,
        )
        datasets = response.text

    except Exception as e:
        print(e)

    return datasets


In [9]:
""" code cell for prompt testing """

query = f"""You're a question machine. Read the title and context given above and generate the right question based on given context. Here are some rules for generating the questions:
1. Questions should also be able to capture the features or characteristics of a given context.
2. The purpose of asking you to create questions is to create a dataset of question-document pairs.
3. Please create with purpose and generate creative, informative, and diverse questions.
4. Do not return questions that are too similar to each other, or too general.
5. Please only return the question text, keep the number of questions between 1 and 5 with total length less than 100 tokens.
6. If you want to ask multiple questions, please separate them with spaces without newlines."""

titles, contexts = df.title.tolist()[0:30], df.doc.tolist()[0:30]
dataset = list(zip(titles, contexts))

prompt = ""
chunk_size = 30
for i in range(0, len(dataset), chunk_size):
    for j, data in enumerate(dataset[i:i+chunk_size]):
        title, context = data
        prompt += f"\n\ntitle{j}:{title}\ncontext{j}:{context}"

    prompt += query
    print(prompt)
    result = google_gemini_api(
        prompt=prompt,
        foundation_model="gemini-1.5-flash"
    )

result



title0:ldl: line distance functions for panoramic localization
context0:line transformer-based approach based on yoon et al. [57], line transformer-based approach finds candidate poses attaining the most line matches with the query image, and refines poses using pnp-ransac. for establishing line matches, we first render nt × nr synthetic views from the point cloud where we set nt = 100 and nr = 216. then, the top k1 = 100 poses are selected whose netvlad [3] features are closest to the query image. this intermediate step is necessary as the line transformer features are com- putationally expensive and thus could not be naively evalu- ated for all nt × nr views. for each synthetic view from the selected poses, we extract line transformer embeddings and establish matchings with the query image. similar to the structure-based baseline, we convert panoramas to cube- maps during the line matching process. finally, we select the top k2 = 20 poses that have the most line matches, and refine

'How does the proposed method for unsupervised 3D shape reconstruction differ from existing approaches? What are the key challenges in retrieving and assembling 3D shape parts in an unsupervised manner? What types of 3D shapes can be effectively reconstructed using this part retrieval and assembly approach? What are the potential applications of this unsupervised 3D shape reconstruction method?  What are the limitations of the proposed method for unsupervised 3D shape reconstruction? \n'

In [50]:
""" code cell 2 for second version of prompt testing

first ver: title, context
second ver: context
"""

query = """You're a question machine.\nThe given text has a number of contexts numbered. Create questions that are appropriate for each context.\nQuestions should capture the features or characteristics of the given context.\nThe purpose of asking you to create questions is to create a dataset of question-document pairs.\nPlease create with purpose and generate creative, informative, and meaningful questions.\nDo not return questions that are too similar to each other or too general.\nPlease only return the questions' text, and the number of questions should be between 1 and 5 per single individual context group. If you want to ask more than one questions about single context, please separate them with space, not newlines.\nEach context's questions should be no more than 100 tokens.\nSeparate questions for different context group with line breaks."""


dataset = df.doc.tolist()[0:10]

prompt = ""
chunk_size = 10
for i in range(0, len(dataset), chunk_size):
    for j, data in enumerate(dataset[i:i+chunk_size]):
        prompt += f"[context{j}]\n{data}\n\n"

    prompt += query
    print(prompt)
    result = google_gemini_api(
        prompt=prompt,
        foundation_model="gemini-1.5-flash"
    )

result

[context0]
Evaluating the Ripple Effects of Knowledge Editing in Language Models
Roi Cohen1 Eden Biran1 Ori Yoran1 Amir Globerson1,2 Mor Geva1,2, 1Blavatnik School of Computer Science, Tel Aviv University 2Google Research {roi1, edenbiran, oriy}@mail.tau.ac.il, {gamir, morgeva}@tauex.tau.ac.il

[context1]
Abstract

[context2]
Modern language models capture a large body of factual knowledge. However, some facts can be incorrectly induced or become obsolete over time, resulting in factually in- correct generations. This has led to the de- velopment of various editing methods that allow updating facts encoded by the model. Evaluation of these methods has primarily focused on testing whether an individual fact has been successfully injected, and if sim- ilar predictions for other subjects have not changed. Here we argue that such evaluation is limited, since injecting one fact (e.g. “Jack Depp is the son of Johnny Depp”) introduces a “ripple effect” in the form of additional facts that the

''