In [1]:
import wandb 
import pandas as pd 

In [4]:
wandb.login()
wandb.init(
    project="prompting-validators",
    entity="macrocosmos",
)



In [51]:
wandb.login()
entity = "macrocosmos"
project = "prompting-validators"
run_id = "lnpiqdt4"
api = wandb.Api()
run = api.run(f"{entity}/{project}/{run_id}")
history = run.history()
wb_df = pd.DataFrame(history)
# Filter the DataFrame to keep rows where 'is_organic' is not NaN
filtered_df = wb_df.dropna(subset=["is_organic_sample"])
filtered_df["is_organic_sample"].value_counts()

KeyError: ['is_organic_sample']

In [36]:
def fetch_data_from_runs(project_name, netuid, required_tag="2.6.2"):
    dataframes = []
    runs = wandb.Api(timeout=120).runs(f"{project_name}")

    # Filter runs based on 'netuid', 'state', and 'tag'
    filtered_runs = [
        run for run in runs
        if run.config.get('netuid') == netuid
        and run.state == 'running'
        and required_tag in run.tags
    ]

    print(f"Number of runs: {len(filtered_runs)}")

    for run in filtered_runs:
        run_id = run.id
        print(run_id)
        try:
            run_data = run.history()
            df = pd.DataFrame(run_data)
            df = df[['uids', 'status_messages', '_timestamp', 'completions', 'rewards', 'reference', 'task', 'challenge', 'turn', 'step', 'is_organic_sample']]
            df['run_id'] = run_id
            print(df.is_organic_sample.value_counts())
        except Exception as e:
            print(f"Error: {e}")
            continue
        dataframes.append(df)
    return dataframes

fetch_data_from_runs(project_name="macrocosmos/prompting-validators", netuid="1")

Number of runs: 0


[]

In [50]:
###specific version of software
from datetime import datetime, timedelta
def fetch_data_from_runs(project_name, required_tags=["2.6.2", "netuid_1"]):
    dataframes = []
    filters = {
        "tags": {"$all": required_tags},
        "state": "running"
    }
    runs = wandb.Api(timeout=120).runs(f"{project_name}", filters=filters)
    filtered_runs = [run for run in runs]
    print(f"Number of filtered runs: {len(filtered_runs)}")
    for run in filtered_runs:
        print(run.id)

    timeframe = datetime.now() - timedelta(hours=7)

    for run in filtered_runs:
        run_id = run.id
        print(run_id)
        try:
            run_data = run.history()
            df = pd.DataFrame(run_data)
            print(df.columns)
            df = df[['uids', 'status_messages', '_timestamp', 'completions', 'rewards', 'reference', 'task', 'challenge', 'turn', 'step', 'is_organic_sample']]
            df['run_id'] = run_id

            # Convert timestamp to datetime and filter for the last 24 hours
            df['_timestamp'] = pd.to_datetime(df['_timestamp'], unit='s')
            df = df[df['_timestamp'] >= timeframe]

            # Standardize 'reference' column by converting all to strings or correcting types
            df['reference'] = df['reference'].apply(lambda x: str(x))

            # Function to pad lists to match the longest list in each row
            def pad_lists(row, columns):
                max_len = max(len(row[col]) if isinstance(row[col], list) else 0 for col in columns)
                for col in columns:
                    if isinstance(row[col], list):
                        row[col] += [None] * (max_len - len(row[col]))
                    else:
                        row[col] = [None] * max_len
                return row
            columns_to_pad = ['uids', 'status_messages', 'completions', 'rewards']
            df = df.apply(lambda row: pad_lists(row, columns_to_pad), axis=1)

            # Explode relevant columns after padding
            df = df.explode(columns_to_pad)

            # Filter DataFrame based on 'our_miners' (assuming df_m is defined with 'our_miners')
            our_uids = df_m[df_m['our_miners']]['uid'].tolist()
            if len(our_uids) >= 50:
                selected_uids = our_uids[:50]
            else:
                selected_uids = our_uids
            filtered_df = df[df['uids'].isin(selected_uids)]

            # Add run ID as a column to identify data from different runs
            filtered_df['run_id'] = run_id

            # Append the correctly filtered DataFrame
            dataframes.append(filtered_df)
        except wandb.errors.CommError as e:
            print(f"Failed to fetch data for run {run_id}: {e}")
        except KeyError as e:
            print(f"KeyError: {e}")

    if dataframes:
        combined_df = pd.concat(dataframes, ignore_index=True)
        return combined_df
    else:
        return None

# Specify project name, netuid, and the required tag
project_name = "macrocosmos/prompting-validators"

# Fetch data from runs with the specified tag and conditions
combined_data = fetch_data_from_runs(project_name)

if combined_data is not None:
    print(combined_data.head())
else:
    print("No data fetched from runs.")

Number of filtered runs: 6
lnpiqdt4
cua9tfhk
906fzg99
y4x1bp6w
bcjllbpr
v8uhfy9i
lnpiqdt4
Index(['rouge_penalty_batch_time', 'rouge_reward_batch_time', 'challenge',
       'reference', 'step', 'stream_results_exceptions', 'timings',
       'system_prompt', '_step', 'completions', 'relevance_raw_reward',
       'streaming_raw_penalty', 'streaming_penalty', 'relevance_reward',
       'rouge_reward_extra_info.avg', 'rouge_penalty_extra_info.avg',
       'rouge_penalty_timings', 'profile', 'status_messages', 'rewards',
       'stream_results_all_chunks_timings', 'rouge_reward',
       'streaming_penalty_timings', 'stream_results_all_chunks',
       'stream_results_all_tokens_per_chunk', 'uids', 'stream_results_uids',
       'context_time', 'rouge_raw_penalty', 'rouge_penalty', 'desc',
       'status_codes', 'relevance_reward_timings', 'rouge_reward_timings',
       'rouge_raw_reward', 'relevance_reward_extra_info.threshold',
       'rouge_penalty_extra_info.metric', 'turn',
       'rouge_p

NameError: name 'df_m' is not defined

In [38]:
#### ARXIV DATASET RUN macrocosmos/prompting-validators/nc9e9ydv
api = wandb.Api()
run = api.run("macrocosmos/prompting-validators/sivqx3fx")
df = run.history()
df = df.dropna(subset=['reference'])
df.task.value_counts(normalize=True)

task
qa               0.365535
math             0.135770
sentiment        0.109661
date_qa          0.107050
generic          0.096606
summarization    0.093995
translation      0.091384
Name: proportion, dtype: float64

In [39]:
api = wandb.Api()
run = api.run("macrocosmos/prompting-validators/sivqx3fx")
df = run.history()
df = df.dropna(subset=['challenge'])
df[['challenge', 'task']]

Unnamed: 0,challenge,task
1,"\n\nwhat specific technological innovations did ada lovelace, often considered the first computer programmer, envision and describe in her notes on charles babbage's analytical engine, and how did her work on bernoulli numbers and her understanding of the engine's capabilities inform her predictions about its potential to go beyond mere calculation and perform any task that could be expressed in a series of steps?",generic
2,"\n\nwhat specific insights did this visionary's work on bernoulli numbers provide into the analytical engine's capabilities, and how did these insights influence her predictions about its potential to perform tasks beyond calculation?",qa
3,"I was wondering if you could help translate this into Spanish? \nOs feijões de outras espécies de Parkia (por exemplo, Parkia javanica e Parkia singularis) também são populares como ingrediente culinário na Indonésia, Malásia, Singapura, Brunei, Laos, sul da Tailândia, Birmânia e nordeste da Índia, especialmente Assam, Nagaland, Manipur, Mizoram e Tripura (consumidos principalmente pelo povo Tiprasa). Eles são vendidos em cachos, ainda na vagem, ou as sementes são vendidas em sacos de plástico. Pods são recolhidos da natureza, ou de árvores cultivadas: são exportados em jarros ou latas, picados em salmoura, ou congelados. [5]\n",translation
4,The definite integral within limits $0$ to $1$ of the equation $67x^2 + 52x + 62 = $\nWould you mind giving me some assistance?,math
6,"\n\nwhat circumstances led to the 2013 horse meat scandal in europe, where beef products from major retailers were found to contain horse meat, and how did the subsequent investigation and recall process impact the meat industry's supply chain transparency and consumer trust in the region?",generic
...,...,...
478,"I require assistance translating the following into English. Can you help? \nO Tar Heels terminou a temporada 2017–18 com um recorde de 26–11, 11–7 na peça ACC para terminar em uma gravata de quatro vias para o terceiro lugar. Como a semente no 6 no torneio ACC, eles derrotaram Syracuse,[1] Miami,[2] e Duke[3] antes de perder para Virginia no jogo do campeonato. [4] Eles receberam uma grande oferta para o torneio da NCAA como a semente No. 2 na região Oeste, onde derrotaram Lipscomb[5] antes de perder para o Texas A&M na segunda rodada. [6]\n",translation
479,Simplify $37^{1^{10}}$,math
480,"\n\ncan you break down the step-by-step process you used to arrive at that result, as i'd like to understand the reasoning behind the simplified form of the given expression?",qa
481,\n\nso i'm tryin to find out about elizabeth payne's family... Does it say anywhere what her grandpa peregrine bertie did for a living?,qa


In [22]:
# Get the percentage of rows that have "context" in the reference column
df['context'] = df.reference.str.contains('no context')
df.context.value_counts(normalize=True)

context
False    0.981723
True     0.018277
Name: proportion, dtype: float64

In [20]:
# Set the max width of the columns
pd.set_option('display.max_colwidth', 1000)
date_f = df[df.task == 'summarization']
date_f[['challenge', 'reference', 'task']]

Unnamed: 0,challenge,reference,task
7,"\n\nugh, can you just give me the cliffsnotes version of san giorgio in kemonia already? Like, what's the main point of this stupid church?","\n\nhere is a concise and accurate summary of the \n\nthe san giorgio in kemonia church, now known as san giuseppe cafasso, is a roman catholic parish church in palermo, sicily, italy. The church has undergone multiple transformations, with a possible 4th-century presence, and was later affiliated with cistercian and olivetan orders. In 1765, it was refurbished in a late-baroque style, and in 1953, it was rededicated to joseph cafasso, patron saint of prisoners, due to a former nearby female prison.",summarization
16,\n\nso what's the deal with susanne puddefoot? Who is she and why is she important?,"\n\nsusanne puddefoot (1934-2010) was a renowned english journalist, editor, and charity director. She was the first editor of the times women's page and a cambridge graduate. Born in blackpool, she was educated at girton college, where she was involved with granta and varsity. Puddefoot won a special award at the british press awards in 1967.",summarization
48,\n\nwhat can you tell me about mycobacterium parmense? Is it a type of bacteria that can cause infections in humans?,\n\nthere is no context provided. Please provide the context you'd like me to summarize. I'll be happy to help!,summarization
70,\n\nwhats the deal with cytora? Is it some kinda insurance company or smthn?,"\n\nhere is a concise and accurate summary of the \n\nthe genus cytora comprises 42 species, with 23 of them being first described in 2007, and the shell height is less than 7 mm.",summarization
86,\n\nwhats the main point of mile high edge? Is it some kinda building or something?,"\n\nthe mile high edge was a w-league soccer club based in denver, colorado, previously known as the mile high mustangs until 2005. The team ceased operations after the 2007 season.",summarization
87,\n\nwhat's the deal with evergreen high school in colorado? Is it a good school?,"\n\nhere is a concise and accurate summary of the \n\nevergreen high school is a public high school in colorado, known for its academic, athletic, and extracurricular excellence. It has won two blue ribbon awards from the us department of education. The school has a long history, dating back to the 19th century, and currently serves students from evergreen and surrounding areas. The school's student demographics are diverse, and its sports teams, the cougars, have a strong rivalry with conifer high school's lobos, with an annual tradition called the mountain bowl.",summarization
112,\n\nwhos george brown pitcher?,\n\ngeorge brown was a negro league pitcher who played in the 1920s.,summarization
134,\n\nwhat's the deal with the chi rho symbol? Is it just some fancy ancient graffiti or does it actually mean something?,"\n\nthe chi rho (☧) is an early christogram formed by superimposing the greek letters chi (χ) and rho (ρ) from the word ""christos"" (χριστος). According to lactantius, emperor constantine used this symbol on his soldiers' shields after a dream, leading to his victory in the battle of the milvian bridge in 312 ad. The symbol represents the connection between jesus' crucifixion and resurrection, often surrounded by a wreath to signify the victory of the resurrection over death.",summarization
136,\n\nwhats the deal with the 1868 surinamese general election? Was it like a big deal or somethin?,"\n\nhere is a concise and accurate summary of the \n\nin 1868, partial general elections were held in suriname to elect 3 out of 9 members of the colonial states. The 3 members served 6-year terms, with 3 elected every 2 years. Two candidates were elected in the first round, and a runoff was held for the third seat. The governor-general also appointed 4 members, with 1 replacement made in 1869.",summarization
139,\n\nwhat's the most important thing i need to know about wilbert king's life and accomplishments?,"\n\nwilbert king was an american athlete who played professional basketball and baseball from 1944 to 1947. He played in the negro leagues for several teams and had a stint in the national basketball league with the detroit gems, averaging 8.2 points per game in the 1946-1947 season.",summarization


In [23]:


api = wandb.Api()
run = api.run("macrocosmos/prompting-validators/m034psa4")
df = run.history()
df = df.dropna(subset=['reference'])
df.task.value_counts(normalize=True)
df[df.reference.str.contains("no context")].task.value_counts(normalize=True)

Series([], Name: proportion, dtype: float64)

In [24]:
df[df.reference.str.contains("no context")].task.value_counts(normalize=True)

Series([], Name: proportion, dtype: float64)

In [16]:
len(df)

196

In [4]:
# Download the logs from the run
run.file("logs.txt").download(replace=True)

# Load the logs and print the number of times 'AttributeError: 'NoneType' object has no attribute 'isdigit'' appears
with open("logs.txt") as f:
    logs = f.read()
    print(logs.count("AttributeError: 'NoneType' object has no attribute 'isdigit'"))

CommError: It appears that you do not have permission to access the requested resource. Please reach out to the project owner to grant you access. If you have the correct permissions, verify that there are no issues with your networking setup.(Error 404: Not Found)

In [1]:
# Import all functions from /workspace/prompting/prompting/tools/datasets/wiki.py
from prompting.tools.datasets.wiki import *


  from .autonotebook import tqdm as notebook_tqdm
2024-07-22 13:28:29,780	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
data = WikiDataset()

In [3]:
page = data.random(selector=random.choice)
print(page.title)

In [None]:
type(data.get(name = 'Bridge (dentistry)', selector=random.choice))

dict

In [14]:
p = data.get(name = 'Bridge (dentistry)', selector=random.choice)
p['internal_links'] = ['hey']
p

{'title': 'Bridge (dentistry)',
 'topic': 'Bridge failures',
 'subtopic': 'Bridge failure management',
 'content': 'Management of bridge failures depends upon the extent and type of failure and these can be prevented through forming a thorough treatment plan with the patient as well regularly emphasising the importance of maintaining a very good level of oral hygiene after the bridge has been placed. The importance of cleaning underneath the pontic, through the use of interdental cleaning aids, should also be reinforced as plaque control around fixed restorations is more difficult.\nManagement options include:\n\nKeeping the bridge under observation/review\nRepairing, replacing or removing the fault',
 'internal_links': ['hey'],
 'external_links': ['Dental restoration',
  'Human tooth',
  'Maryland bridge',
  'Resin-retained bridge',
  'Rochette bridge',
  'Resin retained bridge',
  'Commonly used terms of relationship and comparison in dentistry',
  'Acrylic resin',
  'Alveolar ridge'

In [17]:
p['content']

'Management of bridge failures depends upon the extent and type of failure and these can be prevented through forming a thorough treatment plan with the patient as well regularly emphasising the importance of maintaining a very good level of oral hygiene after the bridge has been placed. The importance of cleaning underneath the pontic, through the use of interdental cleaning aids, should also be reinforced as plaque control around fixed restorations is more difficult.\nManagement options include:\n\nKeeping the bridge under observation/review\nRepairing, replacing or removing the fault'

In [7]:
def _get_page(
    title, pageid=None, auto_suggest=False, redirect=True, seed=None
) -> wiki.WikipediaPage:
    """Cached Wikipedia page loading."""
    try:
        page = wiki.page(
            title=title, pageid=pageid, auto_suggest=auto_suggest, redirect=redirect
        )
        # create sections manually if not found
        if not page.sections:
            print("Manually Creating Sections")
            page._sections = [
                line.strip("= ")
                for line in page.content.splitlines()
                if re.search(r"=+\s+.*\s+=+", line)
            ]
        return page

    except wiki.DisambiguationError as e:
        bt.logging.debug(f"{e.__class__.__name__} loading page {title!r}: {e}")
        # exc info contains a tuple of (requested_title: str, possible_matches: List[str])
        pages = sys.exc_info()[1].args[1]
        if not type(pages) == list:
            return None
        title = random.Random(seed).choice(pages)
        return _get_page(title, auto_suggest=auto_suggest, redirect=redirect)

    except wiki.PageError as e:
        bt.logging.warning(f"{e.__class__.__name__} loading page {title!r}: {e}")
        if not auto_suggest:
            return _get_page(title, auto_suggest=True, redirect=redirect)
        return None

In [8]:
page = _get_page('Anneslie Historic District')

In [11]:
page.content

'The Anneslie Historic District () encompasses a residential area just north of the city line of Baltimore, Maryland in Towson.  It is a grid of five streets extending eastward from York Avenue and south from Regester Avenue.  The area was platted out in 1922 and mostly built out by the 1950s.  Properties in the northern section of the district, on Regester Avenue, Murdock, Anneslie, and Dunkirk Roads, were built in the 1920s and 1930s, in Bungalow, Foursquare, and cottage styles, while the streets further south were built out primarily with Cape, Tudor, and Colonial style houses. The district takes its name from Anneslie estate, whose house still stands in the district.\nThe district was added to the National Register of Historic Places in 2012.\n\nNational Register of Historic Places listings in Baltimore County, Maryland\n\nAnneslie Historic District, Baltimore County, including undated photo, at Maryland Historical Trust'

In [31]:
def process_page(
    page, valid_header: callable = None, valid_content: callable = None
) -> Dict:
    """Process a Wikipedia page and return a dictionary of sections with their content.

    Args:
        page: wikipedia.WikipediaPage
        valid_header: callable to determine if a section header is valid
        valid_content: callable to determine if a section content is valid
    Returns:
        dict: dictionary of sections and their content. Note that keys are tuples (header, section_title)
    """
    header = ""
    sections = {}

    for section_title in page.sections:
        content = page.section(section_title)
        if not content:
            header = section_title
            continue

        # Filter out sections that don't match the headers and/or are not valid
        if (valid_header and not valid_header(header)) or (
            valid_content and not valid_content(content)
        ):
            continue

        key = (header, section_title)
        sections[key] = content.splitlines()

    if not sections:
        bt.logging.debug(f"No valid sections found in page {page.title!r} ({page.url})")

    return sections

In [34]:
EXCLUDE_HEADERS = ("See also", "References", "Further reading", "External links")
exclude = list(EXCLUDE_HEADERS)
include = None
sections = process_page(
            page,
            valid_header=lambda x: x not in exclude and (not include or x in include),
            valid_content=lambda x: len(x.split()) >= 50,
        )

In [35]:
sections

{('',
  'Definitions'): ['Fixed bridge:  A dental prosthesis that is definitively attached to natural teeth and replaces missing teeth.', 'Abutment: The tooth that supports and retains a dental prosthesis.', 'Pontic: The artificial tooth that replaces a missing natural tooth.', 'Retainer: The component attached to the abutment for retention of the prosthesis. Retainers can be major or minor.', 'Unit: Pontics and abutment teeth are referred to as units. The total number of units in a bridge is equal to the number of pontics plus the number of abutment teeth.', 'Saddle: The area on the alveolar ridge which is edentulous where at least one missing tooth is to be reinstated.', 'Connector: Joins the pontic to the retainer or two retainers together. Connectors may be fixed or movable.', 'Span: The length of the alveolar ridge between the natural teeth where the bridge will be placed.', 'Resin bonded bridge: A dental prostheses where the pontic is connected to the surface of natural teeth whi

In [16]:
page.content.splitlines()

['A bridge is a fixed dental restoration (a fixed dental prosthesis) used to replace one or more missing teeth by joining an artificial tooth definitively to adjacent teeth or dental implants.',
 '',
 '',
 '== Definitions ==',
 'Fixed bridge:  A dental prosthesis that is definitively attached to natural teeth and replaces missing teeth.',
 'Abutment: The tooth that supports and retains a dental prosthesis.',
 'Pontic: The artificial tooth that replaces a missing natural tooth.',
 'Retainer: The component attached to the abutment for retention of the prosthesis. Retainers can be major or minor.',
 'Unit: Pontics and abutment teeth are referred to as units. The total number of units in a bridge is equal to the number of pontics plus the number of abutment teeth.',
 'Saddle: The area on the alveolar ridge which is edentulous where at least one missing tooth is to be reinstated.',
 'Connector: Joins the pontic to the retainer or two retainers together. Connectors may be fixed or movable.',

In [17]:
vars(page)

{'title': 'Bridge (dentistry)',
 'original_title': 'Bridge (dentistry)',
 'pageid': '1031150',
 'url': 'https://en.wikipedia.org/wiki/Bridge_(dentistry)',
 '_sections': ['Definitions',
  'Advantages and disadvantages of tooth replacement',
  'Types of bridge',
  'Conventional bridge',
  'Fixed-fixed bridges',
  'Cantilever',
  'Fixed-movable',
  'Resin-bonded bridge',
  'Combination Designs',
  'Hybrid Designs',
  'Advantages and Disadvantages of Conventional Bridge Designs',
  'Advantages and disadvantages of resin-bonded bridge designs',
  'Case selection and treatment planning',
  'Case selection',
  'Selection and evaluation of abutment teeth',
  'Biomechanical Considerations',
  'Indications for use',
  'Contraindications',
  'Types of artificial plastic teeth',
  'Types of bridges according to durability',
  'Types of bridges according to material',
  'Acrylic resin and porcelain fused to metal (PFM)',
  'IPs Emax',
  'Zirconia',
  'Clinical stages of bridgework',
  'Restoration 

In [1]:
import wandb
api = wandb.Api()
run_id = 'macrocosmos/prompting-validators/ydddch12'
run = api.run(f"{run_id}").history()
df = run[['uids', 'status_codes', 'status_messages', 'rewards', 'task']].explode(['uids', 'status_codes', 'status_messages', 'rewards'])
df.task.value_counts(normalize=True)

task
qa               0.278
generic          0.234
sentiment        0.216
math             0.208
date_qa          0.050
summarization    0.008
translation      0.006
Name: proportion, dtype: float64

In [9]:
run_id = 'macrocosmos/prompting-validators/vh9jvmtb'
run = api.run(f"{run_id}").history()
df = run[['uids', 'status_codes', 'status_messages', 'rewards', 'task']].explode(['uids', 'status_codes', 'status_messages', 'rewards'])
df.task.value_counts(normalize=True)

task
qa               0.358491
generic          0.188679
translation      0.132075
sentiment        0.132075
math             0.113208
summarization    0.056604
date_qa          0.018868
Name: proportion, dtype: float64

In [4]:
import wikipedia as wiki

In [5]:
(wiki.random(10)
 

['Kendayan language',
 'Mecyclothorax toxopei',
 'Qaleh Qazi-ye Sofla',
 'Meydan TV',
 'List of Skimo episodes',
 'Midland Independent School District',
 'Elizabeth Timothy',
 'Rebecca DiPietro',
 'X-Men: Regenesis',
 'Pond Inlet Airport']

In [9]:
array = meta.last_update

In [12]:
min_value = np.min(array)

# Step 2: Subtract the minimum value from all elements in the array
adjusted_array = array - min_value

# Step 3: Sort the resulting array
sorted_array = np.sort(adjusted_array)

In [11]:
import numpy as np 

In [16]:
sorted_array[:30]

array([    0,     5,    43,    49,    50,    57,    67,    98,   103,
         110,   111,  3291, 19286, 19300, 19306, 19318, 19334, 19352,
       19353, 19376, 38786, 47436, 58583, 58588, 58589, 58589, 58616,
       58618, 58621, 58629])

In [18]:
import wandb
api = wandb.Api()
run_id = 'macrocosmos/prompting-validators/nmm53lc4'
run = api.run(f"{run_id}").history()
df = run[['challenge', 'completions', 'rewards', 'task']].explode(['completions', 'rewards'])
df

Unnamed: 0,challenge,completions,rewards,task
0,\n\nwhat underlying psychological motivations ...,nelson mandela's transformation from a radical...,0.473716,generic
0,\n\nwhat underlying psychological motivations ...,nelson mandela's transformation from a radical...,0.446443,generic
0,\n\nwhat underlying psychological motivations ...,nelson mandela's transformation from a radical...,0.508189,generic
0,\n\nwhat underlying psychological motivations ...,nelson mandela's transformation from a radical...,0.446443,generic
0,\n\nwhat underlying psychological motivations ...,nelson mandela's transformation from a radical...,0.493607,generic
...,...,...,...,...
131,\n\nwhat's the deal with saiph being associate...,here is a concise and accurate summary of the ...,0.2982,qa
131,\n\nwhat's the deal with saiph being associate...,"saiph, the god of fertility, is associated wit...",0.249358,qa
131,\n\nwhat's the deal with saiph being associate...,What a fascinating question!\n\nIn Wardaman my...,0.450011,qa
131,\n\nwhat's the deal with saiph being associate...,here is a concise and accurate summary of the ...,0.265514,qa
