In [5]:
def raven_post(payload, timeout=120):
    """
    Send a JSON payload to the Raven TGI endpoint and return the decoded reply.

    Args:
        payload: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests would wait indefinitely on a stalled server.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.HTTPError: the endpoint answered with a 4xx/5xx status.
        requests.exceptions.Timeout: no answer arrived within `timeout` seconds.
    """
    import requests

    # LLM endpoint that serves Raven.
    API_URL = "http://nexusraven.nexusflow.ai"
    headers = {
            "Content-Type": "application/json"
    }

    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error body to the caller
    # as if it were a generation result.
    response.raise_for_status()
    return response.json()

def query_raven(prompt):
    """
    Send `prompt` to the TGI endpoint and return Raven's function call as a string.

    Generation stops at "<bot_end>"; per the original note this skips Raven's
    justification and reasoning for the call, to save on latency.

    Args:
        prompt: Full prompt text (function definitions plus the user query).

    Returns:
        The generated text with the leading "Call:" label removed and
        surrounding whitespace stripped.

    Raises:
        ValueError: the endpoint's reply is not a non-empty list whose first
            item is a dict containing "generated_text".
    """
    output = raven_post({
        "inputs": prompt,
        "parameters" : {"temperature" : 0.001, "stop" : ["<bot_end>"], "do_sample" : False, "max_new_tokens" : 2048, "return_full_text" : False}})
    # Validate the shape up front so a malformed reply gives a readable error
    # rather than a bare KeyError/IndexError.
    if (not isinstance(output, list) or not output
            or not isinstance(output[0], dict)
            or "generated_text" not in output[0]):
        raise ValueError(f"Unexpected response from Raven endpoint: {output!r}")
    # Drop only the first "Call:" label; later occurrences may belong to the
    # generated arguments and must be left intact.
    call = output[0]["generated_text"].replace("Call:", "", 1).strip()
    return call

In [1]:
# Sample passage pairing six people with their addresses; it is interpolated
# into the Raven prompt built in the next cell.
text = \
"""
John Doe lives at 123 Elm Street, Springfield. Next to him is Jane Smith, residing at 456 Oak Avenue, Lakeview. Not far away, we find Dr. Emily Ryan at 789 Pine Road, Westwood. Meanwhile, in a different part of town, Mr. Alan Turing can be found at 101 Binary Blvd, Computerville. Nearby, Ms. Olivia Newton stays at 202 Music Lane, Harmony. Also, Prof. Charles Xavier is located at 505 Mutant Circle, X-Town.
"""
# Echo the passage so the reader can compare it with the extracted pairs.
print (text)


John Doe lives at 123 Elm Street, Springfield. Next to him is Jane Smith, residing at 456 Oak Avenue, Lakeview. Not far away, we find Dr. Emily Ryan at 789 Pine Road, Westwood. Meanwhile, in a different part of town, Mr. Alan Turing can be found at 101 Binary Blvd, Computerville. Nearby, Ms. Olivia Newton stays at 202 Music Lane, Harmony. Also, Prof. Charles Xavier is located at 505 Mutant Circle, X-Town.



In [2]:
# Prompt for Raven: the signature and docstring of address_name_pairs, then the
# current value of `text`, terminated by the <human_end> marker.
raven_prompt = f'''
Function:
def address_name_pairs(names : list[str], addresses : list[str]):
"""
Give names and associated addresses.
"""

{text}<human_end>
'''

In [6]:
def address_name_pairs(names : list[str], addresses : list[str]):
  """
  Print each name next to the address found at the same position.

  One "name :  address" line is written per pair; pairing stops at the end
  of the shorter input.
  """
  paired = zip(names, addresses)
  for person, place in paired:
    print (person, ": ", place)

# Ask Raven for a call expression built from the prompt.
result = query_raven(raven_prompt)
# NOTE(review): eval() executes whatever string the endpoint returned as Python.
# Acceptable only for a trusted demo; validate or parse the call before running
# it anywhere else.
eval(result)

John Doe :  123 Elm Street, Springfield
Jane Smith :  456 Oak Avenue, Lakeview
Dr. Emily Ryan :  789 Pine Road, Westwood
Mr. Alan Turing :  101 Binary Blvd, Computerville
Ms. Olivia Newton :  202 Music Lane, Harmony
Prof. Charles Xavier :  505 Mutant Circle, X-Town


In [12]:
# Hard-wrapped bibliography excerpt used as the extraction input for the prompt
# built later in this cell. Note: this rebinds `text`, replacing the address
# passage assigned in an earlier cell.
text = \
"""
Chin-Yew Lin. 2004. ROUGE: A package for automatic evaluation of summaries. In Text Summarization Branches Out, pages 74–81, Barcelona, Spain.
Association for Computational Linguistics.
Fatemehsadat Mireshghallah, Vaishnavi Shrivastava,
Milad Shokouhi, Taylor Berg-Kirkpatrick, Robert
Sim, and Dimitrios Dimitriadis. 2022. Useridentifier:
Implicit user representations for simple and effective
personalized sentiment analysis. In Proceedings of
the 2022 Conference of the North American Chapter
of the Association for Computational Linguistics: Human Language Technologies, NAACL 2022, Seattle,
WA, United States, July 10-15, 2022, pages 3449–
3456. Association for Computational Linguistics.
Swaroop Mishra, Daniel Khashabi, Chitta Baral, and
Hannaneh Hajishirzi. 2022. Cross-task generalization via natural language crowdsourcing instructions.
In Proceedings of the 60th Annual Meeting of the
Association for Computational Linguistics (Volume
1: Long Papers), ACL 2022, Dublin, Ireland, May
22-27, 2022, pages 3470–3487. Association for Computational Linguistics.
Jesse Mu, Xiang Lisa Li, and Noah Goodman. 2023.
Learning to compress prompts with gist tokens.
Jianmo Ni, Gustavo Hernández Ábrego, Noah Constant,
Ji Ma, Keith B. Hall, Daniel Cer, and Yinfei Yang.
2022. Sentence-t5: Scalable sentence encoders from
pre-trained text-to-text models. In Findings of the Association for Computational Linguistics: ACL 2022,
Dublin, Ireland, May 22-27, 2022, pages 1864–1874.
Association for Computational Linguistics.
OpenAI. 2023. GPT-4 technical report. CoRR,
abs/2303.08774.
Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida,
Carroll L. Wainwright, Pamela Mishkin, Chong
Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray,
John Schulman, Jacob Hilton, Fraser Kelton, Luke
Miller, Maddie Simens, Amanda Askell, Peter Welinder, Paul F. Christiano, Jan Leike, and Ryan Lowe.
2022. Training language models to follow instructions with human feedback. In NeurIPS.
Kishore Papineni, Salim Roukos, Todd Ward, and WeiJing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the
40th Annual Meeting of the Association for Computational Linguistics, July 6-12, 2002, Philadelphia,
PA, USA, pages 311–318. ACL.
"""

def paper_and_authors_pairs(authors : list[str], papers : list[str], years: list[int]):
  """
  Print one "author : paper year" line per bibliography entry.

  The three inputs are matched by position. When their lengths differ, only
  as many rows as the shortest input are printed and a warning is emitted,
  because a length mismatch usually means the rows are misaligned.

  Args:
    authors: Author name for each entry.
    papers: Paper title for each entry.
    years: Publication year for each entry.
  """
  import warnings

  # Materialise the inputs so their lengths can be compared even if an
  # iterator was passed in.
  authors, papers, years = list(authors), list(papers), list(years)
  if not (len(authors) == len(papers) == len(years)):
    warnings.warn(
        "paper_and_authors_pairs received inputs of different lengths "
        f"(authors={len(authors)}, papers={len(papers)}, years={len(years)}); "
        "extra entries are ignored and the printed pairs may be misaligned.")
  for author, paper, year in zip(authors, papers, years):
    print (f"{author} : {paper} {year}")

# Prompt advertising paper_and_authors_pairs to Raven, followed by the
# bibliography text and the <human_end> marker. The docstring inside the prompt
# is the instruction the model sees; it asks for the main author of each paper.
raven_prompt = f"""
Function:
def paper_and_authors_pairs(authors : list[str], papers : list[str], years: list[int]):
'''
Tuples of main author, paper and year.
'''

{text}<human_end>
"""

result = query_raven(raven_prompt)
# NOTE(review): eval() executes the endpoint's reply as Python. Acceptable only
# for a trusted demo; validate or parse the call before running it elsewhere.
eval(result)

Chin-Yew Lin : ROUGE: A package for automatic evaluation of summaries 2004
Association for Computational Linguistics : Useridentifier: Implicit user representations for simple and effective personalized sentiment analysis 2022
Fatemehsadat Mireshghallah : Cross-task generalization via natural language crowdsourcing instructions 2022
Vaishnavi Shrivastava : Learning to compress prompts with gist tokens 2023
Milad Shokouhi : Sentence-t5: Scalable sentence encoders from pre-trained text-to-text models 2022
Taylor Berg-Kirkpatrick : GPT-4 technical report 2023
Robert Sim : Training language models to follow instructions with human feedback 2022


In [13]:
# Passage in which people have differing numbers of addresses (two or three
# each), so names and addresses cannot be paired one-to-one by position.
unbalanced_text = \
"""
Dr. Susan Hill has a practice at 120 Green Road, Evergreen City, and also consults at 450 Riverdale Drive, Brookside. Mark Twain, the renowned author, once lived at 300 Maple Street, Springfield, but now resides at 200 Writers Block, Literaryville. The famous artist, Emily Carter, showcases her work at 789 Artisan Alley, Paintown, and has a studio at 101 Palette Place, Creativeland. Meanwhile, the tech innovator, John Tech, has his main office at 555 Silicon Street, Techville, and a secondary office at 777 Data Drive, Computown, but he lives at 123 Digital Domain, Innovatown.
"""
# Echo the passage for comparison with the generated records.
print (unbalanced_text)


Dr. Susan Hill has a practice at 120 Green Road, Evergreen City, and also consults at 450 Riverdale Drive, Brookside. Mark Twain, the renowned author, once lived at 300 Maple Street, Springfield, but now resides at 200 Writers Block, Literaryville. The famous artist, Emily Carter, showcases her work at 789 Artisan Alley, Paintown, and has a studio at 101 Palette Place, Creativeland. Meanwhile, the tech innovator, John Tech, has his main office at 555 Silicon Street, Techville, and a secondary office at 777 Data Drive, Computown, but he lives at 123 Digital Domain, Innovatown.



In [14]:
# Prompt that describes a Record dataclass (a name plus a list of addresses) and
# an insert_into_database function taking a list of Records, followed by
# `unbalanced_text`. Record and insert_into_database appear only inside this
# prompt string in the cells shown here.
raven_prompt = \
f'''

@dataclass
class Record:
    name : str
    addresses : List[str]

Function:
def insert_into_database(names : List[Record]):
"""
Inserts the records into the database. 
"""

{unbalanced_text}<human_end>

'''

# The generated call is printed rather than executed.
result = query_raven(raven_prompt)
print (result)

insert_into_database(names=[Record(name='Dr. Susan Hill', addresses=['120 Green Road', '450 Riverdale Drive']), Record(name='Mark Twain', addresses=['300 Maple Street', '200 Writers Block']), Record(name='Emily Carter', addresses=['789 Artisan Alley', '101 Palette Place']), Record(name='John Tech', addresses=['555 Silicon Street', '777 Data Drive', '123 Digital Domain'])])


In [35]:
# Paragraph containing parenthetical author-year citations, used as the
# extraction input below. Note: this rebinds `text` again.
text = \
"""
Bibliometric research has often relied on an argument of random errors, which are assumed to even out when the lens is shifted from the individual object (where one may find missing and erroneous citations and references, or deliberate differences in the intentions of a reference (e.g., criticism)) to the statistical aggregate (Van Raan, 1998). This is a meaningful approach, but it requires that the objects that are aggregated are more or less the same type of objects. Naïvely, we could assume that scientific publications are all the same; however, it is well established that there are rather large differences in, for example, referencing practices between fields (Glänzel & Schubert, 2003; Leydesdorff & Bornmann, 2011). This makes direct comparisons of citation counts and averages between fields somewhat meaningless. One might even argue that unstable changes even make comparisons within fields somewhat meaningless over longer time spans, whether those changes are due to changes in database inclusion or an absolute change in publication intensity (Nielsen & Andersen, 2021; Petersen, Pan et al., 2019). This is not new, and sophisticated indicators have been developed to account for some of these field differences in citation impact (Waltman & van Eck, 2019).
"""

# Prompt describing a Record dataclass (authors plus a list of years) and an
# insert_into_database function, followed by the paragraph above. The inline
# comments inside the string are part of the prompt text sent to the model.
raven_prompt = \
f'''

@dataclass
class Record:
    """pair of references including authors and the publication year, authors can be associated with multiple years."""
    authors : str # authors referenced in the text
    year : List[int]         # year of publication

Function:
def insert_into_database(authors : List[Record]):
"""
Inserts the records into the database. 
"""

{text}<human_end>

'''

# The generated call is printed rather than executed.
result = query_raven(raven_prompt)
print (result)

insert_into_database(authors=[Record(authors='Glänzel & Schubert', year=[2003]), Record(authors='Leydesdorff & Bornmann', year=[2011]), Record(authors='Nielsen & Andersen', year=[2021]), Record(authors='Petersen, Pan et al.', year=[2019]), Record(authors='Waltman & van Eck', year=[2019])])


### Generating Valid JSON

{
  "city_name" : "London",
  "location" : {
      "country" : "United Kingdom",
      "continent" : {
          "simple_name" : "Europe",
          "other_name" : "Afro-Eur-Asia"
      }
  }
}

In [36]:
def city_info(city_name : str, location : dict):
  """
  Bundle the city name and its location mapping into one dict.

  Returns a dict with the keys "city_name" and "location", in that order.
  """
  return {"city_name": city_name, "location": location}
def construct_location_dict(country : str, continent : dict):
  """
  Build the location mapping from a country name and a continent mapping.

  Returns a dict with the keys "country" and "continent", in that order.
  """
  return {"country": country, "continent": continent}
def construct_continent_dict(simple_name : str, other_name : str):
  """
  Build the continent mapping from its common name and an alternative name.

  Returns a dict with the keys "simple_name" and "other_name", in that order.
  """
  return {"simple_name": simple_name, "other_name": other_name}

In [48]:
# Sanity check: call city_info directly with an empty location dict.
print (city_info("London", {}))

{'city_name': 'London', 'location': {}}


In [38]:
# Prompt template listing the three helper functions Raven may compose into a
# nested call. Each definition is introduced by its own "Function:" marker, and
# the {question} placeholder is filled in later with str.format.
raven_prompt = \
'''
Function:
def city_info(city_name : str, location : dict):
"""
Gets the city info
"""

Function:
def construct_location_dict(country : str, continent : dict):
"""
Provides the location dictionary
"""

Function:
def construct_continent_dict(simple_name : str, other_name : str):
"""
Provides the continent dict
"""

User Query: {question}<human_end>
'''

In [49]:
# Natural-language request; the two adjacent string literals form one sentence.
question = "I want the city info for London, "\
"which is in the United Kingdom, which is in Europe or Afro-Eur-Asia."

output = query_raven(raven_prompt.format(question = question))
# NOTE(review): eval() executes the endpoint's reply as Python. Acceptable only
# for a trusted demo; validate or parse the call before running it elsewhere.
json0 = eval(output)
print (json0)

{'city_name': 'London', 'location': {'country': 'United Kingdom', 'continent': {'simple_name': 'Europe', 'other_name': 'Afro-Eur-Asia'}}}


In [50]:
import json
# Serialise the London result to a JSON string.
json.dumps(json0)

'{"city_name": "London", "location": {"country": "United Kingdom", "continent": {"simple_name": "Europe", "other_name": "Afro-Eur-Asia"}}}'

In [51]:
# Second request, phrased differently, reusing the same prompt template.
question = "I need details for the city of Tokyo, "\
"situated in Japan, a part of the Asian continent, "\
"which is sometimes referred to as Eurasia."

output = query_raven(raven_prompt.format(question = question))
# NOTE(review): eval() executes the endpoint's reply as Python. Acceptable only
# for a trusted demo; validate or parse the call before running it elsewhere.
json1 = eval(output)
print (json1)

{'city_name': 'Tokyo', 'location': {'country': 'Japan', 'continent': {'simple_name': 'Asian', 'other_name': 'Eurasia'}}}


In [52]:
import json
# Serialise the Tokyo result (json1) computed in the preceding cell.
json.dumps(json1)

'{"city_name": "Tokyo", "location": {"country": "Japan", "continent": {"simple_name": "Asian", "other_name": "Eurasia"}}}'