In [1]:
import monologue
# used to extract fenced code blocks from text
from monologue.core.utils.ops import parse_fenced_code_blocks
# convenience helper (fairly basic): generates pydantic types from example data
from monologue.core.utils.ops import pydantic_type_generator
# base class for the entities we create; it provides the repr and pulls out some metadata
from monologue.entities import AbstractEntity


## Illustrate the type representation in logs

In [2]:
from pydantic import Field
# Minimal example entity extending AbstractEntity (imported in the first cell).
class MyEntity(AbstractEntity):
    # `is_key=True` is extra Field metadata; the displayed repr reports "__key__": "code".
    code: str = Field(is_key=True)
    # Creation date, kept as a plain string in this example.
    created_at: str

my_entity = MyEntity(code='test', created_at= "2023-01-01")
# Bare last expression so the notebook shows the entity's repr: a fenced JSON
# block that includes __type__, __key__ and __namespace__ alongside the fields.
my_entity

```json{"code": "test", "created_at": "2023-01-01", "__type__": "MyEntity", "__key__": "code", "__namespace__": null}```

## Show how the Columnar Store works

In [3]:
from monologue.core.stores import ColumnarDataStore
from monologue.entities.examples import NycTripEvent
# Create a columnar store bound to the NycTripEvent example entity type.
store = ColumnarDataStore(NycTripEvent)
# Display the store object (the output is the default object repr).
store

<monologue.core.stores.ColumnarStore.ColumnarDataStore at 0x138aeebf0>

### How we add data to the store

In [4]:
# Ingestion step, left commented out in this notebook. Presumably a one-time load
# of the sample CSV into the store -- uncomment to (re)load.
# TODO confirm whether re-adding the same rows duplicates data in the store.
# import pandas as pd
# data = pd.read_csv("/Users/sirsh/Downloads/nyc_trip_data_sample.csv") 
# store.add(data)

### Load the tool and ask questions

In [5]:
# Wrap the store as a tool object. The displayed repr shows a `Tool` with a
# generated name and description, whose `func` is the store's local `ask` function.
tool = store.as_tool()
tool

Tool(name='Stats and data table tool relating to examples NycTripEvent', description='Use this tool to answer questions about aggregates or statistics or to get sample values or lists of values relating to examples examples. \n            Do not select any values that are not in the provided list of columns. \n            Provide full sentence questions to this tool. If 0 results are returned, do not trust this tool completely.\n            Added context: None\n            About the entity: None\n            ', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, func=<function ColumnarDataStore.as_tool.<locals>.ask at 0x11139add0>, coroutine=None)

In [6]:
# Ask a natural-language question. The output shows the store logging the prompt
# it builds (table columns plus enum values) and returning a list of row dicts.
tool.run("What is least popular destination in new york city? Who has travelled there?")

[32m2023-10-01 17:51:31.093[0m | [34m[1mDEBUG   [0m | [36mmonologue.core.stores.ColumnarStore[0m:[36mask[0m:[36m59[0m - [34m[1mFor a table called TABLE with the ['index', 'passenger_name', 'pick_up_at', 'drop_off_at', 'passenger_count', 'trip_distance', 'payment_type', 'congestion_surcharge', 'airport_fee', 'borough_pick_up', 'zone_pick_up', 'borough_drop_off', 'zone_drop_off'], and the following column enum types {'passenger_count': [1.0, 0.0, 2.0, 5.0, 4.0, 3.0, nan, 6.0], 'payment_type': ['Cash', 'Credit card', 'Unknown', 'Dispute', 'No charge'], 'congestion_surcharge': [2.5, nan, -2.5, 0.0], 'airport_fee': [0.0, nan, 1.25, -1.25], 'borough_pick_up': ['Manhattan', 'Queens', 'Unknown', 'Brooklyn', 'Bronx', 'Staten Island', 'EWR'], 'zone_pick_up': ['Midtown Center', 'Central Park', 'Clinton East', 'LaGuardia Airport', 'Gramercy', 'Upper West Side South', 'Lincoln Square East', 'Midtown South', 'Lenox Hill West', 'Union Sq', 'East Village', 'Flatiron', 'Greenwich Village N

[{'borough_drop_off': 'Staten Island',
  'zone_drop_off': 'Stapleton',
  'trip_count': 1,
  'passengers': 'Katherine "Kate" Bishop'}]

# Vector Store Loading

In [1]:
from pathlib import Path

import pandas as pd
from monologue.core.stores import VectorDataStore

# Resolve the CSV under the current user's home directory instead of hard-coding
# one machine's absolute path, so the cell works for anyone with the file in ~/Downloads.
marvel_bios_csv = Path.home() / "Downloads" / "marvel_bios.csv"
# Rename `entity_key` to `id`; this frame is what later gets passed to `store.add`.
data = pd.read_csv(marvel_bios_csv).rename(columns={'entity_key':'id'})
# Preview the first rows only.
data.head()

Unnamed: 0,entity_type,id,text
0,people,"Henry Jonathan ""Hank"" Pym_0",\n\nGiant-Man\n\n\n\n\n\nGallery\n\nName\nDr. ...
1,people,"Henry Jonathan ""Hank"" Pym_1",Doris Pym (mother)Spouses:Maria Trovaya (1st w...
2,people,"Henry Jonathan ""Hank"" Pym_2",Irises:
3,people,"Henry Jonathan ""Hank"" Pym_3","Dr. Henry ""Hank"" Pym, Ph.D,[20] was an America..."
4,people,"Henry Jonathan ""Hank"" Pym_4","Hank was born and raised in East Nowhere, Nebr..."


In [2]:
# Import the entity types explicitly instead of `import *`, so it is clear where
# each name comes from. `Places` is used by a later cell and was previously
# supplied only by the wildcard import.
from monologue.entities.examples import AvengingPassengersInstruct, Places
# Create a vector store bound to the AvengingPassengersInstruct entity type; the
# cell output shows an INSTRUCTOR transformer model being loaded at this point.
store = VectorDataStore(AvengingPassengersInstruct)

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


max_seq_length  512


In [3]:
# Add the frame to the vector store; the INFO log reports the number of rows
# added and the s3:// location they are written to.
# NOTE(review): re-running this cell may add the same rows again -- confirm how the store handles that.
store.add(data)
# Read the stored data back; the result has a `vector` column alongside the input columns.
df = store.load()
df

[32m2023-10-01 20:29:24.470[0m | [1mINFO    [0m | [36mmonologue.core.stores.VectorStore[0m:[36madd[0m:[36m68[0m - [1mAdding 8109 to s3://res-data-platform/stores/vector/v0/examples_AvengingPassengersInstruct...[0m


Unnamed: 0,entity_type,id,text,vector
0,people,"Henry Jonathan ""Hank"" Pym_0",\n\nGiant-Man\n\n\n\n\n\nGallery\n\nName\nDr. ...,"[-0.058788892, 0.007829103, -0.00860671, 0.046..."
1,people,"Henry Jonathan ""Hank"" Pym_1",Doris Pym (mother)Spouses:Maria Trovaya (1st w...,"[-0.05144007, 0.00525672, -0.0058651497, 0.027..."
2,people,"Henry Jonathan ""Hank"" Pym_2",Irises:,"[-0.016906587, -0.028654756, -0.0019730197, 0...."
3,people,"Henry Jonathan ""Hank"" Pym_3","Dr. Henry ""Hank"" Pym, Ph.D,[20] was an America...","[-0.044696458, 0.005965973, -0.010171299, 0.02..."
4,people,"Henry Jonathan ""Hank"" Pym_4","Hank was born and raised in East Nowhere, Nebr...","[-0.06321009, 0.009286675, -0.028450374, -0.00..."
...,...,...,...,...
8104,people,Kaluu_12,Since then Doctor Strange has purged himself o...,"[-0.018727517, 0.005965266, -0.015376446, 0.03..."
8105,people,Kaluu_13,Kaluu was later contacted via crystal ball by ...,"[-0.03819539, 0.0015838806, -0.02035464, 0.005..."
8106,people,Kaluu_14,"Kaluu is a powerful black magician, considered...","[-0.03541085, -0.011711877, 0.0009794249, 0.02..."
8107,people,Kaluu_15,Expert Occultist/Expert Magical Knowledge: Dur...,"[-0.037227213, 8.635753e-05, -0.00020599505, 0..."


In [37]:
# Inspect the full `text` value of a single stored row (positional row 10).
df.iloc[10]['text']

'After an alien being from the Kosmos dimension killed Pym\'s colleague, the scientist Vernon van Dyne, Pym revealed his secret identity of Ant-Man to van Dyne\'s daughter Janet, who wished to avenge his death. Pym taught Janet how to use the gas within which he now contained the "Pym Particles," and which he used to shrink himself in size, and through biochemistry, gave her the ability to grow insect-like wings when she used the gas to shrink herself to insect size. As the Wasp, Janet van Dyne assisted the Ant-Man in finding and defeating the murderous Kosmosian.[7] Pym and van Dyne gradually fell in love; van Dyne reminded Pym of his deceased first wife, Maria.[7] Pym and van Dyne became crime-fighting partners in their costumed identities, fighting menaces like the Egghead,[35][36][37] the A-Chiltarians and their robot Cyclops,[38] the trumpet-playing criminal Trago,[39] the Porcupine,[40][41] the Human Top[42][43] and the Black Knight (Nathan Garrett). They were two of the founding

In [4]:
# Expose the vector store as a tool; the displayed repr shows its `func` is the
# `run` method of a RetrievalQA chain.
# NOTE(review): `debug_db` is not explained in this notebook -- check `as_tool` for its meaning.
tool = store.as_tool(debug_db=False)
tool

Tool(name='Further details tool relate to AvengingPassengersInstruct entities', description='If and only if the other tools return no results, use this tool to get extra information about any AvengingPassengersInstruct entity that you are asked about.  \n                Do not pass identifiers and codes to this tool. Only pass proper nouns and questions in full sentences.\n                Added context: \n                About the entity: None\n                ', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, func=<bound method Chain.run of RetrievalQA(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, combine_documents_chain=StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_ma

In [5]:
# Ask the retrieval tool a short free-text question; it returns a prose answer string.
tool.run("Who is Iron man?")

'Iron Man is a superhero from the Marvel Universe. His real name is Tony Stark, and he relies on his intellect and technological resources to defeat his enemies. Stark is known for being one of the most intelligent people in the Marvel Universe.'

In [18]:
# Ask a two-part question in one call; the returned string addresses both parts.
tool.run("What can you tell me about captain america? What was his real name?")

'Captain America\'s real name is Steven Grant "Steve" Rogers. He is a character in the Marvel Comics universe. He is a human enhanced to the peak of human perfection by a Super-Soldier Serum. His aliases include Codenames like Cap-Wolf, The Captain, Nomad, Phoenix, Spider-King and many others. He is affiliated with several groups including the Avengers Unity Division, which he founded, and S.H.I.E.L.D. He has also been a leader of the Invaders and the Secret Avengers. His physical characteristics include a height of 6′2″, weight of 240 lbs, blue eyes, and blond hair. He was born in Manhattan, New York City, New York. His occupation has varied from adventurer, federal official, intelligence operative to soldier, liaison between S.H.I.E.L.D. and the Avengers, police officer, teacher, sparring partner, artist, and comic book artist for Marvel Comics.'

In [18]:
from pathlib import Path

# Resolve the CSV under the current user's home directory instead of hard-coding
# one machine's absolute path, so the cell works for anyone with the file in ~/Downloads.
nyc_zones_csv = Path.home() / "Downloads" / "nyc_zones.csv"
# Drop the file's existing `id` column and the row labelled 1, then expose
# `entity_key` as `id` (the preview below shows index 1 missing).
# NOTE(review): the reason for dropping row 1 is not recorded here -- confirm it is intentional.
data = pd.read_csv(nyc_zones_csv).drop(columns='id',index=1).rename(columns={'entity_key':'id'})
# Preview the first rows only.
data.head()

Unnamed: 0,index,entity_type,id,text
0,0,nyc_zone,Midtown Center,Page: Midtown Manhattan\nSummary: Midtown Manh...
2,2,nyc_zone,Clinton East,Page: Bill Clinton\nSummary: William Jefferson...
3,3,nyc_zone,LaGuardia Airport,Page: LaGuardia Airport\nSummary: LaGuardia Ai...
4,4,nyc_zone,Gramercy,Page: Gramercy Park\nSummary: Gramercy Park ()...
5,5,nyc_zone,Upper West Side South,Page: Upper West Side\nSummary: The Upper West...


In [13]:
# Rebind `store` to a vector store for the Places entity type (replaces the
# store created earlier for AvengingPassengersInstruct).
store = VectorDataStore(Places)
# Ingestion left commented out; presumably the zone data was already added in an
# earlier run -- TODO confirm before relying on the store's contents.
# store.add(data)

In [23]:
# Build the retrieval tool for the Places store and ask a descriptive question
# that does not name the place; the answer identifies it as LaGuardia Airport.
tool = store.as_tool(debug_db=False)
tool.run("What can you tell me about civil airport in East Elmhurst Queens?")

"LaGuardia Airport is the civil airport located in East Elmhurst, Queens, New York City. It was established in 1929 and began operating as a public airport in 1939. The airport is named after former New York City mayor Fiorello La Guardia. As of 2019, it was the third-busiest airport in the New York metropolitan area, behind Kennedy and Newark airports, and the twenty-first busiest in the United States by passenger volume. The airport primarily serves domestic and limited international destinations. It is a hub for both American Airlines and Delta Air Lines. The airport has been criticized for its outdated facilities and inefficient operations, leading to a multibillion-dollar reconstruction of the airport's passenger infrastructure, which is expected to be completed by 2025."