In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and local source edits are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Install client library (you may need to restart your kernel)

In [2]:
# Editable (development-mode) install of the client package from the local
# `client` directory, using the kernel's own environment via %pip.
%pip install -e client

Obtaining file:///disk/u/jfiottok/wd/engine/client
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: engine
  Attempting uninstall: engine
    Found existing installation: engine 1.0
    Uninstalling engine-1.0:
      Successfully uninstalled engine-1.0
  Running setup.py develop for engine
Successfully installed engine-1.0
Note: you may need to restart the kernel to use updated packages.


#### This will give you the `engine` module
##### This exposes `engine.get_info`, `engine.submit`, and `engine.retrieve`

In [1]:
import engine

##### `engine.get_info` returns a dictionary describing the currently loaded model and its parameters, such as the layer naming formats, which help you specify the particular components of the model you want to access

In [2]:
# Ask the server about the currently loaded model. The returned dict includes
# the module-name format strings that are used later to address layers.
info = engine.get_info()
info

{'attn_module_name_format': 'model.layers.{}.self_attn',
 'layer_name_format': 'model.layers.{}',
 'max_seq_length': 2048,
 'mlp_module_name_format': 'model.layers.{}.mlp',
 'model_name': 'LLaMa-65b',
 'n_attn_head': 52,
 'n_embd': 6656,
 'n_layer': 60}

#### `engine.submit(...)` submits your job request to the server. You can see the format and types of the request like this:

In [11]:
# Display the schema of the request model: field names, types, defaults and
# which fields are required.
engine.models.submit.Request.schema()

{'title': 'Request',
 'type': 'object',
 'properties': {'job_id': {'title': 'Job Id', 'type': 'string'},
  'prompt': {'title': 'Prompt',
   'anyOf': [{'type': 'string'},
    {'type': 'array', 'items': {'type': 'string'}}]},
  'max_new_tokens': {'title': 'Max New Tokens',
   'default': 1,
   'type': 'integer'},
  'get_answers': {'title': 'Get Answers', 'default': False, 'type': 'boolean'},
  'top_k': {'title': 'Top K', 'default': 1, 'type': 'integer'},
  'generate_greedy': {'title': 'Generate Greedy',
   'default': True,
   'type': 'boolean'},
  'activation_requests': {'title': 'Activation Requests',
   'type': 'array',
   'items': {'$ref': '#/definitions/ActivationRequest'}}},
 'required': ['job_id', 'prompt'],
 'definitions': {'ActivationRequest': {'title': 'ActivationRequest',
   'type': 'object',
   'properties': {'final_output': {'title': 'Final Output',
     'default': True,
     'type': 'boolean'},
    'layers': {'title': 'Layers',
     'type': 'array',
     'items': {'type': 'st

##### Or by visiting our API documentation at [https://ndif.baulab.us/api/docs](https://ndif.baulab.us/api/docs)

##### In this example request, we're going to:

<ul>
  <li>Process a couple of prompts like, "Michael Jordan plays the sport of"</li>
  <li>Have ten predicted tokens returned to us per prompt</li>
  <li>Have the top five most likely tokens returned to us, not just the top 1</li>
  <li>Finally, we're going to use the format we received from the get_info() function to specify that we also want the activations at a few layers in the model</li>
</ul>

In [12]:
# Submit two prompts, asking for ten new tokens each, the five most likely
# candidates per generated token, and the activations of layers 5 through 9
# (layer names are built from the format string reported by get_info()).
response = engine.submit(
    prompt=[
        "Michael Jordan plays the sport of",
        "The Space Needle is located in the city of",
    ],
    max_new_tokens=10,
    get_answers=True,
    top_k=5,
    activation_requests=[
        {
            "layers": [
                info["layer_name_format"].format(layer_idx)
                for layer_idx in range(5, 10)
            ],
        },
    ],
)

INFO: 2023-04-14 17:06:35,628 - => Submitting request...
INFO: 2023-04-14 17:06:35,641 - => Successfully submitted job 'nikFcJHzgRi2q8D5TFVtEB'
INFO: 2023-04-14 17:06:35,643 - => Dumped request for job 'nikFcJHzgRi2q8D5TFVtEB' to /disk/u/jfiottok/wd/engine/jobs/nikFcJHzgRi2q8D5TFVtEB


##### The server will receive our request and return a status (which we hope will say that it received our request with no problem), as well as the job id identifying our request. This is loaded into the `engine.models.result.Result` model. The request we submitted will be saved to our local `./jobs` directory for future reference.

In [14]:
# Show the server's acknowledgement: its message, the job status and the
# job id, one per line.
print(response.description, response.status, response.job_id, sep="\n")

Your job has been recieved is is waiting approval
JobStatus.RECIVED
nikFcJHzgRi2q8D5TFVtEB


##### Now, to retrieve what we asked for, we call `engine.retrieve(<job_id>)` using the job_id we received. This too will return an `engine.models.result.Result` object, and assuming your request has been approved and processed, the `data` field will be populated.

###### This result will also be stored to disk in the same directory as the request

In [15]:
# Fetch the result for the job submitted above, identified by its job id;
# the trailing expression displays the returned object.
result = engine.retrieve(response.job_id)
result

INFO: 2023-04-14 17:11:26,369 - => Retrieving job 'nikFcJHzgRi2q8D5TFVtEB'...
INFO: 2023-04-14 17:11:26,381 - => Retrieved job 'nikFcJHzgRi2q8D5TFVtEB'
INFO: 2023-04-14 17:11:26,382 - => Dumped response for job 'nikFcJHzgRi2q8D5TFVtEB' to /disk/u/jfiottok/wd/engine/jobs/nikFcJHzgRi2q8D5TFVtEB


Result(job_id='nikFcJHzgRi2q8D5TFVtEB', status=<JobStatus.ERROR: 'ERROR'>, timestamp=datetime.datetime(2023, 4, 14, 21, 6, 36, 551911), description='Your job errored out', data=None)

In [10]:
# Status message attached to the retrieved result.
result.description

'Your job has been completed'

In [100]:
# List the top-level fields of the result.
# NOTE(review): the other cells access `result` through attributes
# (`result.description`, `result.data`); confirm that `.keys()` is still
# available on the object returned by `engine.retrieve`.
result.keys()

dict_keys(['job_id', 'status', 'timestamp', 'description', 'data'])

In [12]:
# Field definitions (name, type, required/default) of the first entry in
# `result.data`.
result.data[0].__fields__

{'generated_text': ModelField(name='generated_text', type=List[str], required=True),
 'answer': ModelField(name='answer', type=List[Answer], required=True),
 'input_tokenized': ModelField(name='input_tokenized', type=Optional[list], required=False, default=None),
 'generated_tokens': ModelField(name='generated_tokens', type=list, required=True),
 'activations': ModelField(name='activations', type=Optional[Mapping[str, list[list[list[float]]]]], required=False, default=None)}

In [13]:
# Names of the layers for which activations were returned in the first entry.
result.data[0].activations.keys()

dict_keys(['transformer.h.5', 'transformer.h.6', 'transformer.h.7', 'transformer.h.8', 'transformer.h.9'])

In [14]:
import torch
import json
import copy

In [19]:
# Print a summary of every entry in `result.data`: the generated text, the
# tokenized input, the answer candidates, the generated tokens, and the tensor
# shape of each returned activation.
# NOTE(review): `result.data` is None when the job errored out; check
# `result.status` before running this cell.
for r in result.data:
    print("txt = ", r.generated_text)
    print("input_tokenized = ", r.input_tokenized)
    print("answer = ", r.answer)
    print('generated_tokens = ', r.generated_tokens)
    print("activations")
    # `activations` is declared Optional with a default of None, so fall back
    # to an empty mapping instead of raising TypeError when it is absent.
    for layer in (r.activations or {}):
        # Convert the nested float lists to a tensor only to report its shape.
        print(f"     {layer} : {torch.tensor(r.activations[layer]).shape}")

    print()
    

txt =  ["Michael Jordan plays the sport of basketball, but he's not a basketball player."]
input_tokenized =  [['Michael', 13256], [' Jordan', 8078], [' plays', 5341], [' the', 262], [' sport', 6332], [' of', 286]]
answer =  [Answer(top_token=' basketball', candidates=[Candidate(token=' basketball', token_id=9669, p=0.8828), Candidate(token=' football', token_id=4346, p=0.0157), Candidate(token=' golf', token_id=13126, p=0.0138), Candidate(token=' hoops', token_id=46730, p=0.0089), Candidate(token=' Basketball', token_id=25911, p=0.0056)])]
generated_tokens =  [[[{'token': ' basketball', 'id': 9669, 'p': 0.8828125}, {'token': ' football', 'id': 4346, 'p': 0.0156707763671875}, {'token': ' golf', 'id': 13126, 'p': 0.01383209228515625}, {'token': ' hoops', 'id': 46730, 'p': 0.00893402099609375}, {'token': ' Basketball', 'id': 25911, 'p': 0.005588531494140625}], [{'token': ',', 'id': 11, 'p': 0.11767578125}, {'token': ' with', 'id': 351, 'p': 0.09161376953125}, {'token': '.', 'id': 13, 'p'