In [12]:
# Import prerequisite libraries
import os
import requests
import tempfile
import json
import transformers
import torch
import pandas as pd
from transformers import AutoTokenizer
from transformers import LlamaModel, LlamaForQuestionAnswering, LlamaForCausalLM, LlamaTokenizer

In [13]:
N_FEATURES = 3
N_RESPONSES = 3
#EXPS = ["ames_housing_0.csv", "ames_housing_1.csv", "ames_housing_2.csv"]
#EXPS = ["mushroom_0.csv", "mushroom_1.csv", "mushroom_2.csv"]
#EXPS = ["cell_phone_churn_0.csv", "cell_phone_churn_1.csv", "cell_phone_churn_2.csv"]
EXPS = ["ames_housing_0.csv", ]
JSON_NAME = "sample.json"

exps = []
for exp in EXPS:
  exp_df = pd.read_csv(exp)
  exp_df = exp_df.sort_values(by="Contribution", key=abs, ascending=False)
  exps.append(exp_df.to_dict('records'))

try:
  with open(JSON_NAME, "r") as fp:
      save_json = json.load(fp)
except FileNotFoundError:
  save_json = {}

In [15]:
model_dir = "./llama-2-13b-chat-hf"
model = LlamaForCausalLM.from_pretrained(model_dir)
tokenizer_llama = LlamaTokenizer.from_pretrained(model_dir)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [16]:
def parse_exp(exp, num_features=5, include_average=True):
  features = []
  if num_features is None:
    num_features = len(exp)
  for i in range(num_features):
    if include_average:
      features.append("({}, {}, {}, {})".format(exp[i]['Feature Name'].strip(),
                                                exp[i]['Feature Value'],
                                                exp[i]['Contribution'],
                                                exp[i]['Average/Mode']))
    else:
      features.append("({}, {}, {})".format(exp[i]['Feature Name'].strip(),
                                                exp[i]['Feature Value'].strip(),
                                                exp[i]['Contribution']))
  return ", ".join(features)

def show_responses(response, filename=None):
  f = None
  if filename is not None:
    f = open(filename, "w")
  for choice in response.choices:
    if f is not None:
      f.write(choice.message.content)
      f.write("\n")
    print(choice.message.content)
    print("\n")
  if f is not None:
    f.close()

In [35]:
prompt =  ("You are a helpful assistant. "
           "You are helping users understand an ML model's predictions. "
           "Do not use more tokens that necessary but make your answers sound natural."
           )
question =  ("Convert this feature contibution explanation, generated using SHAP, into a simple narrative. "
             "The explanation is presented in (feature, feature_value, contribution, average_feature_value) format: ")
explanation = parse_exp(exp, num_features=3)

In [18]:
pipeline_llama = transformers.pipeline(
"text-generation",
model=model,
tokenizer=tokenizer_llama)
# torch_dtype=torch.float16,
# device_map="auto",
#)

In [31]:
def generate_answer_llama(question, explanation, pipeline=pipeline_llama, tokenizer=tokenizer_llama, json=None):

    input = f"""
        <<SYS>>
        {prompt}
        <</SYS>>
        [INST]
        User:{question}
        [/INST]\
        [INST]
        User:{explanation}
        [/INST]\n

        Assistant:
    """
    
    sequences = pipeline(
        input,
        do_sample=True,
        top_k=50,
        num_return_sequences=3,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=500,
        return_full_text=False,
        temperature=0.7,
    )
    
    for seq in sequences:
      generated_text = seq['generated_text']
      # Find the start of the assistant's answer and return only that part
      answer_start = generated_text.find("Assistant:") + len("Assistant:")
      response = generated_text[answer_start:].strip()
    
      if json is not None:
          if prompt not in json:
              json[prompt] = {}
          json[prompt][question] = response
          #pp_result(json, prompt, question)
      else:
          return response

def get_responses(response):
  responses = []
  for choice in response.choices:
    responses.append(choice.message.content)
  return responses
  
def pp_result(json, prompt, question):
  print("PROMPT: %s" % prompt)
  print("QUESTION: %s" % question)
  print("===")
  for response in json[prompt][question]:
    print(response)
    print("---")

In [32]:
    
# Example usage
results = generate_answer_llama(question, explanation, json=save_json)

print(results)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


None


In [33]:
with open(JSON_NAME, "w") as fp:
  json.dump(save_json, fp)

In [40]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

## v2 models
#model_path = 'openlm-research/open_llama_3b_v2'
model_path = 'openlm-research/open_llama_7b_v2'

## v1 models
# model_path = 'openlm-research/open_llama_3b'
# model_path = 'openlm-research/open_llama_7b'
# model_path = 'openlm-research/open_llama_13b'

tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)



tokenizer_config.json:   0%|          | 0.00/593 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/512k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/330 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [46]:
#prompt = 'Q: What is the largest animal?\nA:'
question =  ("Explain this feature contibution explanation into a simple natural language sentence. "
             "The explanation is presented in (feature, feature_value, contribution, average_feature_value) format: ")

input_ids = tokenizer(question+explanation, return_tensors="pt").input_ids

generation_output = model.generate(
    input_ids=input_ids, max_new_tokens=30
)
print(tokenizer.decode(generation_output[0]))


<s>Explain this feature contibution explanation into a simple natural language sentence. The explanation is presented in (feature, feature_value, contribution, average_feature_value) format: (Above grade (ground) living area square feet, 1256, -12527.462023188567, 1684.9), (Rates the overall material and finish of the house, 5, -10743.763013432692, 6.7), (Second floor square feet, 0, -10142.290455798697, 583.0)

# +
# %%capture
# !pip install -q git+https://github.com/davidsandberg/
