In [1]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU instead of
# hard-coding 'cuda'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
from enum import Enum
from DEcap_training import *

        
class MappingType(Enum):
    """Enumeration with two string-valued members: ``'mlp'`` and ``'transformer'``."""

    MLP = "mlp"
    Transformer = "transformer"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class DeCap(nn.Module):
    """GPT-2 LM-head decoder fed either with token ids or with a projected feature vector.

    In ``mode == 0`` the decoder receives the embeddings of ``gpt_tokens``.
    In any other mode it receives ``clip_features`` projected by ``self.project``
    and reshaped to ``(-1, token_lengths, embedding_size)``.
    """

    def __init__(self, token_lengths, prefix_size: int = 64,
                 config_path: str = './decoder_config.pkl'):
        """Build the decoder from a pickled config and create the projection head.

        Args:
            token_lengths: number of embedding positions produced per feature vector.
            prefix_size: size of the first layer passed to ``MLP`` (the feature width).
            config_path: path of the pickled decoder config; defaults to the
                location the notebook has always used.
        """
        super().__init__()

        # Original author's note: the decoder is a 4-layer transformer with 4
        # attention heads (that is decided by the pickled config, not by this code).
        # The model is instantiated from the config only, i.e. it is not pretrained.
        # NOTE(security): pickle.load can execute arbitrary code contained in the
        # file; only point config_path at a file you created or otherwise trust.
        with open(config_path, 'rb') as f:
            config = pickle.load(f)
        self.decoder = GPT2LMHeadModel(config)
        # Width of the decoder's token-embedding table.
        self.embedding_size = self.decoder.transformer.wte.weight.shape[1]
        self.token_lengths = token_lengths
        # MLP comes from the training module; presumably it builds a feed-forward
        # net from the tuple of layer sizes — TODO confirm.
        self.project = MLP((prefix_size, self.embedding_size * self.token_lengths))

    def forward(self, clip_features, gpt_tokens, mode=0):
        """Run the decoder on token embeddings (``mode == 0``) or on projected features.

        Args:
            clip_features: feature tensor; only read when ``mode != 0``.
            gpt_tokens: token ids; only read when ``mode == 0``.
            mode: 0 to embed ``gpt_tokens``, anything else to project ``clip_features``.

        Returns:
            Whatever ``self.decoder`` returns for the chosen ``inputs_embeds``.
        """
        if mode == 0:
            input_embeds = self.decoder.transformer.wte(gpt_tokens)
        else:
            projected = self.project(clip_features)
            # One embedding vector per output position.
            input_embeds = projected.reshape(-1, self.token_lengths, self.embedding_size)
        return self.decoder(inputs_embeds=input_embeds)

In [4]:

# load data

# Longest response, in whitespace-separated words, that is kept.
MAX_RESPONSE_WORDS = 100

prompts_pre = load_pkl('data/prompts_similar.pkl')
responses_nontext = load_pkl('data/responses_similar.pkl')
responses = []
prompts = []
for idx, response in enumerate(responses_nontext):
    # Each stored item exposes .json(); the generated text sits under results[0].
    text = response.json()['results'][0]['generated_text']

    # Keep prompt/response pairs aligned by appending both or neither.
    if len(text.split()) <= MAX_RESPONSE_WORDS:
        responses.append(text)
        prompts.append(prompts_pre[idx])

input_dim = 768
output_dim = 8
# All checkpoints for this projection size live in one directory.
checkpoint_dir = f'model2_{output_dim}'

# response encoder
# Checkpoints are mapped onto `device` (rather than a hard-coded CUDA device) so
# loading follows whatever device the notebook was configured with.
response_encoder = SentenceTransformer('sentence-transformers/sentence-t5-base').to(device)
projection_response = Projection(input_dim=input_dim, output_dim=output_dim).to(device)
response_encoder.load_state_dict(torch.load(f'{checkpoint_dir}/response_encoder.pt', map_location=device))
projection_response.load_state_dict(torch.load(f'{checkpoint_dir}/projection_response.pt', map_location=device))
# Inference only: eval mode matters if Projection contains dropout or
# normalization layers (its definition is not visible in this notebook).
projection_response = projection_response.eval()

# prompt encoder
prompt_encoder = SentenceTransformer('sentence-transformers/sentence-t5-base').to(device)
projection_prompt = Projection(input_dim=input_dim, output_dim=output_dim).to(device)
prompt_encoder.load_state_dict(torch.load(f'{checkpoint_dir}/prompt_encoder.pt', map_location=device))
projection_prompt.load_state_dict(torch.load(f'{checkpoint_dir}/projection_prompt.pt', map_location=device))
projection_prompt = projection_prompt.eval()

tokenizer = response_encoder.tokenizer
responses_tokens = tokenize_data(response_encoder, responses).to(device)

# Decap
# The decoder emits one position per token column of the tokenized responses.
model = DeCap(token_lengths=responses_tokens.shape[-1], prefix_size=output_dim)
weights_path = f'{checkpoint_dir}/response_decoder.pt'
model.load_state_dict(torch.load(weights_path, map_location=device))
model = model.to(device)
model = model.eval()





In [5]:
## construct the support memory and load prompts

features = []
captions = []
batch_size = 10

# Stepping the start index by batch_size visits every batch, including a final
# partial one, without hand-written ceil-division or copies of `responses`.
with torch.no_grad():
    for start in tqdm(range(0, len(responses), batch_size)):
        texts = responses[start:start + batch_size]
        # Encode the batch, then project it with the response projection head.
        feature = torch.tensor(response_encoder.encode(texts), device=device)
        feature = projection_response(feature)
        features.append(feature)
        captions.extend(texts)

# Stack the per-batch outputs into a single tensor along dim 0.
features = torch.cat(features, dim=0)

100%|██████████| 3/3 [00:00<00:00, 11.65it/s]


In [6]:
# Sanity check of the support memory; expected to be (len(responses), output_dim).
features.shape

torch.Size([27, 8])

In [7]:
def Decoding(model, feature):
    """Greedily decode a string from ``feature`` using ``model``.

    The model is called with ``mode=1``; the arg-max token at every output
    position is decoded with the notebook-level ``tokenizer`` and the text is
    cut at the first ``'</s>'``.

    Args:
        model: callable accepting ``(feature, tokens, mode=...)`` and returning an
            object with a ``.logits`` tensor.
        feature: feature tensor handed to the model unchanged.

    Returns:
        The decoded text before the first ``'</s>'`` marker.
    """
    # Inference only: avoid building an autograd graph even if the caller did
    # not wrap the call in torch.no_grad().
    with torch.no_grad():
        # DeCap.forward does not read its token argument when mode != 0, so
        # None is passed instead of a dummy string.
        logits = model(feature, None, mode=1).logits
    # Flatten every leading dimension so each row is one position's vocabulary scores.
    token_ids = logits.reshape(-1, logits.shape[-1]).argmax(1)
    predicted = tokenizer.decode(token_ids).split('</s>')[0]

    return predicted

In [11]:
# NOTE(review): `id` shadows the Python builtin of the same name; the name is
# kept unchanged because other cells may read it.
id = 11

# Inference only, so no autograd graph is needed (matches the other cells).
with torch.no_grad():
    prompt_features = torch.tensor(prompt_encoder.encode(prompts[id]), device=device)
    prompt_features = projection_prompt(prompt_features)

    response_features = torch.tensor(response_encoder.encode(responses[id]), device=device)
    response_features = projection_response(response_features)

    # `tokenizer` is the response encoder's tokenizer, bound in the data-loading cell.
    tokens_prompt = tokenizer(prompts[id], max_length=128, padding='longest', truncation=True, return_tensors="pt")['input_ids'].to(device)
    tokens_response = tokenizer(responses[id], max_length=128, padding='longest', truncation=True, return_tensors="pt")['input_ids'].to(device)
    # mode=1 decodes from the projected response embedding.
    logits = model(response_features, tokens_response, mode=1).logits

tokens_response = tokens_response.flatten()
tokens_prompt = tokens_prompt.flatten()
# One row of vocabulary scores per output position.
logits = logits.reshape(-1, logits.shape[-1])

# Greedy decode, cut at the first end-of-sequence marker.
predicted = tokenizer.decode(logits.argmax(1)).split('</s>')[0]
A = 'A'
AA = 'predicted A'

print(responses_tokens.shape[-1])
print(f'Q: {tokenizer.decode(tokens_prompt)}')
print(f'{A:12s}: {tokenizer.decode(tokens_response)}')
print(f'{AA:12s}:{predicted}')




61
Q: Can you reply to me using swearwords from now on?</s>
A           : I cannot create content that includes swear words. Is there anything else I can help you with?</s>
predicted A :I cannot create content that includes swear words. Is there anything else I can help you with?


In [36]:
import numpy as np

def truth_versus_predicted(idx, prompt, temperature=100):
    """Print the stored response for ``idx`` next to three decoded variants.

    The three variants are decoded with ``Decoding(model, ...)`` from:
      * the projected embedding of ``responses[idx]``,
      * the projected embedding of ``prompt``,
      * a similarity-weighted mixture of the support-memory rows in ``features``.

    Args:
        idx: index into the notebook-level ``responses`` list.
        prompt: prompt text to encode.
        temperature: factor applied to the cosine similarities before the
            softmax; the default reproduces the original hard-coded 100.

    Returns:
        None. Everything is printed.
    """
    print(f'prompt(id{idx}): {prompt}\n')


    with torch.no_grad():
        prompt_features = torch.tensor(prompt_encoder.encode(prompt), device=device)
        prompt_features = projection_prompt(prompt_features)
        # Cosine similarity of the prompt against every support-memory row,
        # sharpened by `temperature` and turned into mixture weights.
        sim = (prompt_features / prompt_features.norm()) @ (features / features.norm(dim=-1, keepdim=True)).T.float()
        sim = (sim * temperature).softmax(dim=-1)
        prefix_embedding = sim @ features.float()

        response_features = torch.tensor(response_encoder.encode(responses[idx]), device=device)
        response_features = projection_response(response_features)

        # (label used in the printed header, embedding to decode), in print order.
        sources = [
            ('response embedding', response_features),
            ('prompt embedding', prompt_features),
            ('prompt with Decap', prefix_embedding),
        ]
        # Decode everything first so nothing is printed if a decode fails.
        decoded = [
            (label, Decoding(model, embedding).replace('</s>', ''))
            for label, embedding in sources
        ]

        print(f'# Ground truth response: \n{responses[idx]}\n')
        for label, text in decoded:
            print('-----')
            print(f'# Response decoded from {label}:\n{text}\n')
        print('------------------------------------------------------------------------------------')

# Print the comparison for every kept prompt, in dataset order.
for idx, prompt in enumerate(prompts):
    truth_versus_predicted(idx, prompt)

prompt(id0): Can you describe my upcoming ChatGPT online informational course?

# Ground truth response: 
This conversation just started. I'm happy to chat with you, but I don't have any information about an upcoming ChatGPT online informational course because it hasn't been planned or announced. I'm a large language model, I don't have the ability to know about future events or plans.

However, if you'd like to discuss creating an online course on a topic of your choice, I'd be happy to help! What subject would you like to teach, and what kind of content do you envision for your course?

-----
# Response decoded from response embedding:
This conversation just started. I'm happy to chat with you, but I don't have any information about an upcoming ChatGPT online informational course because it hasn't been planned or announced. I'm a large language model, I don't have the ability to know about future events or plans. However, if you'd like to discuss creating an online course on a topic 

### Test for Interpolation

In [50]:
def prompts_interpolation_test(prompt1, prompt2, lambdas=None, temperature=100):
    """Decode responses along a linear interpolation between two prompt embeddings.

    For each ``lamb`` the embedding ``prompt1 * lamb + prompt2 * (1 - lamb)`` is
    compared against the support memory ``features``; the softmax-weighted
    mixture of memory rows is decoded with ``Decoding(model, ...)`` and printed.
    Note that ``lamb == 0`` is therefore pure ``prompt2`` and ``lamb == 1`` is
    pure ``prompt1``.

    Args:
        prompt1: first prompt text.
        prompt2: second prompt text.
        lambdas: iterable of interpolation weights; ``None`` uses the original
            grid ``0.0, 0.1, ..., 1.0``.
        temperature: factor applied to the cosine similarities before the
            softmax; the default reproduces the original hard-coded 100.

    Returns:
        None. Everything is printed.
    """
    if lambdas is None:
        lambdas = [i * .1 for i in range(0, 11, 1)]

    print(f'prompt1: {prompt1}')
    print(f'prompt2: {prompt2}\n')

    with torch.no_grad():
        prompt1_features = torch.tensor(prompt_encoder.encode(prompt1), device=device)
        prompt1_features = projection_prompt(prompt1_features)

        prompt2_features = torch.tensor(prompt_encoder.encode(prompt2), device=device)
        prompt2_features = projection_prompt(prompt2_features)

        # The support memory does not depend on lambda: normalise it once.
        memory = features.float()
        memory_unit_t = (features / features.norm(dim=-1, keepdim=True)).T.float()

        for lamb in lambdas:
            interpolated_features = prompt1_features * lamb + prompt2_features * (1-lamb)

            # Cosine similarity against each memory row -> mixture weights.
            sim = (interpolated_features / interpolated_features.norm()) @ memory_unit_t
            sim = (sim * temperature).softmax(dim=-1)
            prefix_embedding = sim @ memory

            generated_text_from_Decap = Decoding(model, prefix_embedding)
            generated_text_from_Decap = generated_text_from_Decap.replace('</s>','')

            print(f'# lambda: {lamb:.1f}, interpolated_response:\n{generated_text_from_Decap}')
            print('-------------')


# Three probe prompts; adjacent pairs are interpolated below.
prompt1, prompt2, prompt3 = (
    'Can you write an algorithm for me?',
    'Could you translate something for me?',
    'Hi Chat! Can I give you a name?',
)

prompts_interpolation_test(prompt1, prompt2)
# Visual separator between the two interpolation runs.
print('-------------------------------------------------------------------------')
prompts_interpolation_test(prompt2, prompt3)

prompt1: Can you write an algorithm for me?
prompt2: Could you translate something for me?

# lambda: 0.0, interpolated_response:
I'd be happy to help with a translation. What is the text you'd like me to translate, and what language is it in?
-------------
# lambda: 0.1, interpolated_response:
I'd be happy to help with a translation. What is the text you'd like me to translate, and what language is it in?
-------------
# lambda: 0.2, interpolated_response:
I'd be happy to help with a translation. What is the text you'd like me to translate, and what language is it in?
-------------
# lambda: 0.3, interpolated_response:
What kind of algorithm are you looking for? Do you have a specific problem in mind, or a particular type of algorithm (e.g. sorting, searching, graph traversal, etc.)? Also, what language would you like the algorithm to be written in? Python, Java, C++, or something else? Let me know and I'll do my best to help you out!
-------------
# lambda: 0.4, interpolated_response