In [1]:
# reload magic
%load_ext autoreload
%autoreload 2

In [11]:
import openai
from openai import OpenAI

with open('OPENAI_API_KEY', 'r') as f:
    OPENAI_API_KEY = f.read().strip()
client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
# Import deterministic.py using local file path
import sys
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../sequence_generators')
import deterministic


In [132]:

def make_sequence_sets(length, n_train, n_data, p_bitflip=0.0):
    sequence_sets = []
    for i in range(n_data):
        gen = deterministic.SequenceGen(lookback=4, seed=228+i, number_of_generating_methods=1)
        data, generating_func = gen.deterministically_generate_sequences(length=length, num_seq=1, save=False)
        train_data = data[0][:n_train]
        if p_bitflip > 0:
            mask = np.random.choice([0, 1], size=(len(train_data),), p=[1-p_bitflip, p_bitflip]).astype(np.uint8)
            train_data = np.array(train_data, dtype=np.uint) ^ mask
        training_string = " ".join(train_data.astype(str))
        test_string = " ".join(data[0][n_train:])
        sequence_sets.append((training_string, test_string, generating_func[0]))

    return sequence_sets

In [180]:
def sample_completions(model, input_str, steps, num_samples, noisy=False, logprobs=True, top_logprobs=5, temp=None, logit_bias=None, **kwargs):
    ''' Sample completions from GPT-3
    Args:
        input_str: input sequence as a string
        steps: number of steps to predict
        num_samples: number of samples to return
        temp: temperature for sampling
        prompt: additional prompt before the input string
        model: name of GPT-3 model to use
    Returns:
        list of completion strings
    
    https://github.com/ngruver/llmtime/blob/main/models/promptcast.py
    '''
    trick_token_count = 1000 # this is a trick to get the model to attempt to predict a large number of tokens, which we truncate with the api
    chatgpt_sys_message = f"""You are a helpful assistant that predicts the next bit. The user will provide a sequence containing ONLY 0 or 1, 
                             and you will predict the next {trick_token_count} digits that come next. The sequence is represented by only digits 0 or 1 separated by spaces, NO COMMAS.
                             The data may be noisy, in which case you should predict the most likely sequence."""
    
    extra_input = """Please continue the following sequence with only digits 0 or 1 separated by only spaces. Do not produce any additional text. Do not include commas. 
                     Do not say anything like 'the next terms in the sequence are', just return the numbers. """
    noisy_prompt = """This data has been generated with some bitflip noise. Predict the most likely sequence WITHOUT NOISE. """
    if noisy:
        extra_input = extra_input + noisy_prompt
    extra_input = extra_input + "Sequence:\n"

    if model in ['gpt-3.5-turbo','gpt-4']:
        chatgpt_sys_message = chatgpt_sys_message
        extra_input = extra_input
        
        response = client.chat.completions.create(
            model=model,
            messages=[
                    {"role": "system", "content": chatgpt_sys_message},
                    {"role": "user", "content": extra_input+input_str}
                ],
            max_tokens=int(steps), 
            temperature=temp,
            logit_bias=logit_bias,
            n=num_samples,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            **kwargs
        )
        return response


In [204]:
def best_shifted_acc(preds, truth, bounds=(-4, 4)):
    """Since GPT starts in the wrong place sometimes, we will be generous and look for the best alignment between its predicted string and the test string.
    if we have to move by m places, we will discard m of the string during evaluation.
    """
    best_acc = 0
    best_shift = 0
    preds = np.array(preds)
    truth = np.array(truth)
    assert len(preds) == len(truth)
    for shift in range(bounds[0], bounds[1]):
        # minus sign means we shift 
        if shift < 0:
            x = preds[abs(shift):] # shifted preds
            y = truth[0:len(x)] # truncated truth
        elif shift > 0:
            x = truth[shift:] # shifted truth
            y = preds[0:len(x)] # truncated preds
        else:
            x = preds
            y = truth

        acc = 1 - np.mean(abs(x - y))
        if acc > best_acc:
            best_acc = acc
            best_shift = shift
    return best_acc, best_shift

In [205]:
preds = [0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0]
truth = [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]
print(best_shifted_acc(preds, truth))

(0.9285714285714286, -2)


#### Experiment 1

Each digit gets its own token (with a space getting another token between each). We will use deterministic data only, and ask GPT to learn to extend a single sequence that we generate.

 - no noise
 - single input, space-separated bits
 - 20 trials, for 20 different generating functions
 
 **Succeeded** - the model had no problem doing a 3-lookback sequence extension. One issue is that these data are very repetitive.

In [206]:
## Generate a single-generating-method dataset

# For the first attempt, we will have one really long sequence, 
# and gpt will try to predict the final bit, then we will average
# its performance over some large number of evaluations
# TODO: hyperparameter tuning with temperature, other gpt-3 parameters from [1]

N_BITS=100
n_train = 80
n_test = N_BITS - n_train
n_data=20


sequence_sets = make_sequence_sets(N_BITS, n_train, n_data, p_bitflip=0.0)

In [209]:
logprobs = 4
model = 'gpt-3.5-turbo'
steps = n_test*2 - 1

completions = []
for training_string, test_string, generating_func in sequence_sets:
    completion = sample_completions(model, training_string, steps, 1, logprobs=True, top_logprobs=5, temp=1, logit_bias=None)
    completions.append( completion )

In [214]:
for i in range(n_data):
    completion = completions[i]
    preds = np.array([int(x) for x in completion.choices[0].message.content.split()])
    truth = np.array([int(x) for x in sequence_sets[i][1].split()])
    acc, shift = best_shifted_acc(preds, truth)
    print(f"Accuracy: {acc}, Shift: {shift}")
    print(f"Predicted: {preds}")
    print(f"Truth:     {truth}")
    print("")

Accuracy: 1.0, Shift: -1
Predicted: [0 1 1 0 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 0]
Truth:     [1 1 0 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 0 1]

Accuracy: 1.0, Shift: -1
Predicted: [1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0]
Truth:     [1 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 1]

Accuracy: 1.0, Shift: 0
Predicted: [1 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 0 1]
Truth:     [1 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 0 1]

Accuracy: 0.95, Shift: 0
Predicted: [1 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1]
Truth:     [0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1]

Accuracy: 1.0, Shift: -4
Predicted: [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Truth:     [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

Accuracy: 1.0, Shift: -4
Predicted: [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Truth:     [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Accuracy: 1.0, Shift: -1
Predicted: [1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0]
Truth:     [0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1]

Accuracy: 1.0, Shift: -2
Predicted: [1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0]

### Experiment 2: Add bitflip noise



In [221]:
n_bits = 500 # need more because of bitflip chance...
n_train = 400
n_test = n_bits - n_train
n_data = 20
p_bitflip = 0.05
noisy_sequence_sets = make_sequence_sets(n_bits, n_train, n_data, p_bitflip=p_bitflip)

In [222]:
logprobs = 4
model = 'gpt-3.5-turbo'
steps = n_test*2 - 1

noisy_completions = []
for training_string, test_string, generating_func in noisy_sequence_sets:
    completion = sample_completions(model, training_string, steps, 1, logprobs=True, top_logprobs=5, temp=1, logit_bias=None, noisy=True)
    noisy_completions.append( completion )

In [223]:
for i in range(n_data):
    completion = noisy_completions[i]
    preds = np.array([int(x) for x in completion.choices[0].message.content.split()])
    truth = np.array([int(x) for x in noisy_sequence_sets[i][1].split()])
    acc, shift = best_shifted_acc(preds, truth)
    print(f"Accuracy: {acc}, Shift: {shift}")
    predstr = "".join([str(x) for x in preds])
    truthstr = "".join([str(x) for x in truth])
    print(f"Predicted: {predstr}")
    print(f"Truth:     {truthstr}")
    print("")

Accuracy: 0.7604166666666666, Shift: -4
Predicted: 1000111001110011100111001110011100111001110011111110011100111001100011000110001110111110111001110011
Truth:     1100111001110011100111001110011100111001110011100111001110011100111001110011100111001110011100111001

Accuracy: 1.0, Shift: -1
Predicted: 1100011000110001100011000110001100011000110001100011000110001100011000110001100011000110001100011000
Truth:     1000110001100011000110001100011000110001100011000110001100011000110001100011000110001100011000110001

Accuracy: 0.9299999999999999, Shift: 0
Predicted: 1110011101111011010111101111011111011001111011110111101111011110111101111011110110101111111110111101
Truth:     1110111101111011110111101111011110111101111011110111101111011110111101111011110111101111011110111101

Accuracy: 0.9696969696969697, Shift: -1
Predicted: 0101001010100101001010010100101001010010100101001010010100101001010010100101001010010100101001010010
Truth:     0010100101001010010100101001010010100101001010010100101001