In [21]:
!pip install openai -q

# Imports

In [22]:
from google.colab import userdata
from openai import OpenAI

from google.colab import drive

from tqdm.auto import tqdm

import pandas as pd

# Get all the required connections to drive and OpenAI

In [23]:
# Mount Google Drive at /content/drive; the dataset and result paths used in
# this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Read the API key from the Colab userdata entry named 'OPENAI_API_KEY'
# instead of hard-coding it in the notebook.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
# Shared client used by every request below; max_retries=5 is handed to the SDK
# (presumably so transient failures are retried — confirm against the SDK docs).
client = OpenAI(api_key=OPENAI_API_KEY, max_retries=5)

GPT-3.5 (Just for testing purposes)

1711835043 news complete

1711835043 twitter complete

GPT-4

1711835043 news complete

1711835043 twitter complete

1711835043 mix complete



# Setup notebook variables

In [31]:
# Model to query: "gpt-3.5-turbo-0125" or "gpt-4".
model = "gpt-4"
# File-name-safe variant of the model id: dots are dropped, dashes become underscores.
model_name = model.translate(str.maketrans({'.': None, '-': '_'}))

# Timestamp prefix of the dataset version to use (select the latest one).
filestamp = "1711835043"
# Which data source to evaluate: twitter, news or mix.
source = "twitter"

# Input splits and the CSV the model answers are written to.
train_path = f'/content/drive/MyDrive/Thesis data/relevant_selection/{filestamp}_{source}_train.json'
test_path = f'/content/drive/MyDrive/Thesis data/relevant_selection/{filestamp}_{source}_test.json'
result_path = f'/content/drive/MyDrive/Thesis data/gpt_results/{filestamp}_{source}_{model_name}.csv'

# Test whether the API connection is working

In [6]:
# Smoke test: send one hand-written prompt with a 1-token reply cap to confirm
# that the API key and client work before running the full evaluation.
# The backslash after "June." is written as "\\" so the literal no longer
# contains an invalid escape sequence; the text sent to the API is unchanged.
completion = client.chat.completions.create(
  model="gpt-3.5-turbo-0125",
  max_tokens=1,
  messages=[
    {"role": "user", "content": 'Given the tweet in json format below, please indicate whether the price of the stock with ticker "spx" will go up or down tomorrow. Only answer with UP or DOWN. \n {Tweet: "$SPY $SPX $ES_F Rare but it seems the most likely now. ABC flat off of the (.236) $397 gap supply. I’ll be looking for a pop off fed minutes tomorrow to create a double top before continuation lower into June.\\ I’m still thinking $NVDA will cause the next tech sell off. > [@ATMSnipes](https://twitter.com/ATMSnipes):> $SPY $SPX $ES_Fn Local bottom🎯 Local top🎯 The wedge is becoming clear but not finished just yet. This week looks like a chop fest waiting to happen before another impulse down. $392-$395 supply will be resistance $484, $480 should act as support"}'}
  ]
)

# Show the text of the first returned choice.
completion.choices[0].message.content

'DOWN'

# Load the relevant datasets for testing

In [26]:
# Training split: source of the few-shot examples.
train = pd.read_json(train_path)
# Test split: the rows that are actually sent to the model.
test = pd.read_json(test_path)

# Create two shot prompts

In [8]:
# Partition the training rows by their 'result_1' label so that one UP and one
# DOWN example can be drawn for every two-shot prompt.
up, down = (train[train['result_1'].eq(label)] for label in ('UP', 'DOWN'))

In [9]:
def get_shots(data, positive, negative, random_state=None, separator=''):
  """Build one two-shot prompt (and its token count) for every row of `data`.

  For each row, one example is drawn at random from `positive` and one from
  `negative`. An example is its `prompt` text immediately followed by its
  `result_1` label. The two-shot prompt is the positive example, then the
  negative example, then the row's own `prompt`, joined with `separator`.

  Args:
    data: DataFrame with `prompt` and `token_count` columns; one two-shot
      prompt is produced per row, in row order.
    positive: DataFrame of examples placed first; needs `prompt`, `result_1`
      and `token_count` columns.
    negative: DataFrame of examples placed second; same columns as `positive`.
    random_state: Optional. An int seeds a private random generator so the
      drawn examples are reproducible; an existing generator object is passed
      through to `DataFrame.sample`. With the default `None`, pandas falls
      back to NumPy's global random state, as before.
    separator: Text inserted between the three parts. The default `''`
      keeps the original behaviour of concatenating them directly (which
      yields e.g. "Output:UPInstruct: ..."); pass something like '\n\n' to
      delimit the shots.

  Returns:
    A tuple `(twoshot_prompts, twoshot_tokens)` of two lists with one entry
    per row of `data`: the assembled prompt and the sum of the three
    `token_count` values that went into it.

  Raises:
    ValueError: if `data` has rows but `positive` or `negative` is empty.
  """
  import numpy as np  # local import: only needed to build the seeded generator

  if len(data) > 0 and (len(positive) == 0 or len(negative) == 0):
    raise ValueError('positive and negative must each contain at least one example')

  # One generator shared across all rows, so each row gets a fresh draw while
  # the whole sequence stays reproducible for a given seed.
  if isinstance(random_state, (int, np.integer)):
    rng = np.random.RandomState(random_state)
  else:
    rng = random_state

  twoshot_prompts = []
  twoshot_tokens = []
  # Iterate the two needed columns directly instead of building a Series per row.
  for zeroshot, zeroshot_count in zip(data['prompt'], data['token_count']):
    # Draw order (positive first, then negative) is kept from the original.
    pos = positive.sample(n=1, random_state=rng)
    neg = negative.sample(n=1, random_state=rng)

    positive_sample = pos['prompt'].iloc[0] + pos['result_1'].iloc[0]
    negative_sample = neg['prompt'].iloc[0] + neg['result_1'].iloc[0]
    twoshot_prompts.append(separator.join([positive_sample, negative_sample, zeroshot]))

    twoshot_tokens.append(
      pos['token_count'].iloc[0] + neg['token_count'].iloc[0] + zeroshot_count
    )

  return twoshot_prompts, twoshot_tokens



In [10]:
# Attach the two-shot prompts and their token counts to the test frame.
# `up` is passed as the positive pool, so the UP example always comes first.
# The examples are drawn at random, so re-running this cell changes the prompts.
test['two_shot'], test['two_shot_token_count'] = get_shots(test, up, down)

In [11]:
# Eyeball one assembled prompt: the entry labelled 0 in the 'two_shot' column
# (the first row, assuming a default integer index — TODO confirm).
test['two_shot'][0]

'Instruct: Given the tweet below, please indicate whether the price of the stock with ticker "TSLA" will go up or down tomorrow. Only answer with UP or DOWN. \n Tweet: "🚨NEW #BREAKFASTRADES MORNING SCALP VIDEO:\n\nIn this video I have a PERFECTLY executed pattern trade on a $TSLA short\n\nEntry trigger: Pattern Breakdown Confirmation\nTotal position size: $125,000\nTrade duration: 3 minutes\nProfit: $625\n\nWatch it here 👇\nhttps://youtu.be/nhnKfbXSuLs" \n\nOutput:UPInstruct: Given the tweet below, please indicate whether the price of the stock with ticker "BHP" will go up or down tomorrow. Only answer with UP or DOWN. \n Tweet: "Great chart. Iron Ore may be names deserving of a closer look…\n\n$BHP $RIO $VALE $CLF $EWA $EWZ \n\n> [@daChartLife](https://twitter.com/daChartLife):\n> Yo, Charts - Here’s the relative in more detail. Any thoughts?" \n\nOutput:DOWNInstruct: Given the tweet below, please indicate whether the price of the stock with ticker "MARA" will go up or down tomorrow. 

# Get indication of tokens sent to OpenAI

In [12]:
# Prompt tokens across both runs: every test row is sent once zero-shot and
# once two-shot.
total_tokens = test['token_count'].sum() + test['two_shot_token_count'].sum()
# Rate applied per 1,000,000 tokens: 30 for "gpt-4", .5 for any other model
# (presumably USD input pricing — verify against the current price list).
rate_per_million = 30 if model == "gpt-4" else .5
# The 1.1 factor inflates the estimate by 10% (presumably a safety margin).
cost = 1.1*rate_per_million*total_tokens/1000000

In [13]:
# Report the zero-shot and two-shot token totals next to the cost estimate
# before any paid requests are made.
print(f'Zeroshot tokens: {test.token_count.sum()}, Twoshot tokens: {test.two_shot_token_count.sum()}, cost indication: {cost}')

Zeroshot tokens: 84972, Twoshot tokens: 252361, cost indication: 11.131989


# Function for the get request to OpenAI

In [46]:
def get_OPENAI(data, model_version, shot, max_tokens=1, seed=42):
  """Send one chat-completion request per row of `data` and collect the answers.

  The text in column `shot` of each row is sent as a single user message
  through the notebook-level `client`; the message content of the first
  returned choice is appended to the result list. A tqdm progress bar tracks
  the loop.

  Args:
    data: DataFrame holding the prompts.
    model_version: Model identifier passed to the API as `model`.
    shot: Name of the column in `data` that contains the prompt text
      (e.g. 'prompt' or 'two_shot').
    max_tokens: Cap on generated tokens per request. The default of 1 keeps
      the original behaviour; note that with this cap a reply may be a
      truncated word such as 'The' rather than UP/DOWN.
    seed: Value forwarded as the request's `seed`; defaults to the original 42.

  Returns:
    A list with one response per row of `data`, in row order.
  """
  responses = []
  # Only the prompt column is needed, so iterate it directly rather than
  # materialising every row with iterrows().
  for prompt in tqdm(data[shot], total=len(data)):
    completion = client.chat.completions.create(
      model=model_version,
      seed=seed,
      max_tokens=max_tokens,
      messages=[
        {"role": "user", "content": prompt}
      ]
    )
    responses.append(completion.choices[0].message.content)
  return responses

# Zero shot responses

In [15]:
# Zero-shot run: send the plain 'prompt' column of every test row to `model`.
result = get_OPENAI(test, model, 'prompt')

  0%|          | 0/200 [00:00<?, ?it/s]

In [16]:
# Start the results table with the zero-shot answers as its only column.
result_df = pd.DataFrame(result, columns=['zero_shot'])

# Two shot responses

In [17]:
# Two-shot run: same rows, now using the 'two_shot' prompts.
# NOTE: this rebinds `result`, replacing the zero-shot answers held in that name.
result = get_OPENAI(test, model, 'two_shot')

  0%|          | 0/200 [00:00<?, ?it/s]

In [18]:
# Add the two-shot answers next to the zero-shot ones (plain string key: the
# former f-string had no placeholders).
result_df['two_shot'] = result

In [19]:
# Display the collected answers. Some zero-shot entries in the saved output are
# 'The' rather than UP/DOWN, so the column needs cleaning before scoring.
result_df

Unnamed: 0,zero_shot,two_shot
0,UP,UP
1,UP,UP
2,UP,UP
3,DOWN,DOWN
4,The,UP
...,...,...
195,UP,UP
196,UP,UP
197,The,UP
198,DOWN,DOWN


In [20]:
# Persist the answers to the run-specific CSV on Drive. With to_csv's default
# settings the row index is written as an unnamed first column.
result_df.to_csv(result_path)