In [1]:
import tweepy, openai, json,  re, random, datetime as dt

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens that ``string`` encodes to.

    Args:
        string: Text to tokenize.
        encoding_name: Either a model name (e.g. ``"text-davinci-003"``, which
            is how this notebook calls the helper) or a tiktoken encoding
            name. It is tried as a model name first and as an encoding name
            second.

    Returns:
        The length of the token sequence produced by the resolved encoding.

    Raises:
        Whatever ``tiktoken.get_encoding`` raises when the argument is neither
        a known model name nor a known encoding name.
    """
    import tiktoken  # local import: tiktoken is only needed by this helper

    try:
        encoding = tiktoken.encoding_for_model(encoding_name)
    except KeyError:
        # Not a model name tiktoken recognises; interpret it as an encoding name.
        encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))

## Twitter API below

In [33]:
# Retrieve the Twitter API keys from a JSON file in the parent of the working directory.
# The path uses a forward slash so that it resolves on Windows as well as on
# POSIX systems (a backslash is an ordinary filename character there).
with open('../twitter_config.json') as f:
    creds = json.load(f)

# The JSON file is expected to provide each of the keys read below.
bearer_token = creds['bearer_token']
consumer_key = creds['consumer_key']
consumer_secret = creds['consumer_secret']
access_token = creds['access_token']
access_token_secret = creds['access_token_secret']

# Open an instance of the Twitter API client and get authorized access
# using the consumer key pair plus the access token pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Twitter handles of the news orgs whose tweets are compiled into the prompt.
screen_names = ['markets', 'WSJmarkets', 'CNBCnow', 'FTMarkets', 'MarketWatch', 'MarketCurrents']

# One timeline request per handle; `responses` holds the results in the same
# order as `screen_names`. No `since` argument is passed: it is not a
# user_timeline parameter and was only ever given as None.
responses = [
    api.user_timeline(
        screen_name=sn,
        count=50,            # request up to 50 tweets per account
        include_rts=False,   # leave retweets out
        tweet_mode='extended',
    )
    for sn in screen_names
]

# Time window: keep tweets created during the 8 hours leading up to now (UTC).
# The clock is read once so that both bounds derive from the same instant.
end_date = dt.datetime.now(dt.timezone.utc)
start_date = end_date - dt.timedelta(hours=8)

text_list = []  # cleaned text of every tweet that falls inside the window
for res in responses:
    for tweet in res:
        # limit tweets to those inside the window specified above
        if start_date < tweet.created_at < end_date:
            # regular expression cuts out http links; strip() trims what is left
            cleaned = re.sub(r'http\S+', '', tweet.full_text).strip()
            if cleaned:  # a tweet that was nothing but a link contributes no text
                text_list.append(cleaned)

random.shuffle(text_list)  # randomize the order of the tweets
tweet_text = '\n'.join(text_list)  # single newline-separated string to deliver to GPT


## Putting the prompt together for OpenAI to digest

In [46]:
# Delimiter: a line holding three double quotes, placed before and after the tweets.
breaker1 = '\n"""\n'

# Instruction text. It is written as a multi-line literal; split() followed by
# ' '.join() collapses the line breaks and whitespace runs into single spaces.
prompt1 = ' '.join(
    '''
write a limerick from the tweets below
'''.split()
)

# The tweet text gathered earlier forms the body of the prompt.
prompt2 = tweet_text

# Final layout: instruction, delimiter, tweets, delimiter.
prompt = ''.join([prompt1, breaker1, prompt2, breaker1])

# Cell output: the token count of the assembled prompt.
num_tokens_from_string(prompt, "text-davinci-003")

3195

## Sending the prompt to OpenAI to do its work

In [58]:
# running this block costs money

# Read the OpenAI API key from a JSON file in the working directory. The path
# is written with a forward slash: the backslash form contained an invalid
# string escape and only resolved on Windows.
with open('./openai_key.json', 'r') as j:
    contents = json.load(j)
openai.api_key = contents['openai_key']

n = 5  # how many completions to request

# Count the prompt tokens once and give the completion whatever remains of the
# 4096-token budget this notebook assumes for prompt plus completion.
prompt_tokens = num_tokens_from_string(prompt, "text-davinci-003")
max_completion_tokens = 4096 - prompt_tokens
if max_completion_tokens <= 0:
    # Stop before the paid request: there is no room left for a completion.
    raise ValueError(
        f"prompt uses {prompt_tokens} tokens, leaving none of the 4096-token budget for a completion"
    )

response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prompt,
    suffix=None,  # suffix that comes after a completion of inserted text
    max_tokens=max_completion_tokens,
    temperature=.9,  # the lower, the more deterministic (less variable) the model
    top_p=1,  # an alternative to sampling with temperature
    n=n,  # how many completions to generate for each prompt
    frequency_penalty=1.0,
    presence_penalty=0.0,
)

# Show each candidate followed by its word count.
for choice in response['choices']:
    print(choice['text'])
    print(len(choice['text'].split()))

In [59]:
# Send the selected limerick (the completion at index 1) as a tweet, followed by
# one @mention per line for each news org the source tweets were pulled from.
mentions = '\n'.join('@' + handle for handle in screen_names)
api.update_status(response['choices'][1]['text'] + '\n' + mentions)