# OpenAI API

### Getting sentiment analysis done by ChatGPT

In [None]:
import os
import pandas as pd
import openai
from tenacity import retry, stop_after_attempt, wait_random_exponential
from tqdm import tqdm
import time

In [3]:
# Read the OpenAI API key from the environment instead of hardcoding it in the
# notebook: a key committed with the notebook is exposed to every reader.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    raise RuntimeError('Set the OPENAI_API_KEY environment variable before running this notebook')

# Load the ';'-separated csv file with the news data
news_final = pd.read_csv('News_API_final.csv', sep=';', encoding='utf-8')

# Sort by date, then by company, and reset the index so it runs 0..n-1
news_final = news_final.sort_values(by=['date', 'company']).reset_index(drop=True)

# Headlines as strings
headlines_list = news_final['title'].astype(str)

# Replace ';' in the titles with a space (';' is the field separator used for
# the csv and log files in this notebook)
titles_replace_list = [headline.replace(';', ' ') for headline in headlines_list]

# Update the 'title' column with the cleaned titles
news_final['title'] = titles_replace_list

# Used for data filtering when testing and restarting at a new index.
# Note: this is a second name for the same DataFrame object, not a copy.
news_df = news_final



In [4]:
# Log function
def log(logfile, Index, titles, companys, dates=os.getcwd()):
    """Append one timestamped, ';'-separated row to ``logfile``.

    If ``logfile`` does not exist yet it is created and a header row
    (``Timestamp;Index;Title;Company;Dates``) is written before the data row.

    Parameters
    ----------
    logfile : path of the log file to append to.
    Index : value written to the ``Index`` column.
    titles : value written to the ``Title`` column.
    companys : value written to the ``Company`` column.
    dates : value written to the ``Dates`` column. The default is the working
        directory captured when the function was defined; it is kept only for
        backward compatibility, so callers should pass the date explicitly.

    Values are interpolated as-is, so a ';' inside any value adds an extra
    field to the row.
    """
    # Decide about the header before opening: append mode creates the file.
    needs_header = not os.path.isfile(logfile)

    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))  # Local time

    # A single handle, closed by the context manager, guarantees the header is
    # written before the first row and that no file handle is leaked.
    # UTF-8 is set explicitly so non-ASCII titles do not depend on the
    # platform's default encoding.
    with open(logfile, 'a', encoding='utf-8') as log_file:
        if needs_header:
            header = ['Timestamp', 'Index', 'Title', 'Company', 'Dates']
            log_file.write(';'.join(header) + "\n")
        log_file.write(f'{timestamp};{Index};{titles};{companys};{dates}' + "\n")

# Chat completion call wrapped in a retry policy to ride out rate-limit errors
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
def completion_with_backoff(**kwargs):
    """Forward ``kwargs`` to ``openai.ChatCompletion.create`` and return its result.

    The tenacity ``retry`` decorator re-invokes the call with a randomized
    exponential wait (bounds 1 to 60 seconds) and stops after 6 attempts.
    """
    chat_response = openai.ChatCompletion.create(**kwargs)
    return chat_response


In [4]:
# Getting the sentiment analysis from Open AI API

def OpenAI_sentiment(titles, companys, dates):
    """Ask the chat model for a sentiment score for every headline.

    The three iterables are walked in parallel. For each (title, company, date)
    triple one chat completion is requested through ``completion_with_backoff``
    and the text of the first choice is stored as the score. After every
    request the new row is appended to 'Open AI responses_3_Søren.csv'
    (columns ``LLM_score``, ``date``, ``company``) and a line is written to
    'log_chat_gpt.csv', so an interrupted run leaves all finished rows on disk.

    Parameters
    ----------
    titles : iterable of headline strings.
    companys : iterable of company names, aligned with ``titles``.
    dates : iterable of dates, aligned with ``titles``.

    Returns
    -------
    pandas.DataFrame
        Columns ``LLM_score``, ``date`` and ``company``; the index is named
        ``Index`` and counts the processed rows from 0. Empty (but with the
        same columns) when there is nothing to process.
    """
    logfile = 'log_chat_gpt.csv'
    output_path = 'Open AI responses_3_Søren.csv'
    columns = ['LLM_score', 'date', 'company']

    # Give tqdm the length when it is known so the bar can show percentage/ETA
    total = len(titles) if hasattr(titles, '__len__') else None
    rows = tqdm(zip(titles, companys, dates), desc='Analyzing sentiments', total=total)

    records = []
    for count, (title, company, date) in enumerate(rows):
        response = completion_with_backoff(
            model='gpt-3.5-turbo',
            messages=[{'role': 'user', 'content': f'Forget all your previous instructions. Pretend you are a financial expert. You are a financial expert with stock recommendation experience. Answer only “1” if good news, “-1” if bad news, or “0” if uncertain in the first line. Is this headline good or bad for the stock price of {company} in the short term? Headline: {title}'}],
            temperature=0,
            max_tokens=2
        )
        # The score is the text of the first (only) returned choice
        sentiment = response['choices'][0]['message']['content']
        record = {'LLM_score': sentiment, 'date': date, 'company': company}
        records.append(record)

        # Checkpoint: the first row (re)creates the file with a header, later
        # rows are appended. This avoids rebuilding and rewriting the whole
        # table on every iteration while keeping the same file contents.
        is_first_row = count == 0
        pd.DataFrame([record], columns=columns).to_csv(
            output_path,
            mode='w' if is_first_row else 'a',
            header=is_first_row,
            index=False,
        )
        # Log the processed row
        log(logfile, count, title, company, date)

        # Short pause between requests (previously dedented out of the loop,
        # where it ran only once after all requests had been made)
        time.sleep(0.1)

    # Build the result once; this also works when no rows were processed
    Indexed_responses_df = pd.DataFrame(records, columns=columns)
    Indexed_responses_df.index.name = 'Index'
    return Indexed_responses_df
            

# Pull the headline, company and date columns out of the news frame
titles, companys, dates = (news_df[column] for column in ('title', 'company', 'date'))

# Score every headline with the OpenAI model
sentiment_df = OpenAI_sentiment(titles, companys, dates)




Analyzing sentiments: 0it [00:00, ?it/s]

Analyzing sentiments: 11467it [2:45:00,  1.16it/s] 


In [21]:
# Merge news_df with sentiment_df by aligning the two frames on their index.
# This was the initial merge; we ended up with many different files, so another
# approach was used afterwards: the merge was done on the date and company
# columns instead.
news_final = news_df.merge(sentiment_df, how='left', left_index=True, right_index=True)
news_final

Unnamed: 0,date_x,title,symbols,sentiment,ticker,company_x,LLM_score,date_y,company_y
0,2022-06-01T00:53:45+00:00,Nomura M&amp A Banker Chung Joins Crypto Firm ...,"['0R01.LSE', 'BK.US', 'BN9.F', 'BONY34.SA', 'C...","{'polarity': -0.955, 'neg': 0.094, 'neu': 0.84...",RIOT,Riot Platforms Inc,1,2022-06-01T00:53:45+00:00,Riot Platforms Inc
1,2022-06-01T02:27:00+00:00,Forrester: Only 28% Of Australians Trust The F...,['FORR.US'],"{'polarity': 1, 'neg': 0.026, 'neu': 0.726, 'p...",FORR,Forrester Research Inc,0,2022-06-01T02:27:00+00:00,Forrester Research Inc
2,2022-06-01T03:50:00+00:00,Jack in the Box to Present at Investor Confere...,"['JACK.US', 'JBX.F']","{'polarity': 0.823, 'neg': 0, 'neu': 0.961, 'p...",JACK,Jack In The Box Inc,0,2022-06-01T03:50:00+00:00,Jack In The Box Inc
3,2022-06-01T03:57:00+00:00,Seattle OKs minimum wage for delivery gig workers,"['DASH.US', 'LY0.F', 'LYFT.US', 'U1BE34.SA', '...","{'polarity': 0.178, 'neg': 0, 'neu': 0.904, 'p...",LYFT,Lyft Inc,0,2022-06-01T03:57:00+00:00,Lyft Inc
4,2022-06-01T04:00:30+00:00,Ambarella (AMBA) Q1 2023 Earnings Call Transcript,"['A8B.F', 'AMBA.US']","{'polarity': 0.859, 'neg': 0.02, 'neu': 0.816,...",AMBA,Ambarella Inc,0,2022-06-01T04:00:30+00:00,Ambarella Inc
5,2022-06-01T04:30:00+00:00,"AXSOME THERAPEUTICS, INC. (NASDAQ: AXSM) SHARE...",['AXSM.US'],"{'polarity': 0.992, 'neg': 0.035, 'neu': 0.874...",AXSM,Axsome Therapeutics Inc,-1,2022-06-01T04:30:00+00:00,Axsome Therapeutics Inc
6,2022-06-01T04:30:00+00:00,"CareDx, Inc. (NASDAQ: CDNA) SHAREHOLDER CLASS ...",['CDNA.US'],"{'polarity': 0.988, 'neg': 0.045, 'neu': 0.866...",CDNA,CareDx Inc,-1,2022-06-01T04:30:00+00:00,CareDx Inc
7,2022-06-01T04:30:00+00:00,DENTSPLY SIRONA INC. INVESTIGATION ALERT: Bern...,['XRAY.US'],"{'polarity': 0.982, 'neg': 0.022, 'neu': 0.896...",XRAY,DENTSPLY SIRONA Inc,-1,2022-06-01T04:30:00+00:00,DENTSPLY SIRONA Inc
8,2022-06-01T04:30:00+00:00,"NATERA, INC. (NASDAQ: NTRA) SHAREHOLDER CLASS ...",['NTRA.US'],"{'polarity': 0.995, 'neg': 0.047, 'neu': 0.85,...",NTRA,Natera Inc,-1,2022-06-01T04:30:00+00:00,Natera Inc
9,2022-06-01T04:30:00+00:00,OKTA INC. (NASDAQ: OKTA) SHAREHOLDER CLASS ACT...,['OKTA.US'],"{'polarity': 0.949, 'neg': 0.059, 'neu': 0.857...",OKTA,Okta Inc,-1,2022-06-01T04:30:00+00:00,Okta Inc
