# Sentiment Analysis in the Financial Sector

### Using OpenAI Embeddings API


In [3]:
import openai
import pandas as pd
import numpy as np
from getpass import getpass

# Read the API key from a hidden prompt rather than hard-coding it in the file.
openai.api_key = getpass()

In [4]:
import pandas as pd
import nltk
# Fetch the Punkt tokenizer data; presumably required by sent_tokenize, which is imported next.
# NOTE(review): newer NLTK releases may expect the 'punkt_tab' resource instead — confirm
# against the installed version.
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

def document_to_dataframe(document: str) -> pd.DataFrame:
    """Split *document* into sentences and return them as a one-column DataFrame.

    The text is segmented with NLTK's ``sent_tokenize``. Line breaks and tabs
    inside a sentence are treated as ordinary word separators: every run of
    whitespace is collapsed to a single space, so text that was hard-wrapped
    in the middle of a sentence keeps its word boundaries instead of having
    the words on either side of the break fused together.

    Args:
        document: Raw text to segment.

    Returns:
        A DataFrame with a single ``'Sentence'`` column holding one row per
        non-empty sentence, in document order.
    """
    # str.split() with no argument splits on any whitespace run, so re-joining
    # with one space removes '\n' and '\t' while preserving word boundaries.
    cleaned = (' '.join(sentence.split()) for sentence in sent_tokenize(document))
    # Drop anything that was whitespace only so no blank rows reach the frame.
    sentences = [sentence for sentence in cleaned if sentence]
    return pd.DataFrame(sentences, columns=['Sentence'])

# Example usage: a small four-sentence sample spread over two lines of text.
document = '''
This is the first sentence. This is the second sentence. 
And this is the third sentence. Here's a fourth sentence.
'''

# Build the sentence frame from the sample and print it.
df = document_to_dataframe(document)
print(df)


                          Sentence
0      This is the first sentence.
1     This is the second sentence.
2  And this is the third sentence.
3        Here's a fourth sentence.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\TJBil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [6]:
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity

# Single source of truth for the embedding model: the sentences and the query
# must be embedded with the same model for their vectors to be comparable.
EMBEDDING_MODEL = 'text-embedding-ada-002'

# Embed each sentence (one get_embedding call per row), then write the frame,
# including the new 'embedding' column, to test.csv.
df['embedding'] = df['Sentence'].apply(lambda x: get_embedding(x, engine=EMBEDDING_MODEL))
df.to_csv('test.csv')

earnings_search = input("Search:")
# Fail early with a clear message instead of sending a blank query to the
# embeddings endpoint, which does not accept an empty input string.
if not earnings_search.strip():
    raise ValueError("Search query must not be empty.")
earnings_search_vector = get_embedding(earnings_search, engine=EMBEDDING_MODEL)

# Score every sentence against the query and keep the three closest matches.
df['similarities'] = df['embedding'].apply(lambda x: cosine_similarity(x, earnings_search_vector))
df = df.sort_values('similarities', ascending=False)
top_3_items = df.head(3)
results = [str(sentence) for sentence in top_3_items['Sentence']]


# Last expression of the cell: displayed as the cell output in a notebook.
results


["Here's a fourth sentence.",
 'This is the first sentence.',
 'This is the second sentence.']