In [1]:
!pip install faiss-cpu openai pandas numpy

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [26]:
import openai
import pandas as pd
import numpy as np
import faiss

# Set API key
openai.api_key = OPENAI_API_KEY

client = openai.OpenAI(api_key=OPENAI_API_KEY)

In [27]:
# Sample Bloomberg and Refinitiv data
bloomberg_data = pd.DataFrame({
    "bloomberg_element": ["PX_LAST", "PX_OPEN", "VOLUME"],
    "description": ["Closing price", "Opening price", "Trading volume"]
})

refinitiv_data = pd.DataFrame({
    "refinitiv_element": ["Close_Price", "Open_Price", "Trade_Vol"],
    "description": ["End of day closing price", "First price of the day", "Total traded volume"]
})

In [28]:
bloomberg_data

Unnamed: 0,bloomberg_element,description
0,PX_LAST,Closing price
1,PX_OPEN,Opening price
2,VOLUME,Trading volume


In [29]:
refinitiv_data

Unnamed: 0,refinitiv_element,description
0,Close_Price,End of day closing price
1,Open_Price,First price of the day
2,Trade_Vol,Total traded volume


In [30]:
# Generate embedding
def get_embedding(text):
    response = client.embeddings.create(input=[text], model="text-embedding-ada-002")
    return np.array(response.data[0].embedding)
  # Returns the embedding vector


In [31]:
# Generate embeddings for Refinitiv data
refinitiv_data["embedding"] = refinitiv_data["description"].apply(get_embedding)
refinitiv_vectors = np.vstack(refinitiv_data["embedding"].values)

In [32]:
refinitiv_data

Unnamed: 0,refinitiv_element,description,embedding
0,Close_Price,End of day closing price,"[-0.005830149166285992, 0.004738675430417061, ..."
1,Open_Price,First price of the day,"[-0.009062401950359344, 0.015450868755578995, ..."
2,Trade_Vol,Total traded volume,"[-0.0039274245500564575, -0.037334274500608444..."


In [33]:
# Index with FAISS
dimension = refinitiv_vectors.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(refinitiv_vectors)

In [48]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7dd56719f0c0> >

In [34]:
refinitiv_vectors

array([[-0.00583015,  0.00473868, -0.00267327, ...,  0.00976617,
         0.01436044, -0.03968263],
       [-0.0090624 ,  0.01545087, -0.01530598, ..., -0.01837507,
         0.00134849, -0.0080745 ],
       [-0.00392742, -0.03733427,  0.00921147, ..., -0.02146178,
        -0.01308463, -0.03207057]])

In [35]:
dimension


1536

In [36]:
index

<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7dd56719f0c0> >

In [37]:
# Find matching Refinitiv element
def find_matching_refinitiv_element(bloomberg_query):
    query_embedding = get_embedding(bloomberg_query).reshape(1, -1)
    _, indices = index.search(query_embedding, k=3)
    matches = refinitiv_data.iloc[indices[0]]

    prompt = f"""
    Bloomberg element: {bloomberg_query}
    Possible Refinitiv matches:
    {matches[["refinitiv_element", "description"]].to_string(index=False)}

    Which is the best match and why?
    """

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are a financial data expert."},
                  {"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content, matches

In [38]:
# Example Query
query = "Closing price"
match_explanation, matches = find_matching_refinitiv_element(query)
print(match_explanation)
print(matches)

The best match for the Bloomberg element 'Closing price' in Refinitiv is 'Close_Price' as it represents the 'End of day closing price'. These two elements both refer to the final price at which a security is traded at the end of the trading day. The other options, 'Open_Price' and 'Trade_Vol', represent different data points- 'Open_Price' represents the first price of the day and 'Trade_Vol' represents the total traded volume, neither of which matches the 'Closing price' concept.
  refinitiv_element               description  \
0       Close_Price  End of day closing price   
1        Open_Price    First price of the day   
2         Trade_Vol       Total traded volume   

                                           embedding  
0  [-0.005830149166285992, 0.004738675430417061, ...  
1  [-0.009062401950359344, 0.015450868755578995, ...  
2  [-0.0039274245500564575, -0.037334274500608444...  


In [39]:
# Example Query
query = "Trading volume"

In [41]:
query_embedding = get_embedding(query)  #.reshape(1, -1)
query_embedding

array([-0.00421029, -0.04482995,  0.00745614, ..., -0.02281405,
       -0.00267051, -0.02838787])

In [46]:
query_embedding = query_embedding.reshape(1, -1)

In [47]:
 _, indices = index.search(query_embedding, k=3)

In [50]:
matches = refinitiv_data.iloc[indices[0]]

In [49]:
indices[0]

array([2, 0, 1])

In [51]:
matches

Unnamed: 0,refinitiv_element,description,embedding
2,Trade_Vol,Total traded volume,"[-0.0039274245500564575, -0.037334274500608444..."
0,Close_Price,End of day closing price,"[-0.005830149166285992, 0.004738675430417061, ..."
1,Open_Price,First price of the day,"[-0.009062401950359344, 0.015450868755578995, ..."


In [62]:
prompt = f"""
    Bloomberg element: {query}
    Possible Refinitiv matches:
    {matches[["refinitiv_element", "description"]].to_string(index=False)}

    Which is the best match and why?
    Write Secnd and thrid best also in proper formatted  way.
    """

In [54]:
prompt

'\n    Bloomberg element: Trading volume\n    Possible Refinitiv matches:\n    refinitiv_element              description\n        Trade_Vol      Total traded volume\n      Close_Price End of day closing price\n       Open_Price   First price of the day\n    \n    Which is the best match and why?\n    '

In [63]:
 response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are a financial data expert."},
                  {"role": "user", "content": prompt}]
    )

In [56]:
response

ChatCompletion(id='chatcmpl-Avws4nrdKSfn50CIM5lGkbGPhdXg6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The best match for Bloomberg\'s "Trading Volume" is Refinitiv\'s "Trade_Vol". It refers to the total volume of shares traded within a specified period. This is because both elements have to do with the number of shares or contracts that are being traded, rather than the price at which they are being traded.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1738374360, model='gpt-4-0613', object='chat.completion', service_tier='default', system_fingerprint=None, usage=CompletionUsage(completion_tokens=66, prompt_tokens=79, total_tokens=145, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [64]:
response.choices[0].message.content  # Extract response text

'The best match for Bloomberg\'s "Trading Volume" from the given Refinitiv elements is "Trade_Vol" because it refers to the total traded volume which is the closest in meaning and function.\n\nThe second best match could be "Close_Price" as it relates to the end of day closing price which could possibly reflect the interest in trading and thus the trading volume indirectly. But, it\'s not a direct match as it doesn\'t represent the volume of trades.\n\nThe third best match could be "Open_Price" as it is the first price of the day. Like "Close_Price" it could possibly reflect trading interest and thus trading volume indirectly, but it is even less directly related to trading volume than the closing price. So while it might be of interest, it fails to provide a direct match for Bloomberg\'s "Trading volume".'

In [65]:
from IPython.display import display, Markdown
# Function to display response in Markdown format in Jupyter Notebook
def show_response_as_markdown(response_text, prompt):
    md_content = f"""
# **AI-Generated Financial Data Mapping Report**

**Query:** {prompt}

## **Response:**

{response_text}
"""
    display(Markdown(md_content))  # Render as Markdown in Jupyter Notebook

In [68]:
response_text = response.choices[0].message.content

In [69]:
show_response_as_markdown(response_text, query)


# **AI-Generated Financial Data Mapping Report**

**Query:** Trading volume

## **Response:**

The best match for Bloomberg's "Trading Volume" from the given Refinitiv elements is "Trade_Vol" because it refers to the total traded volume which is the closest in meaning and function.

The second best match could be "Close_Price" as it relates to the end of day closing price which could possibly reflect the interest in trading and thus the trading volume indirectly. But, it's not a direct match as it doesn't represent the volume of trades.

The third best match could be "Open_Price" as it is the first price of the day. Like "Close_Price" it could possibly reflect trading interest and thus trading volume indirectly, but it is even less directly related to trading volume than the closing price. So while it might be of interest, it fails to provide a direct match for Bloomberg's "Trading volume".
