In [6]:
import csv
import pandas as pd
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os
load_dotenv(override=True)
from openai import OpenAI
from pinecone import ServerlessSpec
from pinecone import Pinecone
import openai

  from tqdm.autonotebook import tqdm


In [7]:
# Credentials come from the process environment (see load_dotenv above).
# OpenAI keys are issued at platform.openai.com; the placeholder string is
# used only when the variable is unset or empty.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or 'YOUR_API_KEY'
# Pinecone API key; None when PINECONE_API_KEY is not set.
api_key = os.environ.get("PINECONE_API_KEY")

## Read the questions

In [26]:
# Path to the CSV file
csv_file_path = '../data/qna_data1.csv'

# Columns copied from every CSV row into a question record
QUESTION_COLUMNS = [
    "Question",
    "Option A",
    "Option B",
    "Option C",
    "Option D",
    "Correct Answer",
    "Explanation",
]

# List to store dictionaries (one per question); filled by read_questions()
data_list = []


def read_questions(path=None):
    """Load the question CSV into ``data_list`` and return it as a DataFrame.

    Each row is reduced to the columns in ``QUESTION_COLUMNS``; any other
    columns in the file are ignored.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to the module-level ``csv_file_path``.

    Returns
    -------
    pandas.DataFrame
        One row per question with the columns in ``QUESTION_COLUMNS``.
        Empty (but with those columns) when the file is missing or nothing
        could be read.

    Notes
    -----
    Errors are reported with ``print`` rather than raised. Rows read before
    an error are kept. ``data_list`` is replaced in place on every call, so
    calling the function repeatedly does not duplicate rows.
    """
    source = csv_file_path if path is None else path
    rows = []

    try:
        # newline='' lets the csv module handle quoted embedded newlines itself
        with open(source, mode='r', encoding='utf-8', newline='') as file:
            for row in csv.DictReader(file):
                rows.append({column: row[column] for column in QUESTION_COLUMNS})
    except FileNotFoundError:
        print("Error: The file does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Slice assignment keeps the same list object that other cells reference
    data_list[:] = rows
    return pd.DataFrame(rows, columns=QUESTION_COLUMNS)


In [9]:
# configure client
# Pinecone client built with the key read from PINECONE_API_KEY.
pc = Pinecone(api_key=api_key)
# Name of the index that the retrieval cells query.
index_name = 'damg-group3-assignment5-step1'

# Serverless settings (AWS, us-east-1).
# NOTE(review): `spec` is not used by any cell visible here — presumably
# intended for index creation elsewhere; confirm or remove.
spec = ServerlessSpec(
    cloud="aws", region="us-east-1" 
)
# Index object for the name above; this cell does not call create_index.
index = pc.Index(index_name)

In [15]:
# OpenAI embedding model used for every question.
model_name = 'text-embedding-ada-002'
# One embeddings-API response object per question, in data_list order.
embeds = []

# Embed each question's text with its own API request.
# NOTE(review): data_list is only populated after read_questions() has been
# called; if it is still empty this loop silently does nothing.
for question in data_list:
    query = question["Question"]
    print(query)
    
    embedres = openai.embeddings.create(
        model=model_name,
        input=query
    )
    # The whole response is stored; the vector is read later via .data[0].embedding
    embeds.append(embedres)


To best comply with the CFA Institute Standards of Professional Conduct (the Standards) related to performance presentation, Somer should modify the:
Does Somer’s social media post result in a violation of the Standards?
When preparing the marketing materials for the quantitative strategy, did Somer comply with the standard related to communication with clients and prospective clients?
If he fills the client’s order for shares of the technology firm, would Somer violate the standard related to priority of transactions?


In [16]:
# Debug output: the container type, then the full list of embedding responses.
# NOTE(review): the second print writes every embedding vector into the cell
# output; consider printing len(embeds) instead.
print(type(embeds))
print(embeds)

# retrieve from Pinecone
# Only the first question's embedding is used as the query vector.
embedding_vector = embeds[0].data[0].embedding

# Get relevant contexts: top 3 matches, with metadata, from the 'TechNote' namespace
res = index.query(vector=embedding_vector, top_k=3, include_metadata=True,namespace='TechNote')

<class 'list'>
[CreateEmbeddingResponse(data=[Embedding(embedding=[-0.014457287266850471, 0.0037733789067715406, 0.020589450374245644, -0.02804821915924549, -0.00992923229932785, 0.030349472537636757, -0.03595370426774025, 0.010721134021878242, -0.029347751289606094, -0.00936745572835207, 0.012257559224963188, 0.007898714393377304, 0.015323641709983349, 0.006264147348701954, -0.010815891437232494, -0.017841482535004616, 0.00596295390278101, -0.02110384963452816, -0.001793624134734273, -0.009617886506021023, -0.025625135749578476, 0.006034022197127342, -0.01201389729976654, 0.013618006370961666, -0.003898594295606017, -0.003945972770452499, 0.017719652503728867, -0.029185310006141663, 0.00791225116699934, 0.013563859276473522, 0.02190251834690571, -0.011066322214901447, -0.02345925010740757, -0.011959750205278397, -0.010369177907705307, -0.011533341370522976, -0.01816636696457863, 0.0010939415078610182, 0.026329047977924347, -0.012514757923781872, 0.009820938110351562, -0.00187484477646

In [17]:
def extract_metadata(response):
    """Collect the LOS, Note and text metadata of every match in a query response.

    Parameters
    ----------
    response : mapping-like or None
        Query result exposing its matches under the ``'matches'`` key via
        ``.get``. Each match is expected to offer ``.get('metadata')``.

    Returns
    -------
    list of dict
        One ``{'LOS', 'Note', 'Text'}`` dictionary per match, in match order.
        A placeholder string ("No ... provided") fills any missing field.
        An empty list is returned, after printing a notice, when the
        response is falsy or has no matches.
    """
    matches = response.get('matches') if response else None
    if not matches:
        print("No matches found in the response.")
        return []

    extracted_data = []
    for match in matches:
        # `or {}` also covers a metadata entry that is present but None,
        # which a plain .get('metadata', {}) default would not.
        metadata = match.get('metadata') or {}
        extracted_data.append({
            'LOS': metadata.get('LOS', 'No LOS provided'),
            'Note': metadata.get('Note', 'No Note provided'),
            'Text': metadata.get('text', 'No text provided'),
        })

    return extracted_data



In [23]:
# Flatten the query matches into LOS/Note/Text records and tabulate them.
extracted_metadata = extract_metadata(res)
df = pd.DataFrame(extracted_metadata)
# NOTE(review): the saved output under this cell looks like a raw list of
# dicts rather than a printed DataFrame — re-run the cell to refresh it.
print(df)

[{'LOS': 'define a probability distribution and distinguish between discrete and continuous\n                     random variables and their probability functions;\ndescribe the set of possible outcomes of a specified discrete random variable;\ninterpret a cumulative distribution function;\ncalculate and interpret probabilities for a random variable, given its cumulative\n                     distribution function;\ndefine a discrete uniform random variable, a Bernoulli random variable, and a binomial\n                     random variable;\ncalculate and interpret probabilities given the discrete uniform and the binomial\n                     distribution functions;\nconstruct a binomial tree to describe stock price movement;\ndefine the continuous uniform distribution and calculate and interpret probabilities,\n                     given a continuous uniform distribution;\nexplain the key properties of the normal distribution;\ndistinguish between a univariate and a multivariate distr