In [None]:
pip install anthropic

Collecting anthropic
  Downloading anthropic-0.37.1-py3-none-any.whl.metadata (21 kB)
Collecting httpx<1,>=0.23.0 (from anthropic)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from anthropic)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->anthropic)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->anthropic)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading anthropic-0.37.1-py3-none-any.whl (945 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m946.0/946.0 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl 

In [None]:
import anthropic
import numpy as np
import pandas as pd
import os
import glob

from IPython.display import display, Latex, HTML
from math import exp


In [None]:
# Base folder for the notebook's input and output files; file names are joined onto it with os.path.join.
# NOTE(review): looks like a Google Drive folder as mounted in Colab - the mount step is not shown here; confirm it runs first.
drive_dirpath = "/content/drive/MyDrive/sadna"

In [None]:
# Model identifier and reply-length cap passed to every client.messages.create call.
MODEL = "claude-3-5-sonnet-20240620"
MAX_TOKENS = 1024
# Never store a real key in the notebook: read it from the environment instead.
# Falls back to the previous empty-string placeholder when the variable is not set.
API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")

# API Example

In [None]:
prompt_content = "Hello, Claude"

In [None]:
# One-off smoke test of the Messages API: a single user turn at low temperature.
client = anthropic.Anthropic(api_key=API_KEY)
response = client.messages.create(
    model=MODEL,
    max_tokens=MAX_TOKENS,
    temperature=0.2,
    messages=[{"role": "user", "content": prompt_content}],
)


In [None]:
response

Message(id='msg_014qTqGx2KumxMW5SvWa9C2h', content=[TextBlock(text="Hello! It's nice to meet you. How can I assist you today?", type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=10, output_tokens=19))

In [None]:
response_text = response.content[0].text
response_text

"Hello! It's nice to meet you. How can I assist you today?"

# API Function

In [None]:
def ask_claude_single_prompt(prompt_content, temp=0.2):
    """
    Send a single user message to Claude and return the text of the reply.

    Args:
        prompt_content: Content of the one user message (a string in this notebook's usage)
        temp (float, optional): Temperature parameter for response generation. Defaults to 0.2

    Returns:
        str: Text of the first content block in Claude's response
    """
    api = anthropic.Anthropic(api_key=API_KEY)

    # A one-turn conversation: just the user's message, no history.
    single_turn = [{"role": "user", "content": prompt_content}]
    reply = api.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        temperature=temp,
        messages=single_turn,
    )

    first_block = reply.content[0]
    return first_block.text

In [None]:
def conversation_with_claude(prompts, temp=0.2):
    """
    Run a multi-turn chat with Claude, sending the prompts one after another.

    Every request carries the whole exchange so far (earlier prompts and
    Claude's earlier replies), so later prompts can refer back to earlier turns.

    Args:
        prompts (list): Strings to send as consecutive user turns
        temp (float, optional): Temperature parameter for response generation. Defaults to 0.2

    Returns:
        list: Text of Claude's reply to each prompt, in the same order
    """
    api = anthropic.Anthropic(api_key=API_KEY)
    history = []   # alternating user / assistant turns, resent in full on every request
    replies = []

    for user_text in prompts:
        history.append({"role": "user", "content": user_text})

        result = api.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            temperature=temp,
            messages=history,
        )

        # Only the first content block of the response is kept, both as the
        # recorded reply and as the assistant turn in the history.
        reply_text = result.content[0].text
        history.append({"role": "assistant", "content": reply_text})
        replies.append(reply_text)

    return replies

### Example use

In [None]:
ask_claude_single_prompt("Hi my name is Edan")

"Hello Edan! It's nice to meet you. How can I assist you today? Is there anything specific you'd like to know or discuss?"

In [None]:
conv_example = ["Hi my name is Edan" , "What is my name?"]
conversation_with_claude(conv_example)

["Hello Edan! It's nice to meet you. How can I assist you today? Is there anything specific you'd like to know or discuss?",
 'Your name is Edan, as you mentioned in your introduction.']

# Example Question - Single Prompt

In [None]:
OPENING = """Here is a question from a computer science Data Structures course.
Solve the question.
Question:\n"""

In [None]:
# Raw string on purpose: the question is LaTeX, and in an ordinary string literal
# "\t" (in \textit, \theta), "\b" (in \begin) and "\f" (in \frac) are escape
# sequences that would silently turn into control characters in the prompt.
example_q = r"""\textit{In a sequence of \(N\) Insert operations into a 2-3 tree (initially empty), let \(F(N)\) be the number of times a node split is performed in the tree when inserting \(N\) elements, in the worst case. Let \(G(N)\) be the total number of computer operations, when inserting \(N\) elements, in the worst case.}

\textit{What is the tightest upper bound on the ratio \(G(N)/F(N)\)?}

\begin{enumerate}
    \item \(\theta(1)\)
    \item \(\theta(\log(N))\)
    \item \(\theta(N)\)
    \item \(\theta(N \log(N))\)
    \item \(\theta\left(\frac{N}{\log(N)}\right)\)
\end{enumerate}
"""

In [None]:
example_answer = ask_claude_single_prompt(OPENING + example_q)
print(example_answer)

Let's approach this step-by-step:

1) First, let's consider F(N), the number of node splits:
   - In the worst case, a split occurs every time we insert into a full node.
   - The height of a 2-3 tree with N elements is O(log N).
   - Each split creates at most one new node per level.
   - Therefore, F(N) is O(N) in the worst case.

2) Now, let's consider G(N), the total number of operations:
   - For each insertion, we need to traverse the tree from root to leaf.
   - The height of the tree is O(log N).
   - For each level, we perform a constant number of operations.
   - Therefore, each insertion takes O(log N) operations.
   - For N insertions, G(N) is O(N log N) in the worst case.

3) Now, we need to find the tightest upper bound on G(N)/F(N):
   G(N)/F(N) = O(N log N) / O(N) = O(log N)

4) In Big Theta notation, this is θ(log N).

Therefore, the correct answer is option 2: θ(log N).

This makes sense intuitively as well. The total number of operations (G(N)) is larger than the num

In [None]:
second_prompt = "Write the number of the final answer, without additional text."

In [None]:
conv_res = conversation_with_claude([OPENING+example_q, second_prompt])

In [None]:
for a in conv_res:
  print(a , "\n\n\n\n\n")

Let's approach this step-by-step:

1) First, let's consider F(N), the number of node splits:
   - In the worst case, a split occurs every time we insert into a full node.
   - The number of splits is proportional to the number of insertions.
   - Therefore, F(N) = Θ(N)

2) Now, let's consider G(N), the total number of operations:
   - For each insertion, we need to traverse the tree to find the correct position.
   - The height of a 2-3 tree is logarithmic in the number of elements.
   - Each insertion takes Θ(log N) operations.
   - There are N insertions.
   - Therefore, G(N) = Θ(N log N)

3) Now, we need to find the ratio G(N) / F(N):
   G(N) / F(N) = Θ(N log N) / Θ(N) = Θ(log N)

4) Among the given options, Θ(log N) is present.

Therefore, the tightest upper bound on the ratio G(N)/F(N) is Θ(log N).

The correct answer is option 2: Θ(log N). 





2 







# Run analysis

### Create DF for results

In [None]:
# Column layout of the (still empty) results table.
cols = [
    "question_id",
    "model",
    "model_version",
    "question_prompt",
    "prompt_engineering_method",
    "timestamp",
    "answers_vector",
    "probabilities_vector",
    "tight_accuracy",
    "loose_accuracy",
]

# Save the empty table as a tab-separated file in the Drive folder.
filepath = os.path.join(drive_dirpath, "dataframe.tsv")
df = pd.DataFrame(columns=cols)
df.to_csv(filepath, sep="\t")

In [None]:
questions_df = pd.read_excel(os.path.join(drive_dirpath, "combined_dataframes.xlsx"))

## Process Questions

In [None]:
# File-name templates for the saved full answers, filled in with str.format by the
# process_questions* functions: "na" is used for the plain prompt, "cot" for the COT prompt.
GENERAL_FILENAME = "{model}-{model_version}_{question_id}_na_rep{rep_num}.txt"
COT_FILENAME = "{model}-{model_version}_{question_id}_cot_rep{rep_num}.txt"

# Prompt pieces for the multiple-choice / true-false questions.
# NOTE: FIRST_PROMPT, SECOND_PROMPT and COT_SUFFIX are assigned again in the
# "Process Type B" cell further down. The functions read these globals when they are
# called, so whichever assignment was executed last is the one that gets used.
FIRST_PROMPT = """Here is a multiple-choice/true-false question from the Data Structures course. Solve the question."""
SECOND_PROMPT = "Write the final answer as a number, without additional text."
COT_SUFFIX = "Solve it step by step."

### Process Types C-D

In [None]:
def process_questions(df, model, model_version, num_reps):
  """
  Build the run sheet for the questions that are not of type B.

  A question is kept when its `dataset` is "tested", its `has_solution` is True and
  its upper-cased `question_type` is not "B". Each kept question produces `num_reps`
  rows with the plain prompt followed by `num_reps` rows with the step-by-step
  prompt (`prompt_engineering_method == "COT"`).

  Args:
      df (pd.DataFrame): Question table with the columns exam_year, semester, moed,
          question_number, question_translation_latex, dataset, has_solution,
          question_type and multiple_choice_answer. Any index is accepted.
      model (str): Model label; stored in each row and used in the result file name.
      model_version (str): Model version; stored in each row and used in the result file name.
      num_reps (int): Number of repetitions per question and prompting method.

  Returns:
      pd.DataFrame: One row per (question, prompting method, repetition), indexed
      0..n-1. The result columns (timestamp, answers_vector, probabilities_vector,
      tight_accuracy, loose_accuracy) are left as None. When no question
      qualifies the frame is empty but still has all columns.
  """
  # Rows are looked up by the numbers 0..n-1 below, so drop whatever index labels the
  # caller's frame carries (e.g. after filtering). The caller's frame is not modified.
  df = df.reset_index(drop=True)

  columns = [
      "question_id", "question_type", "multiple_choice_answer",
      "model", "model_version", "question_prompt",
      "prompt_engineering_method", "rep_num", "result_filename",
      "timestamp", "answers_vector", "probabilities_vector",
      "tight_accuracy", "loose_accuracy",
  ]

  # (prompt_engineering_method, file-name template, text inserted before "Question:")
  variants = [
      (None, GENERAL_FILENAME, ""),
      ("COT", COT_FILENAME, f"{COT_SUFFIX}\n"),
  ]

  rows = []
  for idx in range(len(df)):
    year = df.loc[idx, "exam_year"]
    semester = df.loc[idx, "semester"]
    moed = df.loc[idx, "moed"]
    q_num = df.loc[idx, "question_number"]

    question_id = f"{year}{semester}{moed}_q{q_num}"
    question_text = df.loc[idx, "question_translation_latex"]
    dataset = df.loc[idx, "dataset"]
    has_sol = df.loc[idx, "has_solution"]
    q_type = df.loc[idx, "question_type"].upper()
    multiple_choice_answer = df.loc[idx, "multiple_choice_answer"]

    if not (dataset == "tested" and has_sol == True and q_type != "B"):
      continue

    # All plain-prompt repetitions first, then all COT repetitions.
    for method, filename_template, prompt_infix in variants:
      for rep in range(1, num_reps + 1):
        rows.append({
            "question_id": question_id,
            "question_type": q_type,
            "multiple_choice_answer": multiple_choice_answer,
            "model": model,
            "model_version": model_version,
            "question_prompt": f"{FIRST_PROMPT}\n{prompt_infix}Question: {question_text}",
            "prompt_engineering_method": method,
            "rep_num": rep,
            "result_filename": filename_template.format(model=model, model_version=model_version, question_id=question_id, rep_num=rep),
            "timestamp": None,
            "answers_vector": None,
            "probabilities_vector": None,
            "tight_accuracy": None,
            "loose_accuracy": None,
        })

  # Passing the columns explicitly keeps the schema even when `rows` is empty.
  return pd.DataFrame(rows, columns=columns)

In [None]:
q_df = process_questions(questions_df, "claude", MODEL, 5)

In [None]:
q_df.to_excel(os.path.join(drive_dirpath, "empty_dataframe.xlsx"))

### Process Type B

In [None]:
# Prompt pieces for the type B questions.
# NOTE: these assignments replace the FIRST_PROMPT / SECOND_PROMPT / COT_SUFFIX values set in
# the "Process Questions" cell above. Every function that reads these globals and is called
# after this cell has run - including run_analysis for the non-B questions - gets this wording.
FIRST_PROMPT = """Here is a question from the Data Structures course. Solve the question."""
SECOND_PROMPT = "Write the final answer, without additional text."
COT_SUFFIX = "Solve it step by step."

In [None]:
def process_questions_type_b(df, model, model_version, num_reps):
  """
  Build the run sheet for the type B questions.

  A question is kept when its `dataset` is "tested", its `has_solution` is True and
  its upper-cased `question_type` is "B". Each kept question produces `num_reps`
  rows with the plain prompt followed by `num_reps` rows with the step-by-step
  prompt (`prompt_engineering_method == "COT"`).

  Args:
      df (pd.DataFrame): Question table with the columns exam_year, semester, moed,
          question_number, question_translation_latex, dataset, has_solution,
          question_type and multiple_choice_answer. Any index is accepted.
      model (str): Model label; stored in each row and used in the result file name.
      model_version (str): Model version; stored in each row and used in the result file name.
      num_reps (int): Number of repetitions per question and prompting method.

  Returns:
      pd.DataFrame: One row per (question, prompting method, repetition), indexed
      0..n-1. The result columns (timestamp, answers_vector, probabilities_vector,
      tight_accuracy, loose_accuracy) are left as None. When no question
      qualifies the frame is empty but still has all columns.
  """
  # Rows are looked up by the numbers 0..n-1 below, so drop whatever index labels the
  # caller's frame carries (e.g. after filtering). The caller's frame is not modified.
  df = df.reset_index(drop=True)

  columns = [
      "question_id", "question_type", "multiple_choice_answer",
      "model", "model_version", "question_prompt",
      "prompt_engineering_method", "rep_num", "result_filename",
      "timestamp", "answers_vector", "probabilities_vector",
      "tight_accuracy", "loose_accuracy",
  ]

  # (prompt_engineering_method, file-name template, text inserted before "Question:")
  variants = [
      (None, GENERAL_FILENAME, ""),
      ("COT", COT_FILENAME, f"{COT_SUFFIX}\n"),
  ]

  rows = []
  for idx in range(len(df)):
    year = df.loc[idx, "exam_year"]
    semester = df.loc[idx, "semester"]
    moed = df.loc[idx, "moed"]
    q_num = df.loc[idx, "question_number"]

    question_id = f"{year}{semester}{moed}_q{q_num}"
    question_text = df.loc[idx, "question_translation_latex"]
    dataset = df.loc[idx, "dataset"]
    has_sol = df.loc[idx, "has_solution"]
    q_type = df.loc[idx, "question_type"].upper()
    multiple_choice_answer = df.loc[idx, "multiple_choice_answer"]

    if not (dataset == "tested" and has_sol == True and q_type == "B"):
      continue

    # All plain-prompt repetitions first, then all COT repetitions.
    for method, filename_template, prompt_infix in variants:
      for rep in range(1, num_reps + 1):
        rows.append({
            "question_id": question_id,
            "question_type": q_type,
            "multiple_choice_answer": multiple_choice_answer,
            "model": model,
            "model_version": model_version,
            "question_prompt": f"{FIRST_PROMPT}\n{prompt_infix}Question: {question_text}",
            "prompt_engineering_method": method,
            "rep_num": rep,
            "result_filename": filename_template.format(model=model, model_version=model_version, question_id=question_id, rep_num=rep),
            "timestamp": None,
            "answers_vector": None,
            "probabilities_vector": None,
            "tight_accuracy": None,
            "loose_accuracy": None,
        })

  # Passing the columns explicitly keeps the schema even when `rows` is empty.
  return pd.DataFrame(rows, columns=columns)



In [None]:
q_df_type_b = process_questions_type_b(questions_df, "claude", MODEL, 5)

In [None]:
q_df_type_b.to_excel(os.path.join(drive_dirpath, "empty_dataframe_type_b.xlsx"))

## Run Analysis: Type C-D Questions

In [None]:
from pathlib import Path

In [None]:
def run_analysis(df, directory, start=0, second_prompt=None):
  """
  Run every prepared question through Claude and collect the answers.

  Each row from `start` onward is asked as a two-turn conversation: the row's
  `question_prompt`, then `second_prompt`. The first reply is written to
  `<directory>/<result_filename>`, the second reply is stored in the `chat_response`
  column and the time of the call in `timestamp`. The table is saved to
  `results.xlsx` under `drive_dirpath` every 10 rows and once more when the run ends.

  Args:
      df (pd.DataFrame): Run sheet with `question_id`, `question_prompt` and
          `result_filename` columns, indexed 0..len(df)-1. It is copied, not modified.
      directory (str | Path): Folder for the full-answer text files; created if missing.
      start (int, optional): First row to process. Earlier rows are left untouched, so
          to resume an interrupted run pass the frame that run returned; passing the
          empty run sheet again would save a results file without the earlier answers.
      second_prompt (str, optional): Follow-up message asking for the bare final answer.
          Defaults to the global SECOND_PROMPT as it is when this function is called.
          That global is assigned again in the "Process Type B" cell, so pass the
          wording explicitly if that cell has already been executed.

  Returns:
      pd.DataFrame: Copy of `df` with `timestamp` and `chat_response` filled for the
      processed rows. An error or a manual interrupt stops the loop early; the partial
      result is still saved and returned.
  """
  df_res = df.copy(deep=True)

  if second_prompt is None:
    second_prompt = SECOND_PROMPT

  directory = Path(directory)
  directory.mkdir(parents=True, exist_ok=True)

  results_path = os.path.join(drive_dirpath, "results.xlsx")

  idx = start  # so the stop messages below can always name a row
  try:
    for idx in range(start, len(df)):
      print(f"Processing {idx} - {df_res.loc[idx, 'question_id']}...")

      # Ask model and get timestamp
      messages = [df_res.loc[idx, "question_prompt"], second_prompt]
      full_answer , final_answer = conversation_with_claude(messages)
      df_res.loc[idx, "timestamp"] = str(pd.Timestamp.now())

      # Write full answer
      filepath = os.path.join(directory, df_res.loc[idx, "result_filename"])
      with open(filepath, "w", encoding="utf-8") as file:
        file.write(full_answer)

      # Write short answer
      df_res.loc[idx, "chat_response"] = final_answer

      # Periodic checkpoint so a crash loses at most a few rows
      if idx % 10 == 0:
        df_res.to_excel(results_path)

  except KeyboardInterrupt:
    # Stopping by hand keeps the partial results, as before.
    print(f"Interrupted at row {idx}; returning the partial results (resume with start={idx}).")
  except Exception as e:
    # Best-effort run: report where it stopped so the caller can resume from there.
    print(f"Stopped at row {idx} ({type(e).__name__}): {e}")
  finally:
    df_res.to_excel(results_path)

  # Returned after (not inside) `finally`, so unexpected BaseExceptions are not swallowed.
  return df_res

In [None]:
analysis_results_df = run_analysis(q_df, os.path.join(drive_dirpath, "responses"))

Processing 0 - 2002aa_q2...
Processing 1 - 2002aa_q2...
Processing 2 - 2002aa_q2...
Processing 3 - 2002aa_q2...
Processing 4 - 2002aa_q2...
Processing 5 - 2002aa_q2...
Processing 6 - 2002aa_q2...
Processing 7 - 2002aa_q2...
Processing 8 - 2002aa_q2...
Processing 9 - 2002aa_q2...
Processing 10 - 2002aa_q3...
Processing 11 - 2002aa_q3...
Processing 12 - 2002aa_q3...
Processing 13 - 2002aa_q3...
Processing 14 - 2002aa_q3...
Processing 15 - 2002aa_q3...
Processing 16 - 2002aa_q3...
Processing 17 - 2002aa_q3...
Processing 18 - 2002aa_q3...
Processing 19 - 2002aa_q3...
Processing 20 - 2002aa_q4...
Processing 21 - 2002aa_q4...
Processing 22 - 2002aa_q4...
Processing 23 - 2002aa_q4...
Processing 24 - 2002aa_q4...
Processing 25 - 2002aa_q4...
Processing 26 - 2002aa_q4...
Processing 27 - 2002aa_q4...
Processing 28 - 2002aa_q4...
Processing 29 - 2002aa_q4...
Processing 30 - 2002aa_q5...
Processing 31 - 2002aa_q5...
Processing 32 - 2002aa_q5...
Processing 33 - 2002aa_q5...
Processing 34 - 2002aa_q

In [None]:
analysis_results_df

Unnamed: 0,question_id,question_type,multiple_choice_answer,model,model_version,question_prompt,prompt_engineering_method,rep_num,result_filename,timestamp,answers_vector,probabilities_vector,tight_accuracy,loose_accuracy,chat_response
0,2002aa_q2,D,2,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,,1,claude-claude-3-5-sonnet-20240620_2002aa_q2_na...,2024-10-27 15:02:03.202705,,,,,2
1,2002aa_q2,D,2,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,,2,claude-claude-3-5-sonnet-20240620_2002aa_q2_na...,2024-10-27 15:02:10.355556,,,,,2
2,2002aa_q2,D,2,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,,3,claude-claude-3-5-sonnet-20240620_2002aa_q2_na...,2024-10-27 15:02:17.235528,,,,,2
3,2002aa_q2,D,2,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,,4,claude-claude-3-5-sonnet-20240620_2002aa_q2_na...,2024-10-27 15:02:24.143092,,,,,3
4,2002aa_q2,D,2,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,,5,claude-claude-3-5-sonnet-20240620_2002aa_q2_na...,2024-10-27 15:02:31.528756,,,,,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,2018AA_q14-e,C,False,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,COT,1,claude-claude-3-5-sonnet-20240620_2018AA_q14-e...,2024-10-27 18:37:56.669139,,,,,1
1596,2018AA_q14-e,C,False,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,COT,2,claude-claude-3-5-sonnet-20240620_2018AA_q14-e...,2024-10-27 18:38:07.341619,,,,,1
1597,2018AA_q14-e,C,False,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,COT,3,claude-claude-3-5-sonnet-20240620_2018AA_q14-e...,2024-10-27 18:38:17.009818,,,,,1
1598,2018AA_q14-e,C,False,claude,claude-3-5-sonnet-20240620,Here is a multiple-choice/true-false question ...,COT,4,claude-claude-3-5-sonnet-20240620_2018AA_q14-e...,2024-10-27 18:38:27.143647,,,,,1


## Run Analysis: Type B Questions

In [None]:
def run_analysis_type_b(df, directory, start=0, second_prompt=None):
  """
  Run every prepared type B question through Claude and collect the answers.

  Each row from `start` onward is asked as a two-turn conversation: the row's
  `question_prompt`, then `second_prompt`. The first reply is written to
  `<directory>/<result_filename>`, the second reply is stored in the `chat_response`
  column and the time of the call in `timestamp`. The table is saved to
  `results_type_b.xlsx` under `drive_dirpath` every 10 rows and once more when the
  run ends.

  Args:
      df (pd.DataFrame): Run sheet with `question_id`, `question_prompt` and
          `result_filename` columns, indexed 0..len(df)-1. It is copied, not modified.
      directory (str | Path): Folder for the full-answer text files; created if missing.
      start (int, optional): First row to process. Earlier rows are left untouched, so
          to resume an interrupted run pass the frame that run returned; passing the
          empty run sheet again would save a results file without the earlier answers.
      second_prompt (str, optional): Follow-up message asking for the bare final answer.
          Defaults to the global SECOND_PROMPT as it is when this function is called.
          That name is assigned in more than one cell of this notebook, so the most
          recently executed assignment is the one used.

  Returns:
      pd.DataFrame: Copy of `df` with `timestamp` and `chat_response` filled for the
      processed rows. An error or a manual interrupt stops the loop early; the partial
      result is still saved and returned.
  """
  df_res = df.copy(deep=True)

  if second_prompt is None:
    second_prompt = SECOND_PROMPT

  directory = Path(directory)
  directory.mkdir(parents=True, exist_ok=True)

  results_path = os.path.join(drive_dirpath, "results_type_b.xlsx")

  idx = start  # so the stop messages below can always name a row
  try:
    for idx in range(start, len(df)):
      print(f"Processing {idx} - {df_res.loc[idx, 'question_id']}...")

      # Ask model and get timestamp
      messages = [df_res.loc[idx, "question_prompt"], second_prompt]
      full_answer , final_answer = conversation_with_claude(messages)
      df_res.loc[idx, "timestamp"] = str(pd.Timestamp.now())

      # Write full answer
      filepath = os.path.join(directory, df_res.loc[idx, "result_filename"])
      with open(filepath, "w", encoding="utf-8") as file:
        file.write(full_answer)

      # Write short answer
      df_res.loc[idx, "chat_response"] = final_answer

      # Periodic checkpoint so a crash loses at most a few rows
      if idx % 10 == 0:
        df_res.to_excel(results_path)

  except KeyboardInterrupt:
    # Stopping by hand keeps the partial results, as before.
    print(f"Interrupted at row {idx}; returning the partial results (resume with start={idx}).")
  except Exception as e:
    # Best-effort run: report where it stopped so the caller can resume from there.
    print(f"Stopped at row {idx} ({type(e).__name__}): {e}")
  finally:
    df_res.to_excel(results_path)

  # Returned after (not inside) `finally`, so unexpected BaseExceptions are not swallowed.
  return df_res

In [None]:
res_type_b = run_analysis_type_b(q_df_type_b, os.path.join(drive_dirpath, "responses_type_b"))

Processing 0 - 2009ba_q12-a...
Processing 1 - 2009ba_q12-a...
Processing 2 - 2009ba_q12-a...
Processing 3 - 2009ba_q12-a...
Processing 4 - 2009ba_q12-a...
Processing 5 - 2009ba_q12-a...
Processing 6 - 2009ba_q12-a...
Processing 7 - 2009ba_q12-a...
Processing 8 - 2009ba_q12-a...
Processing 9 - 2009ba_q12-a...
Processing 10 - 2010ba_q2-d...
Processing 11 - 2010ba_q2-d...
Processing 12 - 2010ba_q2-d...
Processing 13 - 2010ba_q2-d...
Processing 14 - 2010ba_q2-d...
Processing 15 - 2010ba_q2-d...
Processing 16 - 2010ba_q2-d...
Processing 17 - 2010ba_q2-d...
Processing 18 - 2010ba_q2-d...
Processing 19 - 2010ba_q2-d...
Processing 20 - 2010ba_q3-a...
Processing 21 - 2010ba_q3-a...
Processing 22 - 2010ba_q3-a...
Processing 23 - 2010ba_q3-a...
Processing 24 - 2010ba_q3-a...
Processing 25 - 2010ba_q3-a...
Processing 26 - 2010ba_q3-a...
Processing 27 - 2010ba_q3-a...
Processing 28 - 2010ba_q3-a...
Processing 29 - 2010ba_q3-a...
Processing 30 - 2010ba_q4-a...
Processing 31 - 2010ba_q4-a...
Processi

In [None]:
res_type_b

Unnamed: 0,question_id,question_type,multiple_choice_answer,model,model_version,question_prompt,prompt_engineering_method,rep_num,result_filename,timestamp,answers_vector,probabilities_vector,tight_accuracy,loose_accuracy,chat_response
0,2009ba_q12-a,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,,1,claude-claude-3-5-sonnet-20240620_2009ba_q12-a...,2024-10-30 13:38:18.886077,,,,,(n-1)(n)/2
1,2009ba_q12-a,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,,2,claude-claude-3-5-sonnet-20240620_2009ba_q12-a...,2024-10-30 13:38:26.564253,,,,,n(n-1)/2
2,2009ba_q12-a,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,,3,claude-claude-3-5-sonnet-20240620_2009ba_q12-a...,2024-10-30 13:38:35.364287,,,,,(n² - n) / 2
3,2009ba_q12-a,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,,4,claude-claude-3-5-sonnet-20240620_2009ba_q12-a...,2024-10-30 13:38:42.707298,,,,,n(n-1)/2
4,2009ba_q12-a,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,,5,claude-claude-3-5-sonnet-20240620_2009ba_q12-a...,2024-10-30 13:38:50.206875,,,,,n(n-1)/2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,2017AB_q1-c,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,COT,1,claude-claude-3-5-sonnet-20240620_2017AB_q1-c_...,2024-10-30 14:41:16.320627,,,,,Two AVL trees of size n each cannot be merged ...
396,2017AB_q1-c,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,COT,2,claude-claude-3-5-sonnet-20240620_2017AB_q1-c_...,2024-10-30 14:41:26.486325,,,,,It is not possible to merge two AVL trees of s...
397,2017AB_q1-c,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,COT,3,claude-claude-3-5-sonnet-20240620_2017AB_q1-c_...,2024-10-30 14:41:35.808736,,,,,It is not possible to merge two AVL trees of s...
398,2017AB_q1-c,B,na,claude,claude-3-5-sonnet-20240620,Here is a question from the Data Structures co...,COT,4,claude-claude-3-5-sonnet-20240620_2017AB_q1-c_...,2024-10-30 14:41:45.623503,,,,,Disproved. It is not possible to merge two AVL...
