In [1]:
from openai import OpenAI
import os


# OpenAI SDK client configured to talk to the OpenRouter endpoint instead of
# the default OpenAI one. The API key is read from the OPENROUTER_API_KEY
# environment variable; os.environ[...] raises KeyError if it is not set.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=os.environ["OPENROUTER_API_KEY"],
)

In [2]:
# Smoke test: send a single user message through the SDK client and print the
# text of the first returned choice.
completion = client.chat.completions.create(
  model="qwen/qwq-32b:free",
  messages=[
    {
      "role": "user",
      "content": "write strictly 'Yes' or 'No' to the following question: Will bitcoin exceed $80k today?\nAnswer: "
    }
  ]
)

print(completion.choices[0].message.content)

No


In [35]:
import json
import re

import numpy as np
import requests


def get_probability_from_openrouter(
        system_prompt: str,
        prompt: str,
        model: str,
        temperature=0,
        max_tokens=2000,
        logprobs=True,
        top_logprobs=20,
        extra_params: dict = None,
):
    """POST one chat-completion request to OpenRouter and return the parsed JSON.

    Args:
        system_prompt: Content of the ``system`` message.
        prompt: Content of the ``user`` message.
        model: OpenRouter model identifier placed in the ``model`` field.
        temperature: Sampling temperature forwarded in the request body.
        max_tokens: Completion token limit forwarded in the request body.
        logprobs: Value of the ``logprobs`` request field.
        top_logprobs: Value of the ``top_logprobs`` request field.
        extra_params: Optional extra top-level request fields. They are merged
            last, so they override the fields built here. ``None`` (the
            default) adds nothing.

    Returns:
        The response body decoded with ``response.json()``. The HTTP status is
        not checked here, so callers must be prepared for an error payload.

    Raises:
        KeyError: If the ``OPENROUTER_API_KEY`` environment variable is unset.
    """
    # Read the key into a local first: nesting the same quote type inside an
    # f-string is a SyntaxError on Python versions before 3.12.
    api_key = os.environ["OPENROUTER_API_KEY"]

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        "temperature": temperature,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
        "max_tokens": max_tokens,
        # NOTE(review): this is sent as a top-level field; OpenRouter's routing
        # options appear to live under "provider" -- confirm it has any effect here.
        "require_parameters": True,
    }
    # The original unpacked **extra_params unconditionally, which raised
    # TypeError whenever the default None was used.
    if extra_params:
        payload.update(extra_params)

    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        data=json.dumps(payload),
    )
    return response.json()

def extract_yes_no_logits_and_softmax(response):
    """Estimate Yes/No probabilities from a response's top-logprob candidates.

    Every entry of ``response['choices'][0]['logprobs']['content']`` is scanned,
    and each ``top_logprobs`` candidate whose token equals ``yes`` or ``no``
    after lower-casing and stripping whitespace is collected. A softmax over
    the collected log-probabilities renormalises them so that the Yes and No
    masses sum to 1.

    Candidates from all generated positions are pooled into one softmax, not
    just those of the first token.

    Args:
        response: Parsed chat-completion JSON containing logprobs data.

    Returns:
        dict with keys ``yes_tokens_found`` and ``no_tokens_found`` (the raw
        matching tokens) and ``yes_prob_sum`` and ``no_prob_sum`` (floats).

    Raises:
        Exception: If the expected logprobs structure is missing or malformed.
        ValueError: If no Yes/No candidate is present. The original code
            returned this exception object instead of raising it.
    """
    # The API values are log-probabilities, despite the "logits" wording used
    # in the function name and error message.
    yes_logprobs, no_logprobs = [], []
    yes_tokens, no_tokens = [], []

    try:
        for position in response['choices'][0]['logprobs']['content']:
            for candidate in position['top_logprobs']:
                token = candidate['token']
                logprob = candidate['logprob']
                normalized = token.lower().strip()
                if normalized == 'yes':
                    yes_logprobs.append(logprob)
                    yes_tokens.append(token)
                elif normalized == 'no':
                    no_logprobs.append(logprob)
                    no_tokens.append(token)
    except (KeyError, IndexError, TypeError) as e:
        raise Exception(f"Error accessing logprobs data: {e}") from e

    if not yes_logprobs and not no_logprobs:
        raise ValueError("No relevant logits found for 'yes' or 'no' tokens.")

    pooled = np.array(yes_logprobs + no_logprobs, dtype=np.float64)
    # Subtract the max before exponentiating for numerical stability. A single
    # candidate yields exactly 1.0, so no special case is needed.
    weights = np.exp(pooled - pooled.max())
    probs = weights / weights.sum()

    n_yes = len(yes_logprobs)
    return {
        "yes_tokens_found": yes_tokens,
        "no_tokens_found": no_tokens,
        "yes_prob_sum": float(probs[:n_yes].sum()),
        "no_prob_sum": float(probs[n_yes:].sum()),
    }


def extract_yes_no_from_text(response):
    """Derive a hard Yes/No probability from the answer text of a response.

    The message content is lower-cased and searched for the whole words
    ``yes`` and ``no``. Whole-word matching keeps words such as "know", "not"
    or "eyes" from being counted as answers, which plain substring tests did.

    Args:
        response: Parsed chat-completion JSON; the text is read from
            ``response['choices'][0]['message']['content']``.

    Returns:
        dict with keys ``yes_tokens_found`` and ``no_tokens_found`` (each
        either empty or holding the matched word) and ``yes_prob_sum`` and
        ``no_prob_sum``: 1.0/0.0 when only one word is present, 0.5/0.5 when
        both are.

    Raises:
        ValueError: If the content is not a string, or contains neither word.
            The original code returned this exception object instead of
            raising it.
    """
    content = response['choices'][0]['message']['content']
    if not isinstance(content, str):
        # A missing or null content would otherwise fail on .lower().
        raise ValueError("Response message content is not text.")

    text = content.lower().strip()
    yes_tokens = ['yes'] if re.search(r'\byes\b', text) else []
    no_tokens = ['no'] if re.search(r'\bno\b', text) else []

    if not yes_tokens and not no_tokens:
        raise ValueError("No 'yes' or 'no' found in text.")

    if yes_tokens and no_tokens:
        # Both answers appear, so the text is treated as undecided.
        yes_prob, no_prob = 0.5, 0.5
    elif yes_tokens:
        yes_prob, no_prob = 1.0, 0.0
    else:
        yes_prob, no_prob = 0.0, 1.0

    return {
        "yes_tokens_found": yes_tokens,
        "no_tokens_found": no_tokens,
        "yes_prob_sum": yes_prob,
        "no_prob_sum": no_prob,
    }

In [53]:
# Models handled by the first loop of process_results, which scores them from
# token logprobs as well as from the answer text.
openrouter_models_non_reasoning = [
    "deepseek/deepseek-chat-v3-0324:free",
    "openai/gpt-4o-2024-11-20",
    "openai/chatgpt-4o-latest",
]

# Models handled by the second loop of process_results, which scores them from
# the answer text only.
openrouter_models_reasoning = [
    "qwen/qwq-32b:free",
    # "google/gemini-2.5-pro-exp-03-25:free",
    "google/gemini-2.5-pro-preview-03-25",
    "anthropic/claude-3.7-sonnet:thinking",
    "deepseek/deepseek-r1:free",
]

In [54]:
# System message: restricts the model to a bare 'Yes' or 'No' answer.
system_prompt = "You are an expert financial analyst. You will be given a market question and relevant data. Based solely on the data provided, answer the question. Respond *only* with 'Yes' or 'No'. Do not include any reasoning, explanation, or additional text."
# User message: a table of news headlines, a table of daily open/close/volume
# rows for 2025-04-10..2025-04-16, and the question to answer.
# NOTE(review): the question asks "Up or Down" while the answer must be
# 'Yes' or 'No', so which direction "Yes" stands for is ambiguous -- confirm.
prompt = "\n    Use following data to answer on question:\n    |    | title                                                                                                            |   positive |   important |   liked | date       |\n|---:|:-----------------------------------------------------------------------------------------------------------------|-----------:|------------:|--------:|:-----------|\n|  5 | Ethereum Is What Bitcoin Was Meant to Be                                                                         |          8 |           5 |       8 | 2025-04-16 |\n|  4 | Family offices show stronger preference for Ethereum ETFs over Bitcoin                                           |          5 |           3 |       6 | 2025-04-16 |\n|  6 | MicroStrategy Acquires 3,459 Bitcoins for $285.8 Million, Total Holdings Reach 531,644 BTC                       |          8 |           5 |       6 | 2025-04-14 |\n|  7 | Saylor signals new Bitcoin buy after Strategy reports nearly $6 billion Q1 unrealized loss                       |          6 |           4 |       5 | 2025-04-13 |\n|  8 | Sweden: proposal to include Bitcoin in national reserves                                                         |         16 |           5 |       8 | 2025-04-11 |\n|  9 | Crypto Market Rises Sharply, but Experts Warn it May Be a ‘Dead Cat Bounce’                                      |          0 |           4 |       0 | 2025-04-10 |\n| 10 | Charles Hoskinson sees Bitcoin hitting $250K as Big Tech embraces crypto, Fed cut rates, and regulation kicks in |          5 |           3 |       4 | 2025-04-10 |\n\n|    | open_time           | close_time                 |   volume |    open |   close |     diff |\n|---:|:--------------------|:---------------------------|---------:|--------:|--------:|---------:|\n|  0 | 2025-04-10 00:00:00 | 2025-04-10 23:59:59.999000 |  33284.8 | 82615.2 | 79607.3 | -3007.92 |\n|  1 | 2025-04-11 00:00:00 | 2025-04-11 23:59:59.999000 |  34435.4 | 
79607.3 | 83423.8 |  3816.54 |\n|  2 | 2025-04-12 00:00:00 | 2025-04-12 23:59:59.999000 |  18470.7 | 83423.8 | 85276.9 |  1853.07 |\n|  3 | 2025-04-13 00:00:00 | 2025-04-13 23:59:59.999000 |  24680   | 85276.9 | 83760   | -1516.91 |\n|  4 | 2025-04-14 00:00:00 | 2025-04-14 23:59:59.999000 |  28659.1 | 83760   | 84591.6 |   831.58 |\n|  5 | 2025-04-15 00:00:00 | 2025-04-15 23:59:59.999000 |  20911   | 84591.6 | 83644   |  -947.59 |\n|  6 | 2025-04-16 00:00:00 | 2025-04-16 23:59:59.999000 |  20867.2 | 83644   | 84030.4 |   386.39 |\n\n    Write strictly 'Yes' or 'No' to the following question: Will be Bitcoin Up or Down on April 17 \nAnswer:\n"

In [55]:
# Query every model (non-reasoning first, then reasoning) with the same system
# and user prompt, and keep each parsed JSON response keyed by model id.
responses = {}

for model in openrouter_models_non_reasoning + openrouter_models_reasoning:
    responses[model] = get_probability_from_openrouter(
        system_prompt=system_prompt,
        prompt=prompt,
        model=model,
        temperature=0,
        max_tokens=3000,
        logprobs=True,
        top_logprobs=5,
        # Extra top-level request field merged into the request body.
        # Presumably tells OpenRouter not to route to the "Targon" provider --
        # TODO confirm against the provider routing docs.
        extra_params={
            'provider': {
                'ignore': [
                    "Targon"
                ]
            }
        }
    )

In [60]:
import pandas as pd

def _extract_or_raise(extractor, response):
    """Call an extractor and raise its result if it is an Exception object."""
    result = extractor(response)
    if isinstance(result, Exception):
        raise result
    return result


def _build_result_row(model, response, use_token_logprobs):
    """Build the result-table row for one model's response, recording failures in ``error``."""
    row = {
        "model": model,
        "text": None,
        "yes_tokens_found": [],
        "no_tokens_found": [],
        "probability_token": None,
        "probability_text": None,
    }

    try:
        row["text"] = response['choices'][0]['message']['content'].lower().strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # Missing response, an error payload without 'choices', or null content.
        row["error"] = f"Malformed response: {e!r}"
        return row

    errors = []
    token_result = None
    text_result = None

    if use_token_logprobs:
        try:
            token_result = _extract_or_raise(extract_yes_no_logits_and_softmax, response)
            row["probability_token"] = token_result['yes_prob_sum']
        except Exception as e:
            errors.append(f"token: {e}")

    try:
        text_result = _extract_or_raise(extract_yes_no_from_text, response)
        row["probability_text"] = text_result['yes_prob_sum']
    except Exception as e:
        errors.append(f"text: {e}")

    # Prefer the tokens found via logprobs; otherwise use the ones found in the text.
    tokens_source = token_result if token_result is not None else text_result
    if tokens_source is not None:
        row["yes_tokens_found"] = tokens_source['yes_tokens_found']
        row["no_tokens_found"] = tokens_source['no_tokens_found']

    if errors:
        row["error"] = "; ".join(errors)
    return row


def process_results(responses, openrouter_models_non_reasoning, openrouter_models_reasoning):
    """Turn the raw model responses into a table of Yes-probabilities.

    Non-reasoning models are scored both from token logprobs
    (``probability_token``) and from the answer text (``probability_text``).
    Reasoning models are scored from the answer text only.

    The two extraction steps fail independently. A malformed response, a
    missing response, or an extractor error (raised or returned as an
    ``Exception`` object) is recorded in that row's ``error`` value instead of
    aborting the whole table.

    Args:
        responses: Mapping of model id to parsed chat-completion JSON.
        openrouter_models_non_reasoning: Model ids scored from logprobs and text.
        openrouter_models_reasoning: Model ids scored from text only.

    Returns:
        pandas.DataFrame with one row per model, non-reasoning models first.
        Columns are ``model``, ``text``, ``yes_tokens_found``,
        ``no_tokens_found``, ``probability_token``, ``probability_text``,
        ``error`` (present only when at least one row failed), and ``prob``
        (``probability_token`` where available, otherwise ``probability_text``).
    """
    base_columns = [
        "model", "text", "yes_tokens_found", "no_tokens_found",
        "probability_token", "probability_text",
    ]

    results = []
    for model in openrouter_models_non_reasoning:
        results.append(_build_result_row(model, responses.get(model), use_token_logprobs=True))
    for model in openrouter_models_reasoning:
        results.append(_build_result_row(model, responses.get(model), use_token_logprobs=False))

    # With no rows, DataFrame([]) has no columns and the lookup below would fail.
    df = pd.DataFrame(results) if results else pd.DataFrame(columns=base_columns)
    df['prob'] = df['probability_token'].where(df['probability_token'].notnull(), df['probability_text'])

    return df


# Build the per-model summary table from the collected responses.
df = process_results(responses, openrouter_models_non_reasoning, openrouter_models_reasoning)
# Show only the model id and its final Yes-probability.
df[['model', 'prob']]

Unnamed: 0,model,prob
0,deepseek/deepseek-chat-v3-0324:free,0.00407
1,openai/gpt-4o-2024-11-20,0.622459
2,openai/chatgpt-4o-latest,0.99996
3,qwen/qwq-32b:free,0.0
4,google/gemini-2.5-pro-preview-03-25,0.0
5,anthropic/claude-3.7-sonnet:thinking,0.0
6,deepseek/deepseek-r1:free,0.0


In [44]:
# Display the raw response payloads for inspection.
responses

{'deepseek/deepseek-chat-v3-0324:free': {'id': 'gen-1745587343-mvIOfqSMjl0ys3OifXcn',
  'provider': 'Chutes',
  'model': 'deepseek/deepseek-chat-v3-0324:free',
  'object': 'chat.completion',
  'created': 1745587343,
  'choices': [{'logprobs': {'content': [{'token': 'No',
       'bytes': [78, 111],
       'logprob': -0.001176380319520831,
       'top_logprobs': [{'token': 'No',
         'bytes': [78, 111],
         'logprob': -0.001176380319520831},
        {'token': 'Yes',
         'bytes': [89, 101, 115],
         'logprob': -6.751176357269287},
        {'token': ' No',
         'bytes': [32, 78, 111],
         'logprob': -12.751176834106445},
        {'token': 'Maybe',
         'bytes': [77, 97, 121, 98, 101],
         'logprob': -13.751176834106445},
        {'token': '\n', 'bytes': [10], 'logprob': -14.751176834106445}]},
      {'token': 'No',
       'bytes': [78, 111],
       'logprob': -0.001176380319520831,
       'top_logprobs': [{'token': 'No',
         'bytes': [78, 111],
   