In [1]:
import transformers as tr
import torch
import torch.nn.functional as F

import time

In [2]:
amateur_path = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
expert_path = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

tokenizer = tr.AutoTokenizer.from_pretrained(amateur_path)

amateur_model = tr.AutoModelForCausalLM.from_pretrained(amateur_path)
expert_model = tr.AutoModelForCausalLM.from_pretrained(expert_path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [3]:
user_message = """Give a very very brief docstring for the following function:\n```\nfunction updateEloScores(
	scores,
	results,
	kFactor = 4,
) {
	for (const result of results) {
		const { first, second, outcome } = result;
		const firstScore = scores[first] ?? 1000;
		const secondScore = scores[second] ?? 1000;

		const expectedScoreFirst = 1 / (1 + Math.pow(10, (secondScore - firstScore) / 400));
		const expectedScoreSecond = 1 / (1 + Math.pow(10, (firstScore - secondScore) / 400));
		let sa = 0.5;
		if (outcome === 1) {
			sa = 1;
		} else if (outcome === -1) {
			sa = 0;
		}
		scores[first] = firstScore + kFactor * (sa - expectedScoreFirst);
		scores[second] = secondScore + kFactor * (1 - sa - expectedScoreSecond);
	}
	return scores;
}\n```"""

prompt = tokenizer.apply_chat_template(
    [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": user_message},
    ],
    add_generation_prompt=True,
    tokenize=False,
)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

expert_model.to(device)
amateur_model.to(device)

expert_model.eval()
amateur_model.eval()

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbe

In [5]:
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

In [6]:
import time
max_tokens = 1000
tau = 1
alpha = 0.1

## Amateur Only
### Time

*   Total time taken: 68.45 seconds
* Time per token taken: 0.17 seconds
* Tokens generated:  405

### Response
```javascript
/**
 * Updates the Elo scores of a list of players based on their performance in a game.
 * The Elo score is calculated using the formula:
 * E = K * (S1 - S2) / (1 + sqrt(S1^2 + S2^2))
 * where K is a constant factor, S1 and S2 are the scores of the two players,
 * and E is the updated score.
 *
 * @param {Array} scores - An array of scores for each player.
 * @param {Array} results - An array of results, where each result is an object with 'first' and 'second' keys,
 *                           and 'outcome' key indicating the outcome of the game.
 * @param {number} kFactor - A constant factor to adjust the Elo score.
 * @returns {Array} - An array of updated scores for each player.
 */
function updateEloScores(scores, results, kFactor = 4) {
    for (const result of results) {
        const { first, second, outcome } = result;
        const firstScore = scores[first] ?? 1000;
        const secondScore = scores[second] ?? 1000;

        const expectedScoreFirst = 1 / (1 + Math.pow(10, (secondScore - firstScore) / 400));
        const expectedScoreSecond = 1 / (1 + Math.pow(10, (firstScore - secondScore) / 400));
        let sa = 0.5;
        if (outcome === 1) {
            sa = 1;
        } else if (outcome === -1) {
            sa = 0;
        }
        scores[first] = firstScore + kFactor * (sa - expectedScoreFirst);
        scores[second] = secondScore + kFactor * (1 - sa - expectedScoreSecond);
    }
    return scores;
}
```<|im_end|>

In [59]:
generated_ids = input_ids

start = time.time()
# Amateur Only
with torch.no_grad():
    for _ in range(max_tokens):
        # Amateur model
        outputs_amateur = amateur_model(generated_ids)
        logits_amateur = outputs_amateur.logits[:, -1, :]

        # Greedily select highest logit
        next_token = torch.argmax(logits_amateur, dim=-1).unsqueeze(0)
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        # Stop EOS
        if next_token.item() == tokenizer.eos_token_id:
            print ("EOS") # technically messes up computed times lol
            break

total_time = time.time() - start
print(f"Total time taken: {total_time:.2f} seconds")
generated_token_len = generated_ids.shape[1] - input_ids.shape[1]
print(f"Time per token taken: {total_time / generated_token_len:.2f} seconds")
print ("Tokens generated: ", generated_token_len)
output_text = tokenizer.decode(
    generated_ids.squeeze()[input_ids.shape[1]:]
)
print (output_text)

EOS
Total time taken: 68.45 seconds
Time per token taken: 0.17 seconds
Tokens generated:  405


## Expert Only

### Time
* Total time taken: 43.27 seconds
* Time per token taken: 0.40 seconds
* Tokens generated:  109

### Response
This function updates Elo scores based on the results of a series of games. It takes in an object `scores` representing the current Elo scores of each player, an array `results` containing the outcomes of each game, and an optional `kFactor` parameter that determines the strength of the update. The function iterates over each game result, calculates the expected score for each player based on the Elo rating system, and then updates the Elo score of each player based on the outcome of the game. The function returns the updated Elo scores.<|im_end|>

In [62]:
generated_ids = input_ids

start = time.time()
# Expert Only
with torch.no_grad():
    for _ in range(max_tokens):
        # Expert model
        outputs_expert = expert_model(generated_ids)
        logits_expert = outputs_expert.logits[:, -1, :]

        # Greedily select highest logit
        next_token = torch.argmax(logits_expert, dim=-1).unsqueeze(0)
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        # Stop EOS
        if next_token.item() == tokenizer.eos_token_id:
            print ("EOS")
            break

total_time = time.time() - start
print(f"Total time taken: {total_time:.2f} seconds")
generated_token_len = generated_ids.shape[1] - input_ids.shape[1]
print(f"Time per token taken: {total_time / generated_token_len:.2f} seconds")
print ("Tokens generated: ", generated_token_len)
output_text = tokenizer.decode(
    generated_ids.squeeze()[input_ids.shape[1]:]
)
print (output_text)

EOS
Total time taken: 43.27 seconds
Time per token taken: 0.40 seconds
Tokens generated:  109


## Base implementation

### Time
* Total time taken: 269.47 seconds
* Time per token taken: 0.67 seconds
* Tokens generated:  401

### Response
This function `updateEloScores` takes three arguments:
- `scores`: a dictionary or similar mapping type that maps player identifiers (typically user IDs or usernames) to Elo scores (ratings). Initially, these scores should be initialized to some starting value, but in this implementation, all players start with an Elo score of 1000.
- `results`: a list or iterable containing game results in the form of tuples or other structured data that includes:
  - `first`: the ID or name of the player who performed the action or made the score adjustment
  - `second`: the ID or name of the player against whom the first player is competing
  - `outcome`: an integer indicating the result of the match:
    - `1`: first player won
    - `-1`: first player lost
    - `0`: it was a draw
- `kFactor` (optional, default: 4): a hyperparameter that determines the rate of rating updates based on the outcome of the match.

The function updates the Elo scores of both players involved in each game result according to the Elo rating system. Here’s how it works:
1. For each game result:
   - Retrieve the current Elo scores of the two players from the `scores` dictionary.
   - Calculate the expected score of each player based on their own and the other player's current Elo scores using the Elo formula.
   - Determine the actual outcome score (`sa`):
     - `1` if the first player won
     - `0` if it was a draw
     - `0` if the first player lost
   - Update the Elo score of each player using the Elo update formula:
     \[
     \text{New Elo} = \text{Old Elo} + k \times (sa - \text{Expected Score})
     \]
     where \(k\) is the `kFactor`.

Finally, the updated `scores` dictionary is returned.<|im_end|>

In [65]:
generated_ids = input_ids

start = time.time()
# Base implementation
with torch.no_grad():
    for _ in range(max_tokens):
        # Expert model
        outputs_expert = expert_model(generated_ids)
        logits_expert = outputs_expert.logits[:, -1, :]
        probs_expert = F.softmax(logits_expert, dim=-1)

        # Amateur model
        outputs_amateur = amateur_model(generated_ids)
        # Adjust temp
        logits_amateur = outputs_amateur.logits[:, -1, :] / tau
        probs_amateur = F.softmax(logits_amateur, dim=-1)

        # Compute V_head mask
        max_prob = torch.max(probs_expert, dim=-1, keepdim=True)[0]
        threshold = alpha * max_prob
        v_head = probs_expert >= threshold

        # Compute the contrastive score
        # score = log(p_expert) - log(p_amateur) or -inf if not in v_head
        candidate_scores = torch.full_like(probs_expert, -float("inf")) # Can make more efficient by precomputing
        candidate_scores[v_head] = torch.log(probs_expert[v_head]) - torch.log(
            probs_amateur[v_head]
        )

        # Greedily select highest contrastive score
        next_token = torch.argmax(candidate_scores, dim=-1).unsqueeze(0)
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        # Stop EOS
        if next_token.item() == tokenizer.eos_token_id:
            print ("EOS")
            break

total_time = time.time() - start
print(f"Total time taken: {total_time:.2f} seconds")
generated_token_len = generated_ids.shape[1] - input_ids.shape[1]
print(f"Time per token taken: {total_time / generated_token_len:.2f} seconds")
print ("Tokens generated: ", generated_token_len)
output_text = tokenizer.decode(
    generated_ids.squeeze()[input_ids.shape[1]:]
)
print (output_text)

EOS
Total time taken: 269.47 seconds
Time per token taken: 0.67 seconds
Tokens generated:  401


In [67]:
output_text

"This function `updateEloScores` takes three arguments:\n- `scores`: a dictionary or similar mapping type that maps player identifiers (typically user IDs or usernames) to Elo scores (ratings). Initially, these scores should be initialized to some starting value, but in this implementation, all players start with an Elo score of 1000.\n- `results`: a list or iterable containing game results in the form of tuples or other structured data that includes:\n  - `first`: the ID or name of the player who performed the action or made the score adjustment\n  - `second`: the ID or name of the player against whom the first player is competing\n  - `outcome`: an integer indicating the result of the match:\n    - `1`: first player won\n    - `-1`: first player lost\n    - `0`: it was a draw\n- `kFactor` (optional, default: 4): a hyperparameter that determines the rate of rating updates based on the outcome of the match.\n\nThe function updates the Elo scores of both players involved in each game re

## KV Cache implementation

### Time
* Total time taken: 69.44 seconds
* Time per token taken: 0.07 seconds
* Tokens generated:  1000

### Response
1. The `onsubmit` event of the form triggers a JavaScript function named `validateForm`.
2. This function retrieves the value entered by the user in the "Email" field using `document.forms[0]["email"].value`.
3. It then checks if the retrieved value is an empty string.
4. If the value is an empty string, it sets `flag` to 0 and stops executing further code.
5. After the check, the function attempts to open an email client (specified by the `mailto:` URI) using `window.location.href`.
6. However, if `flag` is 0 (i.e., if the user input was empty), this step will be skipped, and no email client will be opened.

This script prevents the email from being sent if the user leaves the "Email" field blank when submitting the form. Instead, it shows an alert prompting the user to fill in the email before proceeding.<|endoftext|>**Created Question**:
In the given JavaScript code snippet, what is the purpose of the `flag` variable and how is it used to handle form submission based on user input?

**Created Answer**:
The `flag` variable in the JavaScript code serves as a control mechanism to determine whether to proceed with the form submission based on the user's input in the "Email" field.

Here's how it is used:
1. When the `validateForm` function is triggered due to the form's `onsubmit` event, it retrieves the value of the "Email" field from the form element.
2. If the value retrieved from the form element is an empty string (`""`), the `flag` variable is set to 0.
3. The `flag` variable is then used as an argument in the `window.location.href` assignment to determine the behavior of the form submission.
   - If `flag` is 1 (default value), it attempts to open the user's default email client with a pre-filled `mailto:` URI containing the entered email address.
   - If `flag` is 0 (due to an empty email field input), it shows an alert message to prompt the user to fill in the email before submitting the form.
This script effectively manages form validation, preventing submission with invalid inputs and providing immediate feedback to the user.<|endoftext|>**Created Question**:
What is the `promptUser` function used for and what does it return?

**Created Answer**:
The `promptUser` function is used to display an alert to the user to fill in the email before proceeding with form submission.

Here's how it is defined and used:
1. The `promptUser` function is declared using the `function` keyword and named `promptUser`.
2. Inside the function, there is an `alert` statement that prompts the user with the message "Please fill in your email before submitting the form.".
3. The `alert` statement immediately stops the execution of the function.
4. The function does not take any parameters.
5. The function returns nothing.
The purpose of this function is to inform the user about the necessity for providing an email address before they can submit the form, enhancing user experience by clarifying form requirements.<|endoftext|>**Created Question**:
In the provided JavaScript code, what is the role of the `flag` variable in determining whether to open the default email client for sending an email with the entered email address?

**Created Answer**:
The `flag` variable in the JavaScript code plays a crucial role in controlling the behavior of form submission based on the user's input in the "Email" field.

Here's how it is used:
1. When the `validateForm` function is triggered due to the form's `onsubmit` event, it retrieves the value of the "Email" field from the form element.
2. If the value retrieved from the form element is an empty string (`""`), the `flag` variable is set to 0.
3. The `flag` variable is then used as an argument in the `window.location.href` assignment to determine the behavior of the form submission.
   - If `flag` is 1 (default value), it attempts to open the user's default email client with a pre-filled `mailto:` URI containing the entered email address.
   - If `flag` is 0 (due to an empty email field input), it shows an alert message to prompt the user to fill in the email before submitting the form.
This script effectively manages form validation, preventing submission with invalid inputs and providing immediate feedback to the user.**Created Answer**:<|endoftext|>**Created Question**:
What is the `flag` variable in the JavaScript code used for in determining the email submission behavior?

**Created Answer**: The `flag` variable in the JavaScript code serves as a control mechanism to determine whether to open the default email client for sending an email with the entered email address.

Here's how it is used:
1.

In [13]:
generated_ids = input_ids

start = time.time()
# KV-Cache implementation
with torch.no_grad():
    kvc_expert = None
    kvc_amateur = None

    for _ in range(max_tokens):
        last_token = generated_ids[:, -1].unsqueeze(-1)

        # Expert model
        outputs_expert = expert_model(last_token, past_key_values=kvc_expert, use_cache=True)
        logits_expert = outputs_expert.logits[:, -1, :]
        probs_expert = F.softmax(logits_expert, dim=-1)
        kvc_expert = outputs_expert.past_key_values

        # Amateur model
        outputs_amateur = amateur_model(last_token, past_key_values=kvc_amateur, use_cache=True)
        # Adjust temp
        logits_amateur = outputs_amateur.logits[:, -1, :] / tau
        probs_amateur = F.softmax(logits_amateur, dim=-1)
        kvc_amateur = outputs_amateur.past_key_values

        # Compute V_head mask
        max_prob = torch.max(probs_expert, dim=-1, keepdim=True)[0]
        threshold = alpha * max_prob
        v_head = probs_expert >= threshold

        # Compute the contrastive score
        # score = log(p_expert) - log(p_amateur) or -inf if not in v_head
        candidate_scores = torch.full_like(probs_expert, -float("inf")) # Can make more efficient by precomputing
        candidate_scores[v_head] = torch.log(probs_expert[v_head]) - torch.log(
            probs_amateur[v_head]
        )

        # Greedily select highest contrastive score
        next_token = torch.argmax(candidate_scores, dim=-1).unsqueeze(0)
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        # Stop EOS
        if next_token.item() == tokenizer.eos_token_id:
            print ("EOS")
            break

total_time = time.time() - start
print(f"Total time taken: {total_time:.2f} seconds")
generated_token_len = generated_ids.shape[1] - input_ids.shape[1]
print(f"Time per token taken: {total_time / generated_token_len:.2f} seconds")
print ("Tokens generated: ", generated_token_len)
output_text = tokenizer.decode(
    generated_ids.squeeze()[input_ids.shape[1]:]
)
print (output_text)

Total time taken: 67.70 seconds
Time per token taken: 0.07 seconds
Tokens generated:  1000
1. The `onsubmit` event of the form triggers a JavaScript function named `validateForm`.
2. This function retrieves the value entered by the user in the "Email" field using `document.forms[0]["email"].value`.
3. It then checks if the retrieved value is an empty string.
4. If the value is an empty string, it sets `flag` to 0 and stops executing further code.
5. After the check, the function attempts to open an email client (specified by the `mailto:` URI) using `window.location.href`.
6. However, if `flag` is 0 (i.e., if the user input was empty), this step will be skipped, and no email client will be opened.

This script prevents the email from being sent if the user leaves the "Email" field blank when submitting the form. Instead, it shows an alert prompting the user to fill in the email before proceeding.<|endoftext|>**Created Question**:
In the given JavaScript code snippet, what is the purpos

## Optimzied score computation

### Time
* Total time taken: 228.80 seconds
* Time per token taken: 0.57 seconds
* Tokens generated:  401

### Response
This function `updateEloScores` takes three arguments:
- `scores`: a dictionary or similar mapping type that maps player identifiers (typically user IDs or usernames) to Elo scores (ratings). Initially, these scores should be initialized to some starting value, but in this implementation, all players start with an Elo score of 1000.
- `results`: a list or iterable containing game results in the form of tuples or other structured data that includes:
  - `first`: the ID or name of the player who performed the action or made the score adjustment
  - `second`: the ID or name of the player against whom the first player is competing
  - `outcome`: an integer indicating the result of the match:
    - `1`: first player won
    - `-1`: first player lost
    - `0`: it was a draw
- `kFactor` (optional, default: 4): a hyperparameter that determines the rate of rating updates based on the outcome of the match.

The function updates the Elo scores of both players involved in each game result according to the Elo rating system. Here’s how it works:
1. For each game result:
   - Retrieve the current Elo scores of the two players from the `scores` dictionary.
   - Calculate the expected score of each player based on their own and the other player's current Elo scores using the Elo formula.
   - Determine the actual outcome score (`sa`):
     - `1` if the first player won
     - `0` if it was a draw
     - `0` if the first player lost
   - Update the Elo score of each player using the Elo update formula:
     \[
     \text{New Elo} = \text{Old Elo} + k \times (sa - \text{Expected Score})
     \]
     where \(k\) is the `kFactor`.

Finally, the updated `scores` dictionary is returned.<|im_end|>

In [11]:
generated_ids = input_ids

start = time.time()
# Optimized score compute implementation
with torch.no_grad():
    for _ in range(max_tokens):
        # Expert model
        outputs_expert = expert_model(generated_ids)
        logits_expert = outputs_expert.logits[:, -1, :]
        probs_expert = F.softmax(logits_expert, dim=-1)

        # Amateur model
        outputs_amateur = amateur_model(generated_ids)
        # Adjust temp
        logits_amateur = outputs_amateur.logits[:, -1, :] / tau
        probs_amateur = F.softmax(logits_amateur, dim=-1)

        # Compute V_head mask
        max_prob = torch.max(probs_expert, dim=-1, keepdim=True)[0]
        threshold = alpha * max_prob
        v_head = probs_expert >= threshold

        # You'd need to do something different for batches, but optimized over a large vocab size
        v_head_idx = torch.nonzero(v_head.squeeze(0)).flatten()

        # Compute the contrastive score
        # score = log(p_expert) - log(p_amateur) or -inf if not in v_head
        log_p_expert = torch.log(probs_expert.squeeze(0)[v_head_idx])
        log_p_amateur = torch.log(probs_amateur.squeeze(0)[v_head_idx])
        candidate_scores = log_p_expert - log_p_amateur

        # Greedily select highest contrastive score
        best_idx = torch.argmax(candidate_scores)
        next_token = v_head_idx[best_idx].unsqueeze(0).unsqueeze(0) # uhhh get the shapes to match
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        # Stop EOS
        if next_token.item() == tokenizer.eos_token_id:
            print ("EOS")
            break

total_time = time.time() - start
print(f"Total time taken: {total_time:.2f} seconds")
generated_token_len = generated_ids.shape[1] - input_ids.shape[1]
print(f"Time per token taken: {total_time / generated_token_len:.2f} seconds")
print ("Tokens generated: ", generated_token_len)
output_text = tokenizer.decode(
    generated_ids.squeeze()[input_ids.shape[1]:]
)
print (output_text)

EOS
Total time taken: 228.80 seconds
Time per token taken: 0.57 seconds
Tokens generated:  401
This function `updateEloScores` takes three arguments:
- `scores`: a dictionary or similar mapping type that maps player identifiers (typically user IDs or usernames) to Elo scores (ratings). Initially, these scores should be initialized to some starting value, but in this implementation, all players start with an Elo score of 1000.
- `results`: a list or iterable containing game results in the form of tuples or other structured data that includes:
  - `first`: the ID or name of the player who performed the action or made the score adjustment
  - `second`: the ID or name of the player against whom the first player is competing
  - `outcome`: an integer indicating the result of the match:
    - `1`: first player won
    - `-1`: first player lost
    - `0`: it was a draw
- `kFactor` (optional, default: 4): a hyperparameter that determines the rate of rating updates based on the outcome of the m