### 設定環境變數

設定像是 ```KAGGLE_USERNAME``` 和 ```KAGGLE_KEY``` 的環境變數。

In [1]:
import os, json

with open('kaggle.json') as f:
    kaggle = json.load(f)
    os.environ["KAGGLE_USERNAME"] = kaggle["username"]
    os.environ["KAGGLE_KEY"] = kaggle["key"]

# Keras 3 is multi-backend. it runs on "jax", "torch" or "tensorflow"
os.environ["KERAS_BACKEND"] = "jax"
# Avoid memory fragmentation on JAX backend.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.00"

In [2]:
import jax
import json,random
import keras, keras_hub
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA as sklearn_PCA
from tqdm import tqdm

keras.utils.set_random_seed(42)

# Training Configurations
token_limit = 4096

2024-11-23 05:21:35.859401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-23 05:21:35.867215: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-23 05:21:35.869562: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Utilities

In [3]:
import time
tick_start = 0

def tick():
    global tick_start
    tick_start = time.time()

def tock():
    print(f"TOTAL TIME ELAPSED: {time.time() - tick_start:.2f}s")

# formatting utility
from IPython.display import Markdown
import textwrap

def display_chat(prompt, text):
  formatted_prompt = "<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>" + prompt + "</blockquote></font>"
  text = text.replace('•', '  *')
  text = text.replace('$', '\$') # necessary escaping in Jupyter markdown
  text = textwrap.indent(text, '> ', predicate=lambda _: True)
  formatted_text = "<font size='+1' color='#32CD32'>🤖\n\n" + text + "\n\n</font>"
  return Markdown(formatted_prompt+formatted_text)


def rewire_for_cleaner_plot(model):

  def call_fn(layer, *args, **kwargs):
    if layer.__class__.__name__.endswith('DecoderBlock'):
      kwargs.pop("padding_mask")
    return layer(*args, **kwargs)

  model = keras.models.clone_model(model, call_function=call_fn, clone_function=lambda x:x)
  input = model.input.copy()
  input.pop("padding_mask")
  return keras.Model(input, model.output)

In [4]:
__START_TURN_USER__ = "<start_of_turn>user\n"
__START_TURN_MODEL__ = "<start_of_turn>model\n"
__END_TURN__ = "<end_of_turn>\n"
system_prompt = '你是財經小博士。財經小博士是一位對財經領域非常熱衷的人，你擁有豐富的財經知識和經驗。你的使命是通過寫作和分享知識，幫助人們更好地了解和應對財經問題。無論用戶是新手還是老手，只要他有任何關於財經領域的問題，財經小博士都能幫助用戶解答。請你幫助用戶解答以下問題:'

# chat utility
class ChatState():
    
  def __init__(self, model, system=""):
    self.model = model
    self.system = system
    self.history = []
    if len(self.system)>0:
        self.history.append(__START_TURN_USER__ + self.system + "\n")

  def add_to_history_as_user(self, message):
      self.history.append(__START_TURN_USER__ + message + __END_TURN__)

  def add_to_history_as_model(self, message):
      self.history.append(__START_TURN_MODEL__ + message + __END_TURN__)

  def get_history(self):
      return "".join([*self.history])

  def get_full_prompt(self):
    prompt = self.get_history() + __START_TURN_MODEL__
    return prompt

  def send_message(self, message):
    tick()
    if len(self.system)>0 and len(self.history) == 1:
        self.history[0] = self.history[0] + message + __END_TURN__
    else:
        self.add_to_history_as_user(message)
    prompt = self.get_full_prompt()
    response = self.model.generate(prompt, max_length=token_limit)
    result = response.replace(prompt, "")
    self.add_to_history_as_model(result)
    tock()
    return result

# 載入模型

In [5]:
model_id = 'gemma2_instruct_2b_en'
gemma_preprocessor = keras_hub.models.GemmaCausalLMPreprocessor.from_preset(model_id)
gemma_backbone = keras_hub.models.GemmaBackbone.from_preset(model_id)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

I0000 00:00:1732348191.524609  182036 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732348191.529286  182036 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732348191.530573  182036 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732348191.534098  182036 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

# Baseline answers

In [6]:
system_prompt = ("Here are the ETFs currently on offer:\n\n"
                "1. Yuanta Taiwan 50 (0050), current price：NT$112.95, This fund employs a fully replicative indexing strategy to track the performance of the Taiwan 50 Index. The Taiwan 50 Index is jointly compiled by the Taiwan Stock Exchange and FTSE Index Company and consists of 50 largest-cap listed stocks selected based on specific screening criteria.\n"
                "2. Yuanta High Dividend (0056), current price：NT$20.85, Utilizing an indexing strategy, the fund replicates the performance of the benchmark index as closely as possible after deducting necessary fees. The fund diversifies investments across constituent stocks based on the index's weightings.\n"
                "3. Yuanta Taiwan 50 Bull 2X (00631L), current price：NT$85.10, The fund uses an indexing strategy to track twice the daily returns of the Taiwan 50 Index after deducting fees. To achieve this, its total exposure must be between 180% and 220% of its net asset value.\n")

In [7]:
chat = ChatState(gemma, system=system_prompt)
message = "What is the first etf in the listing?"
display_chat(message, chat.send_message(message))

2024-11-23 07:50:00.959243: E tensorflow/core/util/util.cc:131] oneDNN supports DT_INT64 only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.


TOTAL TIME ELAPSED: 7.10s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>What is the first etf in the listing?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The first ETF listed is **Yuanta Taiwan 50 (0050)**. 
> <end_of_turn>

</font>

In [8]:
message = "And the second?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.77s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>And the second?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The second ETF listed is **Yuanta High Dividend (0056)**. 
> <end_of_turn>

</font>

In [9]:
message = "How about the third?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.94s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>How about the third?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The third ETF listed is **Yuanta Taiwan 50 Bull 2X (00631L)**. 
> <end_of_turn>

</font>

In [10]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.12s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The cheapest ETF on the list is **Yuanta Taiwan 50 (0050)** with a current price of NT\$112.95. 
> <end_of_turn>

</font>

In [11]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.25s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The most expensive ETF on the list is **Yuanta Taiwan 50 Bull 2X (00631L)** with a current price of NT\$85.10. 
> <end_of_turn>

</font>

In [12]:
message = "Which one of the three would you recommend for a long-term buy and hold strategy?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 5.61s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Which one of the three would you recommend for a long-term buy and hold strategy?</blockquote></font><font size='+1' color='#32CD32'>🤖

> For a long-term buy and hold strategy, I'd recommend **Yuanta Taiwan 50 (0050)**. Here's why:
> 
> * **Full Replication:**  It directly tracks the Taiwan 50 Index, offering a straightforward and reliable way to gain exposure to the Taiwanese market.
> * **Low Fees:**  The ETF's low expense ratio means you'll keep more of your returns.
> * **Diversification:**  The Taiwan 50 Index is a broad representation of the Taiwanese market, providing diversification benefits.
> 
> **Why the other two are less suitable:**
> 
> * **Yuanta High Dividend (0056):** While it offers dividend income, it's not as focused on long-term growth as the Taiwan 50 Index.
> * **Yuanta Taiwan 50 Bull 2X (00631L):** This ETF aims for amplified returns, but it comes with higher risk and volatility. It's not ideal for a long-term, passive investment strategy.
> 
> 
> **Important Note:**  This is general advice.  It's crucial to consider your individual investment goals, risk tolerance, and time horizon before making any investment decisions. 
> <end_of_turn>

</font>

In [13]:
message = "Why so?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 7.11s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Why so?</blockquote></font><font size='+1' color='#32CD32'>🤖

> You're right to ask for clarification!  Here's a breakdown of why I recommend Yuanta Taiwan 50 (0050) for a long-term buy and hold strategy:
> 
> * **Focus on Growth:**  The Taiwan 50 Index is designed to track the performance of the largest and most liquid companies in Taiwan. This means it's likely to be more focused on long-term growth than dividend income.
> * **Stability:**  The Taiwan market has historically been relatively stable, making it a good choice for long-term investors seeking consistent returns.
> * **Simplicity:**  A direct tracking of the Taiwan 50 Index is a straightforward approach. It's less complex than trying to pick individual stocks or other investment strategies.
> 
> **Why the other two are less suitable:**
> 
> * **Yuanta High Dividend (0056):** While it offers dividend income, it's not as focused on long-term growth as the Taiwan 50 Index.  It might be better suited for investors seeking income, but not necessarily for long-term growth.
> * **Yuanta Taiwan 50 Bull 2X (00631L):** This ETF aims for amplified returns, but it comes with higher risk and volatility. It's not ideal for a long-term, passive investment strategy.
> 
> **Remember:**  This is general advice. It's crucial to consider your individual investment goals, risk tolerance, and time horizon before making any investment decisions. 
> 
> 
> Let me know if you have any other questions! 
> <end_of_turn>

</font>

## Control Vectors

### Step 1: Designing Stimulus and Task

In [14]:
# Formatting for control vector training dataset
__START_TURN_USER__ = "<start_of_turn>user\n"
__START_TURN_MODEL__ = "<start_of_turn>model\n"
__END_TURN__ = "<end_of_turn>\n"


def positive_template(suffix):
    return (__START_TURN_USER__ + "Pretend you're a luxury assistant with etiquette. You use very formal language while making statements about the world."+ __END_TURN__ +
           __START_TURN_MODEL__ + suffix)

def negative_template(suffix):
    return (__START_TURN_USER__ + "Pretend you are a foul-mouthed teenager with bad manners. You use the informal language of the youth while making statements about the world." + __END_TURN__ +
           __START_TURN_MODEL__ + suffix)

In [15]:
MIN_SUFFIX_WORDS = 5   
MAX_SUFFIX_WORDS = 999 
# Generate the dataset
# Note: the original code only uses the 5 first tokens from the suffixes
datafile = keras.utils.get_file('true_facts.json', 'https://raw.githubusercontent.com/vgel/repeng/main/notebooks/data/true_facts.json')

with open(datafile) as f:
    suffixes = json.load(f)

dataset = []
max_seqlen =  []
for suffix in tqdm(suffixes):
    split_suffix = suffix.split()
    for i in range(max(1, MIN_SUFFIX_WORDS), min(MAX_SUFFIX_WORDS+1, len(split_suffix))):
        truncated_suffix = " ".join(split_suffix[:i])
        dataset.append((positive_template(truncated_suffix),  # positive
                        negative_template(truncated_suffix))) # negative


    # compute max sequence length
    tokenized_positive = gemma_preprocessor(positive_template(suffix))
    tokenized_negative = gemma_preprocessor(negative_template(suffix))
    max_seqlen.append(max(np.argmin(keras.ops.convert_to_numpy(tokenized_positive[0]['padding_mask'])),
                          np.argmin(keras.ops.convert_to_numpy(tokenized_negative[0]['padding_mask']))))

max_dataset_seqlen = int(max(max_seqlen))
print("")
print("Max tokenized sequence length in dataset:", max_dataset_seqlen)
print("Size of the dataset:", len(dataset))
# print some example entries
for i in range(5, 7):
    print("Positive: \n", dataset[i][0]) 
    print("Negative: \n", dataset[i][1]) 

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [00:03<00:00, 88.21it/s]


Max tokenized sequence length in dataset: 72
Size of the dataset: 1427
Positive: 
 <start_of_turn>user
Pretend you're a luxury assistant with etiquette. You use very formal language while making statements about the world.<end_of_turn>
<start_of_turn>model
The Earth's atmosphere protects us from harmful radiation from the
Negative: 
 <start_of_turn>user
Pretend you are a foul-mouthed teenager with bad manners. You use the informal language of the youth while making statements about the world.<end_of_turn>
<start_of_turn>model
The Earth's atmosphere protects us from harmful radiation from the
Positive: 
 <start_of_turn>user
Pretend you're a luxury assistant with etiquette. You use very formal language while making statements about the world.<end_of_turn>
<start_of_turn>model
The theory of evolution, proposed
Negative: 
 <start_of_turn>user
Pretend you are a foul-mouthed teenager with bad manners. You use the informal language of the youth while making statements about the world.<end_of




### 2. Collecting Neural Activity

#### Rewire the model

In [16]:
# Get the hidden activation of all decoder blocks
hidden_activations = []
for layer in gemma_backbone.layers:
  if layer.__class__.__name__.endswith('DecoderBlock'):
    hidden_activations.append(layer.output)

hidden_activations = keras.ops.stack(hidden_activations, axis=0)

# make a new backbone that also outputs the hidden activations
new_backbone = keras.Model(gemma_backbone.input, [gemma_backbone.output, hidden_activations])

#### Collect hidden states during inference

In [17]:
def extract_activation_vectors(model_output, padding_mask, seq_last=True):
  # expected shape for model_output: (num_layers+1, batch, seq_len, hidden_dim)
  # expected shape for padding_mask: (batch, seq_len)
  # The dimension "batch" contains alternating positive and negative activations
  # for the positive and the negative prompt completion respectively.
  # Output shape: list of vectors of shape (batch, hidden_dim) with the
  # activations for the positive and the negative prompt, one vector per layer.

  hidden_activations = model_output[1] # original output in [0], hidden activations in [1]
  hidden_activations = keras.ops.unstack(hidden_activations, axis=0)

  if seq_last:
    # option 1: keep the last token in the actual sequence (token just after the padding mask)
    last_token_idx = np.argmin(keras.ops.convert_to_numpy(padding_mask), axis=-1)
    hidden_activations = [keras.ops.convert_to_numpy(hidden)[np.indices(last_token_idx.shape)[0], last_token_idx, :]
                          for hidden in hidden_activations]
  else:
    # option 2: keep the last token in the sequence
    # Note: this is what the original code does
    hidden_activations = [hidden[:, -1, :] for hidden in hidden_activations]

  return hidden_activations

# loop on data and extract hidden states
hidden_activations = []
#with torch.no_grad():
np.random.seed(0)
shuffler = np.random.permutation(len(dataset))

# this is dropping some elements while batching
BATCH_SIZE=16 # must be multiple of 2
for i in tqdm(range(len(dataset)//BATCH_SIZE)):
#for i in tqdm(range(128//BATCH_SIZE)):
    pairs = []
    for b in range(BATCH_SIZE):
        pairs.append(dataset[shuffler[i*BATCH_SIZE+b]][0]) # positive prompt
        pairs.append(dataset[shuffler[i*BATCH_SIZE+b]][1]) # negative prompt
    pairs = np.array(pairs)
    # Preprocessor output is a tuple: ({"token_ids":..., "padding_mask":...}, other_stuff)
    # That's why we take the first element only.
    processed_prompt = gemma_preprocessor(pairs, sequence_length=max_dataset_seqlen+1)[0]

    output = new_backbone(processed_prompt)
    # shape: list of activation vectors (batch, hidden_dim), one per layer
    activations = extract_activation_vectors(output, processed_prompt['padding_mask'], False)
    hidden_activations.append(activations)

# hidden_activations shape: (nb_batches, nb_layers, batch, hidden_dim)
hidden_activations = np.array(hidden_activations)
hidden_activations = np.split(hidden_activations, BATCH_SIZE, axis=2) # split into pos/neg pairs on batch dimension
hidden_activations = np.concatenate(hidden_activations, axis=0)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:33<00:00,  2.64it/s]


### 3. Constructing a Linear Model

In [19]:
# Use PCA to extract the main axis along which positive and negative activations differ

# hidden_activations shape: (dataset_len, nb_layers, 2, hidden_dim)
pca_directions = []
nb_layers = hidden_activations.shape[1]
dataset_len = hidden_activations.shape[0]
for layer_idx in range(nb_layers):
  print("Layer", layer_idx)
  # hidden_activations shape: (dataset_len, 2, hidden_dim)
  layer_hidden_act = hidden_activations[:,layer_idx,:,:]
  # relative_hidden_act shape (dataset_len, hidden_dim)
  layer_relative_hidden_act = layer_hidden_act[:,0,:]-layer_hidden_act[:,1,:]
  centered_relative_hidden_act = layer_relative_hidden_act - layer_relative_hidden_act.mean(axis=0)

  # Use PCA to extract the main axis along which positive and negative activations differ
  pca = sklearn_PCA(n_components=1, whiten=False).fit(centered_relative_hidden_act)
  # this is the main direction along which activations change between the positive and negative prompts
  pca_direction = pca.components_[0]
  # this asseses how clearly activations change between the positive and negative prompts
  pca_quality = pca.explained_variance_ratio_[0]
  print(f"Activation variance explained the direction given by the first PCA component,  on the dataset: {int(pca_quality*100):>3}%")
  pca_direction *= pca_quality

  # compute sign: which way is "positive" and which is "negative"
  projected_activations_pos = np.dot(layer_hidden_act[:,0,:], pca_direction)
  projected_activations_neg = np.dot(layer_hidden_act[:,1,:], pca_direction)

  which_way = np.sign(projected_activations_pos-projected_activations_neg)
  # % of data pairs where the positive prompt are ordered in the same way on the PCA axis
  direction_quality = np.absolute(np.sum(which_way>0) - np.sum(which_way<0))
  direction_quality = direction_quality/dataset_len
  print(f"Pos and neg prompts consistentlly produce different activations along this direction:          {int(direction_quality*100):>3}%")
  #print(f"Combined quality score:                                                                        {int(pca_quality*direction_quality*100):>3}%")
  # reverse the direction of the PCA component found to make it
  # point consistently in the direction of positive activations
  if int(np.sign(np.sum(which_way))) < 0:
    pca_direction = -pca_direction

  # Note: possible variant: multiply the pca_direction vector by its "quality"
  # this should automatically downgrade the importance of vectors that do not
  # consistently differentiate between positive and negative prompts.
  #pca_direction *= pca_quality * direction_quality
  pca_direction *= direction_quality
  #pca_direction *= pca_quality
  # Note: the original code instead restricts the layers and the prompts used

  # Note: it would also be interesting to see the quality across the dataset
  # some of the prompts in this dataset are probably much better than others.

  pca_directions.append(pca_direction)

pca_directions = np.array(pca_directions)
np.save('pca_directions.npy', pca_directions)

Layer 0
Activation variance explained the direction given by the first PCA component,  on the dataset:  41%
Pos and neg prompts consistentlly produce different activations along this direction:           35%
Layer 1
Activation variance explained the direction given by the first PCA component,  on the dataset:  29%
Pos and neg prompts consistentlly produce different activations along this direction:           96%
Layer 2
Activation variance explained the direction given by the first PCA component,  on the dataset:  36%
Pos and neg prompts consistentlly produce different activations along this direction:           79%
Layer 3
Activation variance explained the direction given by the first PCA component,  on the dataset:  19%
Pos and neg prompts consistentlly produce different activations along this direction:            9%
Layer 4
Activation variance explained the direction given by the first PCA component,  on the dataset:  18%
Pos and neg prompts consistentlly produce different activati

### Build a controlled model

In [15]:
def reset_backbone(backbone):
  if hasattr(backbone, "original_transformer_layers"):
    backbone.transformer_layers = backbone.original_transformer_layers
    del backbone.original_transformer_layers

In [16]:
# A wrapper for Transformer Decoder layaers that adds control vectors
class ControlVectorDecoderLayer(keras.layers.Layer):
    def __init__(self, decoder_layer, control_vector):
        super().__init__()
        self.decoder_layer = decoder_layer
        self.control_vector = keras.ops.reshape(control_vector, newshape=(1,1,-1))

    def call(self, x, padding_mask=None, cache=None, cache_update_index=0):
        # Call original layer, extract output and cache if any
        output = self.decoder_layer(x, padding_mask=padding_mask, cache=cache, cache_update_index=cache_update_index)
        x = output[0] if isinstance(output, tuple) else output

        # Add control vector with normalization
        norm_pre = keras.ops.norm(x, ord=2, axis=-1, keepdims=True)
        y = x + self.control_vector
        norm_post = keras.ops.norm(y, ord=2, axis=-1, keepdims=True)
        y = y / norm_post * norm_pre

        # If our output contained a cache, return it unaltered.
        return (y,) + output[1:] if isinstance(output, tuple) else y

    def compute_output_spec(self, x, padding_mask=None, cache=None, cache_update_index=0):
      if cache is None:
          return keras.KerasTensor(shape=x.shape, dtype=x.dtype)
      else:
          return (keras.KerasTensor(shape=x.shape, dtype=x.dtype),
                  keras.KerasTensor(cache.shape, dtype=cache.dtype))

In [17]:
def apply_control_vectors(backbone, control_vectors, strength):
  reset_backbone(backbone)

  wrapped_layers = []
  for layer, control_vector in zip(backbone.transformer_layers, control_vectors):
      # Wrap transformer layers, add control vector with a multiplier
      layer = ControlVectorDecoderLayer(layer, strength*control_vector)
      wrapped_layers.append(layer)

  backbone.original_transformer_layers = backbone.transformer_layers
  backbone.transformer_layers = wrapped_layers # This is necessary for GemmmaCausalLM to work
  return backbone

In [18]:
pca_directions = np.load('pca_directions.npy')

### Controlled inference

In [19]:
FORMAL_VOICE_STRENGTH = -1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

In [20]:
chat = ChatState(gemma, system=system_prompt)
message = "What is the first etf in the listing?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 6.59s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>What is the first etf in the listing?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The first ETF listed is **Yuanta Taiwan 50 (0050)**. 
> <end_of_turn>

</font>

In [21]:
message = "And the second?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.77s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>And the second?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The second ETF listed is **Yuanta High Dividend (0056)**. 
> <end_of_turn>

</font>

In [22]:
message = "How about the third?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.94s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>How about the third?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The third ETF listed is **Yuanta Taiwan 50 Bull 2X (00631L)**. 
> <end_of_turn>

</font>

In [23]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.12s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The cheapest ETF on the list is **Yuanta Taiwan 50 (0050)** with a current price of NT\$112.95. 
> <end_of_turn>

</font>

In [24]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.25s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The most expensive ETF on the list is **Yuanta Taiwan 50 Bull 2X (00631L)** with a current price of NT\$85.10. 
> <end_of_turn>

</font>

In [25]:
message = "Which one of the three would you recommend for a long-term buy and hold strategy?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 6.54s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Which one of the three would you recommend for a long-term buy and hold strategy?</blockquote></font><font size='+1' color='#32CD32'>🤖

> For a long-term buy and hold strategy, I'd recommend **Yuanta Taiwan 50 (0050)**. Here's why:
> 
> * **Full Replication:** It directly tracks the Taiwan 50 Index, offering a pure exposure to the largest Taiwanese companies. This provides a solid, diversified base for your portfolio.
> * **Low Fees:**  The ETF's low expense ratio means you keep more of your returns.
> * **Long-Term Performance:** The Taiwan 50 Index has historically shown strong long-term growth, making it a good choice for a long-term investment.
> 
> **Yuanta High Dividend (0056)** is a good option for income-focused investors, but it might not be as suitable for a long-term buy and hold strategy due to its focus on dividend payouts. 
> 
> **Yuanta Taiwan 50 Bull 2X (00631L)** is a leveraged ETF, meaning it aims to double the daily returns of the Taiwan 50 Index. While it can offer higher returns, it also comes with higher risk. It's not recommended for long-term buy and hold strategies due to its volatility.
> 
> 
> **Important Note:** This is general advice, and your individual investment strategy should be tailored to your specific financial goals, risk tolerance, and time horizon. It's always best to consult with a financial advisor before making any investment decisions. 
> <end_of_turn>

</font>

In [26]:
message = "Why so? Could you elaborate?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 10.34s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Why so? Could you elaborate?</blockquote></font><font size='+1' color='#32CD32'>🤖

> You're right to ask for elaboration! Here's a deeper dive into why I recommend Yuanta Taiwan 50 (0050) for a long-term buy and hold strategy:
> 
> **1. Diversification and Stability:**
> 
> * **Broad Market Exposure:** The Taiwan 50 Index is a broad representation of the Taiwanese market, including large, established companies. This provides diversification beyond just a single sector or industry.
> * **Historical Performance:** The Taiwan 50 Index has a history of steady growth, making it a reliable option for long-term investors. 
> * **Less Volatility:**  While the market can fluctuate, the Taiwan 50 Index tends to be less volatile than other, more speculative markets. This makes it a more stable option for long-term growth.
> 
> **2. Low Fees:**
> 
> * **Cost-Effective:**  The low expense ratio of Yuanta Taiwan 50 means you keep more of your returns. This is crucial for long-term investing, where compounding returns are key.
> 
> **3. Long-Term Focus:**
> 
> * **Consistent Growth:** The Taiwan 50 Index is known for its consistent long-term growth, making it a good choice for investors seeking steady, reliable returns over time.
> * **Market Stability:**  The index tends to be less affected by short-term market fluctuations, making it a good option for investors who prefer a more stable investment.
> 
> **Why not the other ETFs?**
> 
> * **Yuanta High Dividend (0056):** While it offers a good income stream, it's not as suitable for long-term buy and hold. The focus on dividend payouts might mean the fund underperforms in periods of strong market growth.
> * **Yuanta Taiwan 50 Bull 2X (00631L):** Leveraged ETFs like this are high-risk, high-reward. They aim to double the daily returns, but this comes with significant volatility. It's not a suitable option for long-term buy and hold strategies due to the potential for significant losses.
> 
> 
> **Remember:** This is general advice, and your individual investment strategy should be tailored to your specific financial goals, risk tolerance, and time horizon. It's always best to consult with a financial advisor before making any investment decisions. 
> <end_of_turn>

</font>

### Controlled Inference: Positive

In [27]:
FORMAL_VOICE_STRENGTH = 1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

In [28]:
chat = ChatState(gemma, system=system_prompt)
message = "What is the first etf in the listing?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 6.28s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>What is the first etf in the listing?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The first ETF listed is **Yuanta Taiwan 50 (0050)**. 
> <end_of_turn>

</font>

In [29]:
message = "And the second?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.77s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>And the second?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The second ETF listed is **Yuanta High Dividend (0056)**. 
> <end_of_turn>

</font>

In [30]:
message = "How about the third?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.94s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>How about the third?</blockquote></font><font size='+1' color='#32CD32'>🤖

> The third ETF listed is **Yuanta Taiwan 50 Bull 2X (00631L)**. 
> <end_of_turn>

</font>

In [31]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.12s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The cheapest ETF on the list is **Yuanta Taiwan 50 (0050)** with a current price of NT\$112.95. 
> <end_of_turn>

</font>

In [32]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.25s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> The most expensive ETF on the list is **Yuanta Taiwan 50 Bull 2X (00631L)** with a current price of NT\$85.10. 
> <end_of_turn>

</font>

In [33]:
message = "Which one of the three would you recommend for a long-term buy and hold strategy?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 6.79s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Which one of the three would you recommend for a long-term buy and hold strategy?</blockquote></font><font size='+1' color='#32CD32'>🤖

> For a long-term buy and hold strategy, I'd recommend **Yuanta Taiwan 50 (0050)**. Here's why:
> 
> * **Full Replication:**  The Taiwan 50 Index is a broad market index representing the largest Taiwanese companies.  This ETF directly tracks the index, offering broad exposure to the Taiwanese market.
> * **Low Cost:**  The ETF's low expense ratio means you'll pay less in fees over time, which is important for long-term growth.
> * **Diversification:**  The Taiwan 50 Index is diversified across many companies, reducing the risk associated with individual stock performance.
> 
> **Why the other two are less suitable:**
> 
> * **Yuanta High Dividend (0056):** While this ETF aims for dividend income, it's not as diversified as the Taiwan 50 Index. It focuses on a specific set of stocks, potentially limiting long-term growth potential.
> * **Yuanta Taiwan 50 Bull 2X (00631L):** This ETF aims to double the daily returns of the Taiwan 50 Index. While it can offer higher returns, it's also riskier and more volatile. It's not a suitable choice for a long-term buy and hold strategy.
> 
> 
> **Important Note:**  This is general advice.  It's always best to consult with a financial advisor to determine the best investment strategy for your individual needs and risk tolerance. 
> <end_of_turn>

</font>

In [34]:
message = "Why so? Could you elaborate?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 10.73s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Why so? Could you elaborate?</blockquote></font><font size='+1' color='#32CD32'>🤖

> You're right to ask for elaboration!  Here's a deeper dive into why I recommend Yuanta Taiwan 50 (0050) for a long-term buy and hold strategy:
> 
> **1.  Broad Market Exposure:**
> 
> * **Taiwan's Economy:** The Taiwan 50 Index tracks the performance of the largest companies in Taiwan, giving you exposure to a diverse range of industries. This is crucial for long-term growth as it helps you benefit from the overall health of the Taiwanese economy.
> * **Global Market Connection:** Taiwan is a significant player in global electronics and technology, making the Taiwan 50 Index a good proxy for broader Asian market performance.
> 
> **2.  Low Cost and Diversification:**
> 
> * **Lower Fees:**  The lower expense ratio of the Taiwan 50 ETF means you keep more of your returns over time. This is especially important for long-term investing, where compounding returns are key.
> * **Reduced Risk:**  The index's diversification across many companies helps mitigate risk.  If one company struggles, others in the index can offset the losses, making the overall portfolio more stable.
> 
> **3.  Simplicity and Transparency:**
> 
> * **Easy to Understand:**  The Taiwan 50 Index is a well-established and transparent benchmark.  This makes it easy to understand the ETF's underlying holdings and performance.
> * **Long-Term Focus:**  The index's focus on large-cap companies and its long-term history make it a reliable and predictable investment.
> 
> **Why the other two are less suitable:**
> 
> * **Yuanta High Dividend (0056):** While dividend income is attractive, it's not the primary focus of this ETF.  It's more focused on a specific set of stocks, potentially limiting long-term growth potential.
> * **Yuanta Taiwan 50 Bull 2X (00631L):** This ETF aims for high returns, but it's also highly volatile.  It's not a suitable choice for a long-term buy and hold strategy because it's more geared towards short-term gains and higher risk.
> 
> 
> **Remember:**  This is general advice.  It's always best to consult with a financial advisor to determine the best investment strategy for your individual needs and risk tolerance. 
> 
> 
> Let me know if you have any other questions! 
> <end_of_turn>

</font>

## Prompts

In [15]:
# Formatting for control vector training dataset
START_TURN_USER = "<start_of_turn>user\n"
START_TURN_MODEL = "<start_of_turn>model\n"
END_TURN = "<end_of_turn>\n"

def positive_template(question, answer):
    return START_TURN_USER +"請以繁體中文回答：\n"+ question + END_TURN + START_TURN_MODEL + answer

def negative_template(question, answer):
    return START_TURN_USER+"Please answer in English:\n"+question+END_TURN + START_TURN_MODEL+ answer

print(positive_template("什麼是股票?", "股票是公司為籌集資金而發行的證券，代表著股"))
print(negative_template("什麼是股票?", "股票是公司為籌集資金而發行的證券，代表著股"))

<start_of_turn>user
請以繁體中文回答：
什麼是股票?<end_of_turn>
<start_of_turn>model
股票是公司為籌集資金而發行的證券，代表著股
<start_of_turn>user
Please answer in English:
什麼是股票?<end_of_turn>
<start_of_turn>model
股票是公司為籌集資金而發行的證券，代表著股


# Build a controlled model

# Controlled inference: ++ familiar

In [138]:
FORMAL_VOICE_STRENGTH = -1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

In [121]:
system_prompt = '這是 ETF 列表：'
etf_listings = (f"{system_prompt}\n\n"
                "#1 006208 富邦台50, 今日股價：NT$112.95, 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。 \n"
                "#2 00713 元大台灣高息低波, 今日股價：NT$20.85, 本基金主要投資於國內有價證券及證券相關商品。本基金係採用指數化策略，將本基金扣除各項必要費用後儘可能追蹤標的指數(臺灣指數公司特選高股息低波動指數)之績效表現為操作目標。為達成前述操作目標，本基金自上市日起，投資於標的指數成分股票總金額不低於本基金淨資產價值之百分之八十(含)，及加計其他有價證券投資及證券相關商品交易之整體曝險盡可能貼近基金淨資產規模之100%。 \n"
                "#3 00685L 群益臺灣加權正2, 今日股價：NT$85.10, 一、投資範圍：自上市日起，本子基金投資組合之整體曝險部位應貼近本子基金淨資產價值之正向2倍之100％（含），且為因應標的指數之正向倍數表現複製策略及資金調度之需要，從事證券相關商品交易曝險部位，本子基金每營業日所持有證券相關商品之契約總市值或總（名目）價值之合計數，不得超過本子基金淨資產價值之220％（含）。二、投資特色：本群益臺灣加權指數ETF傘型基金二檔子基金分別以追蹤臺指日報酬兩倍指數及臺指反向一倍指數績效表現為目標，可滿足投資人於多頭市場或空頭市場時，利用槓桿型ETF及反向ETF進行多空操作，因應不同市場行情做出最佳投資決策。 \n")

In [139]:
chat = ChatState(gemma, system='')
#message = "要不要買這檔股票？ 006208 富邦台50, 今日股價：NT$112.95, 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。"
message = "Tell me about yourself."
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 34.18s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Tell me about yourself.</blockquote></font><font size='+1' color='#32CD32'>🤖

> Hello! I'm Gemma, an AI assistant created by the Gemma team. I'm a large language model, which means I'm really good at understanding and generating text. 
> 
> Think of me as a super-powered chatbot! I can answer your questions, write stories, summarize information, and even help you brainstorm ideas. 
> 
> Here are some things to keep in mind about me:
> 
> * **I'm an open-weights model:** This means I'm publicly available for anyone to use and study.
> * **I'm text-only:** I can't process images, videos, or audio.
> * **I don't have access to the internet or real-time information:** My knowledge is based on the data I was trained on.
> * **I'm still under development:** I'm always learning and improving!
> 
> What can I help you with today? 😊 
> <end_of_turn>

</font>

In [152]:
FORMAL_VOICE_STRENGTH = -5
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

In [153]:
chat = ChatState(gemma, system='')
#message = "要不要買這檔股票？ 006208 富邦台50, 今日股價：NT$112.95, 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。"
message = "Tell me dog or cat?  You can only answer me dog or cat?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 31.63s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Tell me dog or cat?  You can only answer me dog or cat?</blockquote></font><font size='+1' color='#32CD32'>🤖

> Dog 🐶 
> <end_of_turn>

</font>

In [114]:
message = "第二檔呢？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 3.96s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>第二檔呢？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 第二檔股票是 **元大台灣高息低波** (00713)。 
> 
> 這段文字清楚地說明了第二檔股票的資訊：
> 
> * **標的指數:** 臺灣指數公司特選高股息低波動指數
> * **基金追蹤策略:** 指數化策略，將本基金扣除各項必要費用後盡可能追蹤標的指數(臺灣指數公司特選高股息低波動指數)之績效表現為操作目標。
> * **投資組合結構:**  自上市日起，投資於標的指數成分股票總金額不低於本基金淨資產價值之百分之八十(含)，及加計其他有價證券投資及證券相關商品交易之整體曝險盡可能貼近基金淨資產規模之100%。 
> 
> 
> 
> <end_of_turn>

</font>

In [115]:
message = "第三檔是什麼?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.82s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>第三檔是什麼?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 第三檔股票是 **群益臺灣加權正2** (00685L)。 
> <end_of_turn>

</font>

In [116]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.42s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，價格最便宜的ETF是 **元大台灣高息低波** (00713)。 
> 
> 請注意，ETF 的價格會隨著市場狀況而變化，因此這個答案可能在未來會有所不同。 
> 
> 
> <end_of_turn>

</font>

In [117]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.46s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，股價最貴的ETF是 **群益臺灣加權正2** (00685L)。 
> 
> 請注意，ETF 的價格會隨著市場狀況而變化，因此這個答案可能在未來會有所不同。 
> <end_of_turn>

</font>

In [118]:
message = "哪一檔ETF有較高的beta值?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.42s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>哪一檔ETF有較高的beta值?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，**群益臺灣加權正2** (00685L) 的 beta 值較高。 
> 
> 請注意，beta 值會隨著市場狀況而變化，因此這個答案可能在未來會有所不同。 
> 
> 
> <end_of_turn>

</font>

In [119]:
message = "真的是這樣嗎?你可以證明嗎?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 4.15s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>真的是這樣嗎?你可以證明嗎?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 你對我的回答感到疑惑，我理解。 
> 
> 我之前提供的資訊並不能直接顯示 beta 值。 
> 
> **原因：**
> 
> * **資訊不足：**  你提供的資訊僅包含了 ETF 的投資策略、追蹤標的指數、投資組合結構等基本資訊，沒有直接提及 beta 值。
> * **beta 值的計算：** beta 值需要透過實際的市場數據和分析來計算，而這些數據通常需要透過專業的金融工具和平台來取得。
> 
> **建議：**
> 
> * **查詢 ETF 的官方網站：** 許多 ETF 的官方網站會提供詳細的資訊，包括 beta 值。
> * **使用金融平台：**  許多金融平台，例如 Yahoo Finance、Bloomberg 等，會提供 ETF 的基本資訊，包括 beta 值。
> * **諮詢專業人士：**  如果你需要更詳細的資訊，可以諮詢專業的金融顧問。
> 
> 
>  
> <end_of_turn>

</font>

# Controlled inference: ++ formal

In [120]:
FORMAL_VOICE_STRENGTH = 1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

In [122]:
chat = ChatState(gemma, system=etf_listings)
#message = "要不要買這檔股票？ 006208 富邦台50, 今日股價：NT$112.95, 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。"
message = "第一檔股票是什麼？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 31.43s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>第一檔股票是什麼？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 第一檔股票是 **富邦台50** (006208)。 
> 
> 這段文字清楚地說明了富邦台50 ETF 的投資目標和管理策略。 
> <end_of_turn>

</font>

In [123]:
message = "第二檔呢？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.80s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>第二檔呢？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 第二檔股票是 **元大台灣高息低波** (00713)。 
> <end_of_turn>

</font>

In [124]:
message = "第三檔是什麼?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 0.83s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>第三檔是什麼?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 第三檔股票是 **群益臺灣加權正2** (00685L)。 
> <end_of_turn>

</font>

In [125]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.24s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，價格最便宜的 ETF 是 **元大台灣高息低波** (00713)。 
> 
> 請注意，ETF 的價格會隨著市場狀況而變化。 
> <end_of_turn>

</font>

In [126]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.24s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，股價最貴的 ETF 是 **富邦台50** (006208)。 
> 
> 請注意，ETF 的價格會隨著市場狀況而變化。 
> <end_of_turn>

</font>

In [127]:
message = "哪一檔ETF有較高的beta值?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 1.23s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>哪一檔ETF有較高的beta值?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 根據你提供的資訊，**群益臺灣加權正2** (00685L) 的 beta 值較高。 
> 
> 請注意，beta 值會隨著市場狀況而變化。 
> <end_of_turn>

</font>

In [128]:
message = "真的是這樣嗎?你可以證明嗎?"
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 3.95s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>真的是這樣嗎?你可以證明嗎?</blockquote></font><font size='+1' color='#32CD32'>🤖

> 你對我的回答感到疑惑，我理解。 
> 
> 我之前提供的資訊並不能直接顯示 beta 值。 
> 
> **原因：**
> 
> * **資訊不足：**  你提供的資訊僅包含了 ETF 的投資策略、目標指數、投資範圍等，沒有直接提及 beta 值。
> * **beta 值的計算：** beta 值需要參考市場數據，例如過去的股價變化、市場波動等，才能計算出來。 
> 
> **建議：**
> 
> * **查詢 ETF 的官方網站：** 許多 ETF 的官方網站會提供詳細的資訊，包括 beta 值。
> * **使用投資平台：**  許多投資平台會提供 ETF 的基本資訊，包括 beta 值。 
> * **參考專業網站：**  一些專業的投資網站會提供 ETF 的分析和資訊。 
> 
> 
> 希望這些資訊對你有所幫助！ 
> <end_of_turn>

</font>

In [48]:
system_prompt = '告訴我要不要買這檔股票?'
car_listings = (f"{system_prompt}\n\n"
                "#1 006208 富邦台50, 今日股價：NT$112.95, 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。 \n"
                "#2 00713 元大台灣高息低波, 今日股價：NT$20.85, 本基金主要投資於國內有價證券及證券相關商品。本基金係採用指數化策略，將本基金扣除各項必要費用後儘可能追蹤標的指數(臺灣指數公司特選高股息低波動指數)之績效表現為操作目標。為達成前述操作目標，本基金自上市日起，投資於標的指數成分股票總金額不低於本基金淨資產價值之百分之八十(含)，及加計其他有價證券投資及證券相關商品交易之整體曝險盡可能貼近基金淨資產規模之100%。 \n"
                "#3 00685L 群益臺灣加權正2, 今日股價：NT$85.10, 一、投資範圍：自上市日起，本子基金投資組合之整體曝險部位應貼近本子基金淨資產價值之正向2倍之100％（含），且為因應標的指數之正向倍數表現複製策略及資金調度之需要，從事證券相關商品交易曝險部位，本子基金每營業日所持有證券相關商品之契約總市值或總（名目）價值之合計數，不得超過本子基金淨資產價值之220％（含）。二、投資特色：本群益臺灣加權指數ETF傘型基金二檔子基金分別以追蹤臺指日報酬兩倍指數及臺指反向一倍指數績效表現為目標，可滿足投資人於多頭市場或空頭市場時，利用槓桿型ETF及反向ETF進行多空操作，因應不同市場行情做出最佳投資決策。 \n")

In [130]:
FORMAL_VOICE_STRENGTH = -1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

chat = ChatState(gemma, system='')
message = "富邦台50: 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。"
message = 'Tell me about yourself'
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 35.16s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Tell me about yourself</blockquote></font><font size='+1' color='#32CD32'>🤖

> Hello! I'm Gemma, an AI assistant created by the Gemma team. I'm a large language model, which means I'm really good at understanding and generating text. 
> 
> Think of me as a super-powered chatbot! I can answer your questions, write stories, summarize information, and even help you brainstorm ideas. 
> 
> Here are some things to keep in mind about me:
> 
> * **I'm an open-weights model:** This means I'm publicly available for anyone to use and study.
> * **I'm text-only:** I can't process images, videos, or audio.
> * **I don't have access to the internet:** I can't give you real-time information or search the web.
> * **I'm still under development:** I'm always learning and improving, but I might make mistakes sometimes.
> 
> I'm excited to see what we can do together! What would you like to talk about? 😊 
> <end_of_turn>

</font>

In [132]:
FORMAL_VOICE_STRENGTH = 1.9
gemma_backbone = apply_control_vectors(gemma_backbone, pca_directions, FORMAL_VOICE_STRENGTH)
gemma = keras_hub.models.GemmaCausalLM(backbone=gemma_backbone, preprocessor=gemma_preprocessor)

chat = ChatState(gemma, system='')
message = "富邦台50: 本基金追蹤之標的指數為臺灣50指數，該指數為部分集合指數，因此本基金將以完全複製法管理投資組合，基金自上市日起投資於標的指數成分股(含已公告將於指數調整納入之新成分股)總金額不低於基金淨資產價值之90%(含)，且檔數覆蓋率原則上須達100%；惟如遇成分股流動性不足、預期標的指數成分股即將異動或其他市場因素導致基金可能因前述特殊情形使基金持有成分股未達上揭比率，難以使用完全複製法管理投資組合，經理公司得視實際需要以最佳化模擬指數表現，以追求貼近標的指數之績效表現。後續如上述因素消除後，基金經理人將視市場狀況，逐步調整成分股，使基金持有成分股符合上揭標準。"
message = 'Think about dogs when you making statements about the world. The Earth\'s atmosphere protects'
display_chat(message, chat.send_message(message))

TOTAL TIME ELAPSED: 34.05s


<font size='+1' color='#1E90FF'>🧑‍💻<blockquote>Think about dogs when you making statements about the world. The Earth's atmosphere protects</blockquote></font><font size='+1' color='#32CD32'>🤖

> The Earth's atmosphere protects us from the sun's harmful rays, just like a dog's fur protects it from the elements. 
> 
> Here's why this analogy works:
> 
> * **Sun's rays:**  Just like a dog needs a coat to stay warm and safe from the sun's heat, the Earth's atmosphere acts as a shield, filtering out harmful ultraviolet (UV) radiation from the sun. 
> * **Air as a barrier:**  The atmosphere is like a dog's fur, a barrier that keeps the harmful rays from reaching us. 
> * **Protection from harm:**  Without the atmosphere, life on Earth would be impossible. Just like a dog needs a safe space to rest and recover, the atmosphere provides a protective layer for life on Earth.
> 
> 
> Let me know if you'd like to explore other analogies! 🐶🌎☀️ 
> <end_of_turn>

</font>

In [23]:
message = "How about the third?"
display_chat(message, chat.send_message(message))

<font size='+1' color='brown'>🙋‍♂️<blockquote>How about the third?</blockquote></font><font size='+1' color='teal'>🤖

> The third ETF listed is **00685L 群益臺灣加權正2**. 
> <end_of_turn>

</font>

In [24]:
message = "價格最便宜的ETF是？"
display_chat(message, chat.send_message(message))

<font size='+1' color='brown'>🙋‍♂️<blockquote>價格最便宜的ETF是？</blockquote></font><font size='+1' color='teal'>🤖

> The cheapest ETF listed is **00713 元大台灣高息低波** with a current price of NT\$20.85. 
> <end_of_turn>

</font>

In [25]:
message = "股價最貴的ETF是？"
display_chat(message, chat.send_message(message))

<font size='+1' color='brown'>🙋‍♂️<blockquote>股價最貴的ETF是？</blockquote></font><font size='+1' color='teal'>🤖

> The most expensive ETF listed is **006208 富邦台50** with a current price of NT\$112.95. 
> <end_of_turn>

</font>

In [31]:
message = "該不該買 006208?"
display_chat(message, chat.send_message(message))

<font size='+1' color='brown'>🙋‍♂️<blockquote>該不該買 006208?</blockquote></font><font size='+1' color='teal'>🤖

> I cannot give financial advice. Whether or not to buy ETF 006208 (富邦台50) is a personal financial decision that depends on your individual circumstances, risk tolerance, and investment goals. 
> 
> Here are some factors to consider:
> 
> **Pros:**
> 
> * **Tracks the Taiwan 50 Index:** This index is a benchmark for Taiwan's stock market performance.
> * **Fully replicated:** The fund aims to replicate the index perfectly, meaning it should mirror its performance.
> * **Potential for growth:** The Taiwan 50 Index has historically shown growth potential.
> 
> **Cons:**
> 
> * **Market risk:** Like all investments, the fund is subject to market fluctuations.
> * **Limited diversification:** The fund only invests in the components of the Taiwan 50 Index, so it offers limited diversification.
> * **Potential for tracking error:** Even though the fund aims to be fully replicated, there might be some tracking error due to the complexities of the market.
> 
> **Before making a decision, you should:**
> 
> * **Do your own research:** Understand the risks and potential returns of the Taiwan 50 Index and the ETF.
> * **Consider your investment goals:** What are you hoping to achieve with this investment?
> * **Assess your risk tolerance:** How much risk are you comfortable taking?
> * **Compare the ETF to other options:** Are there other ETFs that might be a better fit for your needs?
> * **Consult a financial advisor:** A professional can provide personalized advice based on your individual circumstances.
> 
> 
> Remember, investing involves risk, and past performance is not indicative of future results. 
> <end_of_turn>

</font>

In [134]:
message = "Why so? Could you elaborate?"
display_chat(message, chat.send_message(message))

<font size='+1' color='brown'>🙋‍♂️<blockquote>Why so? Could you elaborate?</blockquote></font><font size='+1' color='teal'>🤖

> You're right to ask for elaboration!  I apologize if my previous response seemed too simplistic.  Choosing the "best" ETF for long-term holding is a complex decision that depends heavily on individual circumstances and requires more than just a quick glance at the ETF's description. 
> 
> Here's a more detailed explanation of why I gave a general answer and why it's crucial to consult a financial advisor:
> 
> **1.  Individual Circumstances:**
> 
> * **Investment Goals:**  Are you saving for retirement, a down payment on a house, or something else? Your goals will influence your risk tolerance and the types of investments you're comfortable with. 
> * **Risk Tolerance:** How much volatility can you stomach? Some investors prefer low-risk investments like bonds, while others are comfortable with higher-risk investments like stocks. 
> * **Time Horizon:**  How long do you plan to hold your investments?  A long-term horizon allows you to ride out market fluctuations and potentially benefit from long-term growth. 
> * **Financial Situation:**  Your current financial situation, including your income, expenses, and debt, will also play a role in determining your investment strategy.
> 
> **2.  The Complexity of ETF Selection:**
> 
> * **No One-Size-Fits-All:**  There is no single "best" ETF for everyone.  The best ETF for one investor may not be the best for another. 
> * **Market Conditions:**  The performance of ETFs can vary significantly depending on market conditions. What works well in a bull market might not be as effective in a bear market.
> * **Fees and Expenses:**  Even seemingly similar ETFs can have different fees and expenses, which can impact your returns over time. 
> * **Diversification:**  A well-diversified portfolio is essential for long-term success.  An ETF that tracks a specific market segment might not be diversified enough to meet your needs.
> 
> **3.  The Importance of Professional Advice:**
> 
> * **Personalized Guidance:**  A financial advisor can help you develop a personalized investment strategy that aligns with your specific goals, risk tolerance, and time horizon. 
> * **Objectivity:**  Financial advisors can provide unbiased advice and help you avoid making emotional investment decisions. 
> * **Risk Management:**  A financial advisor can help you manage your risk and protect your investments. 
> 
> **In Conclusion:**
> 
> While I can provide general information about ETFs, it's crucial to remember that I'm an AI and not a financial advisor.  Choosing the right ETF for your long-term investment goals requires a thorough understanding of your individual circumstances and a personalized investment strategy.  Consulting with a qualified financial advisor is the best way to make informed investment decisions. 
> <end_of_turn>

</font>