In [3]:
# Test Canada model

from transformers import pipeline
import os

# --- Checkpoint to evaluate (edit before running) ---
# Use the path of the most recent checkpoint, for example
# r'D:\market_data\text_data\CHECKPOINTS\CANADA-fin-roberta\checkpoint-4000'
YOUR_TRAINED_MODEL_PATH = r"D:\market_data\text_data\CHECKPOINTS\CANADA-fin-roberta\checkpoint-4020"

# --- Fill-mask prompts ---
# Every prompt carries exactly one <mask>; the trailing note says what the
# prompt is meant to probe.
test_sentences = [
    "The capital of France is <mask>.",  # 1. general knowledge (did finetuning break it?)
    "The Toronto Stock Exchange is commonly known as the <mask>.",  # 2. Canadian financial knowledge (the key test)
    "For the quarter, our company's net <mask> was $50 million.",  # 3. financial context
    "We expect to see high <mask> in the next quarter.",  # 4. financial context
    "All public filings are available on <mask>.",  # 5. Canadian-specific acronyms
    "The <mask> Venture Exchange is home to many mining companies.",  # 6. Canadian-specific institutions
    "The Bank of <mask> sets the overnight interest rate.",  # 7. Canadian-specific institutions
]

# --- Script ---

def run_test():
    if YOUR_TRAINED_MODEL_PATH == r'PLEASE_SET_YOUR_LATEST_CHECKPOINT_PATH':
        print(f"ERROR: Please open '{__file__}' and set the 'YOUR_TRAINED_MODEL_PATH' variable.")
        return

    if not os.path.exists(YOUR_TRAINED_MODEL_PATH):
        print(f"ERROR: Path not found: {YOUR_TRAINED_MODEL_PATH}")
        print("Please make sure your checkpoint path is correct.")
        return

    print("Loading models... This may take a moment.")
    
    try:
        # Load the original, generic roberta-base
        base_model = pipeline('fill-mask', model='roberta-base')
        
        # Load your new, finetuned model from the checkpoint
        finetuned_model = pipeline('fill-mask', model=YOUR_TRAINED_MODEL_PATH)
        
        print("Models loaded. Running tests...\n")
        print("="*50)

        for sentence in test_sentences:
            print(f"\nPrompt: {sentence}")
            
            # Test Base Model
            print("\n--- 1. Base 'roberta-base' Predictions ---")
            base_results = base_model(sentence, top_k=5)
            for result in base_results:
                print(f"  Token: {result['token_str']}\t(Score: {result['score']:.4f})")
                
            # Test Finetuned Model
            print("\n--- 2. Finetuned Model Predictions ---")
            finetuned_results = finetuned_model(sentence, top_k=5)
            for result in finetuned_results:
                print(f"  Token: {result['token_str']}\t(Score: {result['score']:.4f})")
            
            print("="*50)

    except Exception as e:
        print(f"\nAn error occurred: {e}")
        print("This can happen if the model path is wrong or 'transformers' is not installed.")

# Run the comparison only when this code executes as the top-level
# script/cell, not when it is imported as a module.
if __name__ == "__main__":
    run_test()


Loading models... This may take a moment.


Device set to use cuda:0
Device set to use cuda:0


Models loaded. Running tests...


Prompt: The capital of France is <mask>.

--- 1. Base 'roberta-base' Predictions ---
  Token:  Paris	(Score: 0.9036)
  Token:  Lyon	(Score: 0.0803)
  Token:  Nice	(Score: 0.0048)
  Token:  Nancy	(Score: 0.0021)
  Token:  Napoleon	(Score: 0.0011)

--- 2. Finetuned Model Predictions ---
  Token:  Paris	(Score: 0.7924)
  Token:  Lyon	(Score: 0.1572)
  Token:  Nancy	(Score: 0.0190)
  Token:  Nice	(Score: 0.0139)
  Token:  Cannes	(Score: 0.0041)

Prompt: The Toronto Stock Exchange is commonly known as the <mask>.

--- 1. Base 'roberta-base' Predictions ---
  Token:  TTC	(Score: 0.4028)
  Token:  Exchange	(Score: 0.1565)
  Token:  Toronto	(Score: 0.1346)
  Token:  Index	(Score: 0.0300)
  Token:  index	(Score: 0.0235)

--- 2. Finetuned Model Predictions ---
  Token:  Exchange	(Score: 0.1965)
  Token:  "	(Score: 0.1597)
  Token:  Toronto	(Score: 0.0802)
  Token:  Company	(Score: 0.0487)
  Token:  TS	(Score: 0.0397)

Prompt: For the quarter, our company's net <

In [5]:
# Test Asia Pacific

from transformers import pipeline
import os

# --- Checkpoint to evaluate (edit before running) ---
# Use the path of the most recent checkpoint, for example
# r'D:\market_data\text_data\CHECKPOINTS\ASIA_PACIFIC-fin-roberta\checkpoint-9000'
YOUR_TRAINED_MODEL_PATH = r"D:\market_data\text_data\CHECKPOINTS\ASIA_PACIFIC-fin-roberta\checkpoint-9600"

# --- Fill-mask prompts ---
# Every prompt carries exactly one <mask>; the trailing note says what the
# prompt is meant to probe.
test_sentences = [
    "The capital of Japan is <mask>.",  # 1. general knowledge (did finetuning break it?)
    "The <mask> is the benchmark index for the Tokyo Stock Exchange.",  # 2. major exchanges
    "The <mask> Seng Index is the main stock market index in Hong Kong.",  # 3. major exchanges
    "The Shanghai Stock <mask> is a major exchange in mainland China.",  # 4. major exchanges
    "The company's profit for the year was 10 billion <mask>.",  # 5. financial context (currency)
    "Many tech companies like Samsung are listed on the <mask>.",  # 6. specific country knowledge
    "The <mask> Exchange (SGX) is a key financial hub in Asia.",  # 7. regional context
]

# --- Script ---

def run_test():
    if YOUR_TRAINED_MODEL_PATH == r'PLEASE_SET_YOUR_LATEST_CHECKPOINT_PATH':
        print(f"ERROR: Please open '{__file__}' and set the 'YOUR_TRAINED_MODEL_PATH' variable.")
        return

    if not os.path.exists(YOUR_TRAINED_MODEL_PATH):
        print(f"ERROR: Path not found: {YOUR_TRAINED_MODEL_PATH}")
        print("Please make sure your checkpoint path is correct.")
        return

    print("Loading models... This may take a moment.")
    
    try:
        # Load the original, generic roberta-base
        base_model = pipeline('fill-mask', model='roberta-base')
        
        # Load your new, finetuned model from the checkpoint
        finetuned_model = pipeline('fill-mask', model=YOUR_TRAINED_MODEL_PATH)
        
        print("Models loaded. Running tests...\n")
        print("="*50)

        for sentence in test_sentences:
            print(f"\nPrompt: {sentence}")
            
            # Test Base Model
            print("\n--- 1. Base 'roberta-base' Predictions ---")
            base_results = base_model(sentence, top_k=5)
            for result in base_results:
                print(f"  Token: {result['token_str']}\t(Score: {result['score']:.4f})")
                
            # Test Finetuned Model
            print("\n--- 2. Finetuned Model Predictions ---")
            finetuned_results = finetuned_model(sentence, top_k=5)
            for result in finetuned_results:
                print(f"  Token: {result['token_str']}\t(Score: {result['score']:.4f})")
            
            print("="*50)

    except Exception as e:
        print(f"\nAn error occurred: {e}")
        print("This can happen if the model path is wrong or 'transformers' is not installed.")

# Run the comparison only when this code executes as the top-level
# script/cell, not when it is imported as a module.
if __name__ == "__main__":
    run_test()


Loading models... This may take a moment.


Device set to use cuda:0
Device set to use cuda:0


Models loaded. Running tests...


Prompt: The capital of Japan is <mask>.

--- 1. Base 'roberta-base' Predictions ---
  Token:  Tokyo	(Score: 0.8616)
  Token:  Kyoto	(Score: 0.0970)
  Token:  Osaka	(Score: 0.0224)
  Token:  Hiroshima	(Score: 0.0109)
  Token:  Kobe	(Score: 0.0029)

--- 2. Finetuned Model Predictions ---
  Token:  Tokyo	(Score: 0.8783)
  Token:  Kyoto	(Score: 0.0331)
  Token:  yen	(Score: 0.0170)
  Token:  Osaka	(Score: 0.0149)
  Token:  Japan	(Score: 0.0085)

Prompt: The <mask> is the benchmark index for the Tokyo Stock Exchange.

--- 1. Base 'roberta-base' Predictions ---
  Token:  benchmark	(Score: 0.0847)
  Token:  yen	(Score: 0.0635)
  Token:  index	(Score: 0.0549)
  Token: IX	(Score: 0.0546)
  Token:  Index	(Score: 0.0491)

--- 2. Finetuned Model Predictions ---
  Token:  Index	(Score: 0.2599)
  Token:  index	(Score: 0.1615)
  Token:  above	(Score: 0.1109)
  Token:  yen	(Score: 0.0460)
  Token:  benchmark	(Score: 0.0438)

Prompt: The <mask> Seng Index is the main s