# Test script for your environment

This Jupyter notebook helps to test the environment. It is a basic "Hello World" equivalent for the HuggingFace libraries.

It imports the right libraries, downloads the right model and runs a simple test.

When you run this script from Visual Studio Code, make sure that you use the right interpreter -- the one that is in your virtual environment. You can add it via the command palette `(Ctrl+Shift+P)` and then `Python: Select Interpreter` -- select the one that is in your virtual environment (or navigate to the right path to add it -- do NOT create a new virtual environment).

In [1]:
# Environment tweaks that keep unrelated log/warning noise out of the notebook.
# They are applied up front, before the heavy libraries are imported in later cells.
import os
import warnings

os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'TOKENIZERS_PARALLELISM': 'true',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'PYDEVD_DISABLE_FILE_VALIDATION': '1',
})

# blanket-ignore Python warnings for the rest of the session
warnings.filterwarnings(action="ignore")

In [14]:
# Sanity check: the transformers library must be importable from this interpreter.
# Expected output: the installed transformers version string.
import transformers

transformers_version = transformers.__version__
print(transformers_version)

4.38.2


In [15]:
# Same sanity check for PyTorch: import it and print the installed version.
import torch

torch_version = torch.__version__
print(torch_version)

2.2.1+cu121


In [16]:
# ...and for TensorFlow: import it and print the installed version.
import tensorflow as tf

tensorflow_version = tf.__version__
print(tensorflow_version)

2.15.0


In [3]:
# Smoke test with a simple RoBERTa model trained by our research group on Singletons:
# build a fill-mask pipeline and ask it to complete one masked sentence.
from transformers import pipeline

unmasker = pipeline(
    task='fill-mask',
    model='mstaron/SingletonBERT',
)

# the cell's last expression is displayed as its output: the candidate fills for <mask>
unmasker("Hello I'm a <mask> model.")

[{'score': 0.36955469846725464,
  'token': 13963,
  'token_str': ' sliding',
  'sequence': "Hello I'm a sliding model."},
 {'score': 0.1200207844376564,
  'token': 11430,
  'token_str': ' working',
  'sequence': "Hello I'm a working model."},
 {'score': 0.07376236468553543,
  'token': 1464,
  'token_str': ' new',
  'sequence': "Hello I'm a new model."},
 {'score': 0.06340111047029495,
  'token': 1258,
  'token_str': ' into',
  'sequence': "Hello I'm a into model."},
 {'score': 0.03293626010417938,
  'token': 3596,
  'token_str': ' projective',
  'sequence': "Hello I'm a projective model."}]

In [None]:
# Larger end-to-end test: load an instruction-tuned code model, send it one chat
# message and print the generated answer.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# single source of truth for the checkpoint, so tokenizer and model cannot drift apart
MODEL_ID = "deepseek-ai/deepseek-coder-6.7b-instruct"
# upper bound on the number of tokens generated for the answer
MAX_NEW_TOKENS = 512

# NOTE(review): trust_remote_code=True allows code shipped with the model repository
# to run locally -- confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True, torch_dtype=torch.bfloat16)

messages = [
    {'role': 'user', 'content': "write a quick sort algorithm in python."}
]
# render the conversation with the model's chat template and move the token ids
# to whatever device the model lives on
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)

# Greedy decoding (do_sample=False). The sampling-only arguments top_k / top_p are
# deliberately not passed: they have no effect unless sampling is enabled.
# tokenizer.eos_token_id is the id of <|EOT|> token
outputs = model.generate(
    inputs,
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# the returned sequence starts with the prompt tokens; decode only what was generated after them
prompt_length = inputs.shape[1]
print(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))
