==============================可用程式區【HEAD】==============================

In [1]:
import tensorflow as tf
import bert
import numpy as np
import tflite_runtime.interpreter as tflite
import platform

#加速棒的code
EDGETPU_SHARED_LIB = {'Linux': 'libedgetpu.so.1' ,
                      'Darwin': 'libedgetpu.1.dylib',
                      'Windows': 'edgetpu.dll'}[platform.system()]
def make_interpreter(model_file):
    model_file , *device = model_file.split('@')
    return tflite.Interpreter(model_path = model_file ,
                              experimental_delegates = [tflite.load_delegate(EDGETPU_SHARED_LIB ,{'device': device[0]} if device else {})])

class MobileBERT:
	def __init__(self, tflite_path, tokenizer_file_path):
		self.max_length = 384 #要跟取用的模型吻合
		self.interpreter = make_interpreter(tflite_path)
		self.tokenizer = bert.bert_tokenization.FullTokenizer(tokenizer_file_path, True) #取用映射表作為tokenizer
		self.interpreter.allocate_tensors() #根據模型的要求，分配記憶體以供輸入和輸出張量使用。需要在執行推理之前，至少執行一次 allocate_tensors()。
		self.input_details = self.interpreter.get_input_details() #取得模型的input層資訊
		self.output_details = self.interpreter.get_output_details() #取得模型的output層資訊

	def get_summary(self):
		print("Inputs:",self.input_details,"\nOutputs:",self.output_details)

	def get_masks(self,tokens):
		if len(tokens)>self.max_length:
			raise IndexError("Token length more than max seq length!")
		return np.asarray([1]*len(tokens) + [0] * (self.max_length - len(tokens)))


	def get_segments(self,tokens):
		if len(tokens)>self.max_length:
			raise IndexError("Token length more than max seq length!")
		segments = []
		current_segment_id = 0
		for token in tokens:
			segments.append(current_segment_id)
			if token == "[SEP]":
				current_segment_id = 1
		return np.asarray(segments + [0] * (self.max_length - len(tokens)))


	def get_ids(self,tokens):
		token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
		input_ids = token_ids + [0] * (self.max_length-len(token_ids))
		return np.asarray(input_ids)

	def compile_text(self,text):
		text = text.lower().replace("-"," ")
		return ["[CLS]"] + self.tokenizer.tokenize(text) + ["[SEP]"]

	def run(self,query,context):
		stokens =  self.compile_text(query) + self.compile_text(context)

		if len(stokens)>self.max_length:
			raise IndexError("Token length more than max seq length!")
			print("Max exceeded")
		input_ids = tf.dtypes.cast(self.get_ids(stokens),tf.int32)
		input_masks = tf.dtypes.cast(self.get_masks(stokens),tf.int32)
		input_segments = tf.dtypes.cast(self.get_segments(stokens),tf.int32)

		self.interpreter.set_tensor(self.input_details[0]['index'], [input_ids])
		self.interpreter.set_tensor(self.input_details[1]['index'], [input_masks])
		self.interpreter.set_tensor(self.input_details[2]['index'], [input_segments])

		with tf.device('/CPU:0'):
			self.interpreter.invoke() #運行推理

		end_logits = self.interpreter.get_tensor(self.output_details[0]['index'])
		start_logits = self.interpreter.get_tensor(self.output_details[1]['index'])

		end = tf.argmax(end_logits,output_type=tf.dtypes.int32).numpy()[0]
		start = tf.argmax(start_logits,output_type=tf.dtypes.int32).numpy()[0]

		answers = " ".join(stokens[start:end+1]).replace("[CLS]","").replace("[SEP]","").replace(" ##","")
		return answers


2023-08-30 10:17:20.272762: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-08-30 10:17:20.273008: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


ModuleNotFoundError: No module named 'tflite_runtime'

In [9]:
m = MobileBERT('lite-model_mobilebert_1_metadata_1.tflite','vocab.txt') #tflite模型和tokenizer映射檔
answer = m.run(
"The Apollo program, also known as Project Apollo, was the third United States human spaceflight program carried out by NASA, which succeeded in landing the first humans on the Moon from 1969 to 1972.",
"What was the goal of the Apollo program?"
)
print(answer)
print("***運作到這邊還沒東西就是真的沒答案啦***")


***運作到這邊還沒東西就是真的沒答案啦***


==============================可用程式區【END】==============================

底下為測試用程式

In [1]:
from transformers import TFDistilBertForQuestionAnswering
import tensorflow as tf

desired_model = "distilbert-base-uncased" #要調用的模型
model = TFDistilBertForQuestionAnswering.from_pretrained(desired_model)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                       tf.lite.OpsSet.SELECT_TF_OPS]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('./mobileBERT2.tflite', 'wb') as f:
    f.write(tflite_model)

  from .autonotebook import tqdm as notebook_tqdm
2023-08-28 17:02:38.421574: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-08-28 17:02:38.422443: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-08-28 17:04:13.039430: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-08-28 17:04:13.147858: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-08-28 17:04:17.347857: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:927] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-28 17:04:17.354670: I tensorflow/core/common_runtime



The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
T

INFO:tensorflow:Assets written to: /tmp/tmpjgilqc07/assets


INFO:tensorflow:Assets written to: /tmp/tmpjgilqc07/assets
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2023-08-28 17:09:17.875560: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:927] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-28 17:09:17.893253: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2023-08-28 17:09:17.919728: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2023-08-28 17:09:17.986759: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not s

: 

: 

In [2]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                       tf.lite.OpsSet.SELECT_TF_OPS]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('./mobileBERT2.tflite', 'wb') as f:
    f.write(tflite_model)

The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
2

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
T

: 

: 

In [1]:
from transformers import AutoTokenizer, TFMobileBertForQuestionAnswering
import tensorflow as tf

desired_model = "vumichien/mobilebert-uncased-squad-v2" #要調用的模型
tokenizer = AutoTokenizer.from_pretrained(desired_model)
model = TFMobileBertForQuestionAnswering.from_pretrained(desired_model)

question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

inputs = tokenizer(question, text, return_tensors="tf")
outputs = model(**inputs)

answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens)

  from .autonotebook import tqdm as notebook_tqdm
2023-08-27 00:15:39.901742: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-08-27 00:15:39.901888: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-08-27 00:16:03.534719: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-08-27 00:16:03.671505: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-08-27 00:16:05.482667: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:927] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-27 00:16:05.482945: I tensorflow/core/common_runtime

'a nice puppet'

In [None]:
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("vumichien/mobilebert-uncased-squad-v2")
tflite_model_path = './lite-model_mobilebert_1_metadata_1.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

context = "The Apollo program, also known as Project Apollo, was the third United States human spaceflight program carried out by NASA, which succeeded in landing the first humans on the Moon from 1969 to 1972."
question = "What was the goal of the Apollo program?"

# 使用分詞器將文章和問題轉換成模型所需的輸入格式
inputs = tokenizer(question, context, return_tensors="tf")

input_ids  = np.array(inputs['input_ids'], dtype=np.int32)
input_mask = np.array(inputs['token_type_ids'], dtype=np.int32)
segment_ids  = np.array(inputs['attention_mask'], dtype=np.int32)

interpreter.set_tensor(input_details[0]["index"], input_ids)
interpreter.set_tensor(input_details[1]["index"], input_mask)
interpreter.set_tensor(input_details[2]["index"], segment_ids)
interpreter.invoke()

end_logits = interpreter.get_tensor(output_details[0]["index"])[0]
start_logits = interpreter.get_tensor(output_details[1]["index"])[0]

predict_answer_tokens = inputs.input_ids[0, start_logits : end_logits + 1]
tokenizer.decode(predict_answer_tokens)


In [None]:
tokenizer = AutoTokenizer.from_pretrained("mobilebert/uncased_L-24_H-128_B-512_A-4_F-4_OPT")
tokenizer(question, context, return_tensors="tf")

In [6]:
tokenizer.encode_plus(question, context, return_tensors="tf")

{'input_ids': <tf.Tensor: shape=(1, 51), dtype=int32, numpy=
array([[  101,  2054,  2001,  1996,  3125,  1997,  1996,  9348,  2565,
         1029,   102,  1996,  9348,  2565,  1010,  2036,  2124,  2004,
         2622,  9348,  1010,  2001,  1996,  2353,  2142,  2163,  2529,
         2686, 28968,  2565,  3344,  2041,  2011,  9274,  1010,  2029,
         4594,  1999,  4899,  1996,  2034,  4286,  2006,  1996,  4231,
         2013,  3440,  2000,  3285,  1012,   102]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(1, 51), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 51), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

In [None]:
def compile_text(text):
		text = text.lower().replace("-"," ")
		return ["[CLS]"] + tokenizer.tokenize(text) + ["[SEP]"]

def run(query,context):
	interpreter = tf.lite.Interpreter(tflite_model_path)
    input_details = self.input_details
    output_details = self.output_details

    input_ids = np.array(input_ids, dtype=np.int32)
    input_mask = np.array(input_mask, dtype=np.int32)
    segment_ids = np.array(segment_ids, dtype=np.int32)

    interpreter.set_tensor(input_details[0]["index"], input_ids)
    interpreter.set_tensor(input_details[1]["index"], input_mask)
    interpreter.set_tensor(input_details[2]["index"], segment_ids)
    interpreter.invoke()

    end_logits = interpreter.get_tensor(output_details[0]["index"])[0]
    start_logits = interpreter.get_tensor(output_details[1]["index"])[0]
    return start_logits, end_logits


In [1]:
import tensorflow as tf
from transformers import TFMobileBertForQuestionAnswering, MobileBertTokenizer

# 載入分詞器和模型
tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
model = TFMobileBertForQuestionAnswering.from_pretrained('google/mobilebert-uncased')

# 輸入的文章和問題
context = "The Apollo program, also known as Project Apollo, was the third United States human spaceflight program carried out by NASA, which succeeded in landing the first humans on the Moon from 1969 to 1972."
question = "What was the goal of the Apollo program?"

# 使用分詞器將文章和問題轉換成模型所需的輸入格式
inputs = tokenizer(question, context, return_tensors="tf")

outputs = model(**inputs)

answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens)


2023-08-25 10:17:39.680853: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-08-25 10:17:39.681018: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from .autonotebook import tqdm as notebook_tqdm
2023-08-25 10:17:57.987749: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-08-25 10:17:58.010242: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-08-25 10:17:59.701734: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:927] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-25 10:17:59.701829: I tensorflow/core/common_runtime

NameError: name 'text' is not defined

In [3]:
# 使用分詞器將文章和問題轉換成模型所需的輸入格式
inputs = tokenizer(question, context, return_tensors="tf")

outputs = model(**inputs)

answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens)

''

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                       tf.lite.OpsSet.SELECT_TF_OPS]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('./mobileBERT.tflite', 'wb') as f:
    f.write(tflite_model)

In [None]:
import tensorflow as tf
import numpy as np
from transformers import MobileBertTokenizer
import platform
# import tflite_runtime.interpreter as tflite

# tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
# tflite_model_path = "./mobileBERT.tflite"

# EDGETPU_SHARED_LIB = {'Linux': 'libedgetpu.so.1' ,
#                       'Darwin': 'libedgetpu.1.dylib',
#                       'Windows': 'edgetpu.dll'}[platform.system()]

# def make_interpreter(model_file):
#     model_file , *device = model_file.split('@')
#     return tflite.Interpreter(model_path = model_file ,
#                               experimental_delegates = [tflite.load_delegate(EDGETPU_SHARED_LIB ,
#                                                                              {'device': device[0]} if device else {})])
# interpreter = make_interpreter(tflite_model_path)
tflite_model_path = './lite-model_mobilebert_1_metadata_1.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Input context and question
context = "The Apollo program, also known as Project Apollo, was the third United States human spaceflight program carried out by NASA, which succeeded in landing the first humans on the Moon from 1969 to 1972."
question = "What was the goal of the Apollo program?"

# Use tokenizer to convert context and question to model input format
tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
inputs = tokenizer(question, context, return_tensors="tf")

input_ids = np.array(inputs['input_ids'], dtype=np.int32)

# input_ids = input_ids[:, :5]  

# Run inference
input_details = interpreter.get_input_details()
interpreter.set_tensor(input_details[0]['index'], input_ids)
interpreter.invoke()

# Get prediction results
start_logits = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
end_logits = interpreter.get_tensor(interpreter.get_output_details()[1]['index'])

# Get the answer from the prediction
start_index = np.argmax(start_logits)
end_index = np.argmax(end_logits) + 1  # Ending index needs to be incremented by 1

answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index]))

print("Answer:", answer)