# In [None]:  (Jupyter notebook cell marker left over from export)
import transformers
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

class LlamaTextGenerator:
    """Small wrapper around a Hugging Face text-generation pipeline.

    Creating an instance only records the checkpoint location; the
    tokenizer, model and pipeline are built by ``load_model()``.
    """

    def __init__(self, model_path):
        """Remember where the checkpoint lives without loading anything.

        Args:
            model_path: Value handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.model_path = model_path
        # Filled in by load_model(); None means "not loaded yet".
        self.tokenizer = None
        self.model = None
        self.pipeline = None

    def load_model(self):
        """Load the tokenizer and the model, then build the pipeline."""
        # Load the tokenizer.
        # AutoTokenizer selects the tokenizer class the checkpoint declares.
        # The SentencePiece-based LlamaTokenizer cannot read Llama 3
        # checkpoints, whose tokenizer is a tiktoken-style BPE shipped as
        # tokenizer.json.
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            self.model_path
        )

        # Load the model.
        self.model = LlamaForCausalLM.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            device_map="auto",
        )

        # Build the pipeline.
        # The model is already instantiated with its dtype and device
        # placement, so torch_dtype/device_map are not repeated here: the
        # pipeline factory only forwards them to from_pretrained when it
        # loads the model itself.
        self.pipeline = transformers.pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )

    def generate_text(self, prompt, max_length=50, num_return_sequences=1,
                      *, return_all=False):
        """Generate a continuation of ``prompt``.

        Args:
            prompt: Text to continue.
            max_length: Forwarded to the pipeline as ``max_length``. In the
                Transformers generation API this limit covers the prompt
                tokens plus the newly generated ones.
            num_return_sequences: Number of candidate sequences requested
                from the pipeline.
            return_all: When true, return every generated candidate as a
                list of strings. When false (the default) only the first
                candidate is returned.

        Returns:
            The first generated text as a string, or a list with all
            generated texts when ``return_all`` is true.

        Raises:
            ValueError: If ``load_model()`` has not been called yet.
        """
        if self.pipeline is None:
            raise ValueError("Model is not loaded. Please call load_model() first.")

        candidates = self.pipeline(
            prompt,
            max_length=max_length,
            num_return_sequences=num_return_sequences,
        )
        if return_all:
            return [candidate["generated_text"] for candidate in candidates]
        return candidates[0]["generated_text"]

def main():
    """Load the local Llama checkpoint, generate one reply and print it."""
    # Build the generator for the checkpoint directory and load its weights.
    generator = LlamaTextGenerator("./Meta-Llama-3-8B")
    generator.load_model()

    # Run generation for a fixed sample prompt and show the result.
    print(generator.generate_text("Hey how are you doing today?"))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()