### meta-llama/Meta-Llama-3-70B

In [None]:
# from transformers import pipeline
# 
# pipe_70B = pipeline("text-generation", model="meta-llama/Meta-Llama-3-70B")
# 
# # Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# 
# tokenizer_70B = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-70B")
# model_70B = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-70B")

In [None]:
# from transformers import pipeline
# 
# model="meta-llama/CodeLlama-7b-Python-hf"
# pipe = pipeline("text-generation", model=model)
# 
# # Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# 
# tokenizer = AutoTokenizer.from_pretrained(model)
# model = AutoModelForCausalLM.from_pretrained(model)

In [7]:
!pip install transformers torch bitsandbytes

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.42.0


In [None]:
# huggingface-cli download meta-llama/Meta-Llama-3-8B --include "original/*" --local-dir Meta-Llama-3-8B


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gc

# Report the PyTorch build in use and the inference device.
print(
    f"PyTorch version: {torch.__version__}",
    "Using CPU for inference",
    sep="\n",
)

def setup_model(model_name):
    """Load a causal-LM checkpoint and wrap it in a CPU text-generation pipeline.

    Args:
        model_name: Identifier handed to the ``from_pretrained`` loaders.

    Returns:
        A ``(pipe, tokenizer, model)`` tuple.

    Raises:
        Exception: Any error raised while loading is reported on stdout and
            then re-raised unchanged.
    """
    try:
        # Tokenizer first, then the model itself (no quantization applied).
        tok = AutoTokenizer.from_pretrained(model_name)
        lm = AutoModelForCausalLM.from_pretrained(model_name)

        # device=-1 selects the CPU for the pipeline.
        generator = pipeline("text-generation", model=lm, tokenizer=tok, device=-1)
    except Exception as err:
        print(f"モデルのセットアップ中にエラーが発生しました: {err}")
        raise
    return generator, tok, lm

def generate_code(pipe, prompt, max_new_tokens=200):
    """Generate Python code for ``prompt`` and return the first code block.

    The task is embedded in a Human/Assistant template that ends with an
    opening ```` ```python ```` fence, so the model's continuation starts
    inside a code block. Only the text up to the first closing fence is
    returned; anything the model keeps generating afterwards (further
    explanations, extra dialogue turns, additional code blocks) is dropped.

    Args:
        pipe: Callable invoked as ``pipe(text, **generation_kwargs)`` that
            returns a sequence whose first item has a ``'generated_text'`` key.
        prompt: Natural-language description of the code to generate.
        max_new_tokens: Upper bound on generated tokens, forwarded to ``pipe``.

    Returns:
        The extracted code with surrounding whitespace stripped (possibly an
        empty string), or ``None`` if generation or extraction raised.
    """
    fence_open = "```python\n"
    fence_close = "```"
    try:
        full_prompt = (
            f"Human: Write Python code for the following task. Provide detailed comments and error handling where appropriate: {prompt}\n\nAssistant: Certainly! Here's the Python code for the task you described, including detailed comments and error handling:\n\n"
            + fence_open
        )
        outputs = pipe(
            full_prompt,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.1,
        )
        text = outputs[0]['generated_text']

        if text.startswith(full_prompt):
            # Usual case: the prompt is echoed back. Slice it off by length so
            # fences inside the user's prompt or later in the completion
            # cannot be mistaken for the start of the answer.
            completion = text[len(full_prompt):]
        else:
            # Prompt not echoed verbatim: fall back to the first opening
            # fence, or treat the whole text as the completion if none exists.
            _, found, tail = text.partition(fence_open)
            completion = tail if found else text

        # Keep only the first code block, i.e. everything before the first
        # closing fence.
        return completion.split(fence_close)[0].strip()
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"エラーが発生しました: {e}")
        return None

def main():
    """Run an interactive prompt loop that generates Python code on demand.

    Loads the model once via ``setup_model``, then repeatedly reads a task
    description from standard input and prints the code produced by
    ``generate_code``. The loop ends when the user enters ``quit`` (case and
    surrounding whitespace are ignored) or when input is closed or
    interrupted. Blank input is skipped instead of being sent to the model.

    Returns:
        None. If the model cannot be set up, a message is printed and the
        function returns without entering the loop.
    """
    # model_name = "meta-llama/CodeLlama-7b-Python-hf"
    model_name = "meta-llama/Meta-Llama-3-8B"
    try:
        # Only the pipeline is needed here; it already wraps the tokenizer
        # and the model.
        pipe, _, _ = setup_model(model_name)
    except Exception as e:
        print(f"プログラムを終了します: {e}")
        return

    while True:
        try:
            prompt = input("Pythonコードの生成プロンプトを入力してください（終了するには 'quit' と入力）: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt: leave the loop
            # instead of surfacing a traceback.
            print()
            break

        prompt = prompt.strip()
        if prompt.lower() == 'quit':
            break
        if not prompt:
            # Nothing to generate for; ask again.
            continue

        generated_code = generate_code(pipe, prompt)
        if generated_code:
            print("\n生成されたコード:")
            print(generated_code)
        print("\n")

        # Free memory between generations.
        gc.collect()

# Start the interactive loop only when this code runs as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()
