# Setup

In [7]:
path_save_model = "..\\models"

In [None]:
# load all model


# Trainning GPT-2 to generate a story

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import os

  from .autonotebook import tqdm as notebook_tqdm


## Load model GPT-2

In [3]:
model_name = "openai-community/gpt2"
# model_name = "openai/gpt-oss-20b"

path_save_gpt2 = "..\\models\\gpt2"

# Load model & tokenizer
# if os.path.exists(os.path.join("..\\models", "config.json")):
#     print("Model found locally, loading offline...")
tokenizer = AutoTokenizer.from_pretrained(path_save_gpt2, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path_save_gpt2, trust_remote_code=True)
# else:
#     print("Downloading model from Hugging Face...")
#     tokenizer = AutoTokenizer.from_pretrained(
#         model_name,
#         trust_remote_code=True,
#         cache_dir=path_save_gpt2
#     )
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         trust_remote_code=True,
#         cache_dir=path_save_gpt2
#     )
#     print("Saving model for offline use...")
#     tokenizer.save_pretrained(path_save_gpt2)
#     model.save_pretrained(path_save_gpt2)

## Finetune with short story dataset

In [4]:
from datasets import load_dataset
short_stories_dataset = load_dataset("roneneldan/TinyStories",
                                     cache_dir="..\\data")

In [11]:
# tokenize dataset

def preprocess_batch(batch):
    all_text_times = batch["text"]
    trimmed_text_times = [x[:] for x in all_text_times]
    return tokenizer(
        trimmed_text_times,
        truncation=True,
        # max_length=tokenizer.model_max_length,
        # padding="max_length"  # nếu cần padding cho batch
        )

tokenized_dataset = short_stories_dataset.map(
    preprocess_batch,
    batched=True,
    batch_size=10,
    remove_columns=short_stories_dataset["train"].column_names,
)


Map: 100%|██████████| 2119719/2119719 [07:55<00:00, 4454.82 examples/s]
Map: 100%|██████████| 21990/21990 [00:05<00:00, 4230.46 examples/s]


In [None]:
tokenized_dataset.save_to_disk("..\\data\\tokenized_data_stories")

# from datasets import load_from_disk
# tokenized_dataset = load_from_disk("path/to/tokenized_dataset")

Saving the dataset (5/5 shards): 100%|██████████| 2119719/2119719 [00:04<00:00, 471497.47 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 21990/21990 [00:00<00:00, 546305.43 examples/s]


In [14]:
prompt = "story about a princess"
input = tokenizer(prompt, return_tensors="pt")

# outputs = model.generate(input.input_ids, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
outputs = model.generate(input.input_ids, max_new_tokens=100, do_sample=False, top_k=50, top_p=0.99)


output_string = tokenizer.batch_decode(outputs)
print(output_string)

The following generation flags are not valid and may be ignored: ['top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['story about a princess who is a princess.\n\nThe story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a princess who is a princess. The story is about a']


# Load model suno bark-small

In [15]:
suno_path = "..\\models\\suno-bark-small"

In [21]:
from transformers import BarkModel
import os
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = BarkModel.from_pretrained("suno/bark-small", trust_remote_code=True, cache_dir=suno_path)
model = model.to(device)

In [22]:
from transformers import AutoProcessor
processor = AutoProcessor.from_pretrained("suno/bark")

inputs = processor(prompt)
# generate speech
speech_output = model.generate(**inputs.to(device))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.


In [1]:
from transformers import BarkModel, AutoProcessor
import torch
import os

# ----------------------------
# Cấu hình
# ----------------------------
suno_path = "../models/suno_bark_small"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# ----------------------------
# Load model & processor
# ----------------------------
if os.path.exists(os.path.join(suno_path, "config.json")) and \
   os.path.exists(os.path.join(suno_path, "preprocessor_config.json")):
    print("🔹 Model & processor found locally. Loading offline...")
    model = BarkModel.from_pretrained(suno_path, trust_remote_code=True).to(device)
    processor = AutoProcessor.from_pretrained(suno_path, local_files_only=True)
else:
    print("⬇ Downloading model & processor from Hugging Face...")
    model = BarkModel.from_pretrained(
        "suno/bark-small",
        trust_remote_code=True,
        cache_dir=suno_path
    ).to(device)
    processor = AutoProcessor.from_pretrained(
        "suno/bark-small",
        cache_dir=suno_path
    )
    # Lưu lại để lần sau load offline
    model.save_pretrained(suno_path)
    processor.save_pretrained(suno_path)

# ----------------------------
# Đảm bảo pad token
# ----------------------------
if processor.tokenizer.pad_token is None:
    processor.tokenizer.pad_token = processor.tokenizer.eos_token

# ----------------------------
# Tạo input tensor
# ----------------------------
inputs = processor(
    prompt,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=processor.tokenizer.model_max_length,
    return_attention_mask=True
)

# Chuyển tất cả tensor sang GPU
inputs = {k: v.to(device) for k, v in inputs.items()}

# ----------------------------
# Generate speech
# ----------------------------
with torch.no_grad():
    speech_output = model.generate(**inputs)

print("✅ Speech generated, shape:", speech_output.shape)


  from .autonotebook import tqdm as notebook_tqdm


⬇ Downloading model & processor from Hugging Face...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


KeyboardInterrupt: 

In [23]:
from IPython.display import Audio
sampling_rate = model.generation_config.sample_rate
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

In [None]:
import scipy
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_output[0].cpu().numpy())

## Another voice

In [None]:
voice_preset = "v2/en_speaker_6"

# prepare the inputs
inputs = processor(text_prompt, voice_preset=voice_preset)

# generate speech
speech_output = model.generate(**inputs.to(device))

# let's hear it
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

In [None]:
voice_preset = "v2/en_speaker_3"

# prepare the inputs
inputs = processor(text_prompt, voice_preset=voice_preset)

# generate speech
speech_output = model.generate(**inputs.to(device))

# let's hear it
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

In [None]:
speech_output = model.generate(**inputs, num_beams = 4, temperature = 0.5, semantic_temperature = 0.8)

Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

# Multilingual speech - simplified Chinese
inputs = processor("惊人的！我会说中文")

# generate speech
speech_output = model.generate(**inputs.to(device))

Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

In [None]:
# Multilingual speech - French - let's use a voice_preset as well
inputs = processor("Je peux générer du son facilement avec ce modèle.", voice_preset="fr_speaker_3")

# generate speech
speech_output = model.generate(**inputs.to(device))

Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

# Adding non-speech cues to the input text
inputs = processor("[clears throat] Hello uh ..., my dog is cute [laughter]")


speech_output = model.generate(**inputs.to(device))

Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

In [None]:
# more advanced prompts!

text_prompt = """
    WOMAN: I would like an oatmilk latte please.
    MAN: Wow, that's expensive!
"""

inputs = processor(text_prompt)

# generate speech
speech_output = model.generate(**inputs.to(device))

Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)

# Load stable-difffusion-v1-5

In [5]:
stablediffusion_path = "..\\models\\stabled-diffusion"

In [4]:
import torch
from diffusers import StableDiffusionPipeline

model_id = "runwayml/stable-diffusion-v1-5"
local_model_path = stablediffusion_path

# Lần 1: tải model đầy đủ và lưu thành thư mục riêng
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16
)
pipe.save_pretrained(local_model_path)  # lưu nguyên cấu trúc model vào thư mục

# Lần 2 trở đi: load offline
pipe = StableDiffusionPipeline.from_pretrained(
    local_model_path,
    torch_dtype=torch.float16
).to("cuda")

Loading pipeline components...: 100%|██████████| 7/7 [00:03<00:00,  2.20it/s]
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 15.86it/s]


In [6]:
import torch
from diffusers import StableDiffusionPipeline

model_id = "runwayml/stable-diffusion-v1-5"
local_model_path = stablediffusion_path

# Lần 1: tải model đầy đủ và lưu thành thư mục riêng
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16
)
pipe.save_pretrained(local_model_path)  # lưu nguyên cấu trúc model vào thư mục

# Lần 2 trở đi: load offline
pipe = StableDiffusionPipeline.from_pretrained(
    local_model_path,
    torch_dtype=torch.float16
).to("cuda")

Loading pipeline components...: 100%|██████████| 7/7 [00:02<00:00,  2.34it/s]
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 17.16it/s]


In [7]:
prompt = "King of owl"
image = pipe(prompt).images[0]
image.save("owl.png")

100%|██████████| 50/50 [00:06<00:00,  7.90it/s]
