**به نام خدا**

### **imports and inputs**

In [None]:
import os
import scipy.io.wavfile
from time import strftime
from transformers import VitsModel, AutoTokenizer

from src.utils.preprocess import CropAndExtract
from src.utils.init_path import init_path
from src.facerender.animate import AnimateFromCoeff
from src.generate_facerender_batch import get_facerender_data
from src.test_audio2coeff import Audio2Coeff
from src.generate_batch import get_data


import torch
# Pick the torch device string used by every model below: GPU when CUDA is
# available, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# --- User inputs -----------------------------------------------------------
# Text that is synthesized to speech in step 2.
text = "سلام، من محمد عرفان هستم، من می‌خواهم پروژه درسی ر وتحویل بدم، این متن هم فکر میکنم ب اندازه کافی طولانی باشه برای این که نشون بده درست خروجی گرفتم یا نه. شایدم یه خورده بیشتر بنویسم بد نباشه"

# Portrait image to animate. Built with os.path.join so the path separator
# matches the host OS (a literal "mine\\boy.jpg" only resolves on Windows).
source_image = os.path.join("mine", "boy.jpg")

# Root folder under which each run creates its own timestamped directory.
result_dir = "./results"

## **Step 1: Extract 3DMM coefficients from the source image**

In [None]:

# --- Step 1 settings ---------------------------------------------------------
preprocess_mode = "crop"
checkpoint_dir = "./checkpoints"
img_size = 256

# Every run gets its own timestamped output folder under result_dir.
run_stamp = strftime("%Y_%m_%d_%H.%M.%S")
save_dir = os.path.join(result_dir, run_stamp)
os.makedirs(save_dir, exist_ok=True)

# Resolve checkpoint/config locations and build the preprocessing model.
# NOTE(review): the fourth positional argument (False) is passed through
# unchanged; confirm its meaning against init_path's signature.
config_dir = os.path.join(".", "src/config")
sadtalker_paths = init_path(checkpoint_dir, config_dir, img_size, False, preprocess_mode)
preprocess_model = CropAndExtract(sadtalker_paths, device)

# Folder that receives the artifacts produced from the source image.
first_frame_dir = os.path.join(save_dir, "first_frame_dir")
os.makedirs(first_frame_dir, exist_ok=True)

print("Extracting 3DMM from the source image...")
extraction = preprocess_model.generate(
    source_image,
    first_frame_dir,
    preprocess_mode,
    source_image_flag=True,
    pic_size=img_size,
)
first_coeff_path, crop_pic_path, crop_info = extraction

# Stop here if extraction returned no coefficient path; later steps need it.
if first_coeff_path is None:
    raise ValueError("Failed to extract coefficients from the source image.")

print("Extraction complete. Coefficients saved at:", first_coeff_path)


using safetensor as default
Extracting 3DMM from the source image...


landmark Det:: 100%|██████████| 1/1 [00:00<00:00, 19.68it/s]
3DMM Extraction In Video:: 100%|██████████| 1/1 [00:00<00:00, 103.02it/s]

Extraction complete. Coefficients saved at: ./results\2025_02_11_10.44.50\first_frame_dir\boy.mat





## **Step 2: Synthesize speech from the input text**

In [None]:
# Text-to-speech: load the VITS checkpoint named by model_name together with
# its tokenizer, synthesize `text`, and save the waveform as a WAV file.
model_name = "facebook/mms-tts-fas"

model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    waveform = model(**inputs).waveform

# Written to "output.wav" because that is the path the audio-to-coefficient
# step reads. The previous `result_dir+"output.wav"` concatenation had no path
# separator and produced "./resultsoutput.wav", a file nothing else reads.
tts_wav_path = "output.wav"
scipy.io.wavfile.write(
    tts_wav_path,
    rate=model.config.sampling_rate,
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid if the model
    # is ever moved to a GPU.
    data=waveform[0].cpu().numpy(),
)

## **Step 3: Convert audio to motion coefficients**

In [None]:


# Driving audio for the animation, plus optional reference inputs.
audio_path = "output.wav"
ref_eyeblink = None  # forwarded to get_data below
ref_pose = None      # assigned here but not used in this cell

# Build the model input batch from the step 1 coefficients and the audio file.
batch = get_data(
    first_coeff_path,
    audio_path,
    device,
    ref_eyeblink,
    still=False,
)
audio_to_coeff = Audio2Coeff(sadtalker_paths, device)

print("Converting audio to motion coefficients...")
coeff_path = audio_to_coeff.generate(
    batch,
    save_dir,
    pose_style=0,
    ref_pose_coeff_path=None,
)
print("Audio-to-coefficient conversion complete. Coefficients saved at:", coeff_path)


mel:: 100%|██████████| 149/149 [00:00<00:00, 74496.52it/s]


Converting audio to motion coefficients...


audio2exp:: 100%|██████████| 15/15 [00:00<00:00, 447.73it/s]

Audio-to-coefficient conversion complete. Coefficients saved at: ./results\2025_02_11_10.44.50\boy##output.mat





## **Step 4: Render the animated video**

In [None]:
import shutil

# --- Step 4 settings ---------------------------------------------------------
batch_size = 2
# Optional head-pose inputs; None is passed through to get_facerender_data.
input_yaw = None
input_pitch = None
input_roll = None
expression_scale = 1.0

# Assemble the renderer input from the motion coefficients (step 3), the
# cropped picture and first-frame coefficients (step 1), and the audio file.
data = get_facerender_data(
    coeff_path, crop_pic_path, first_coeff_path, audio_path,
    batch_size, input_yaw, input_pitch, input_roll,
    expression_scale=expression_scale,
    still_mode=False,
    preprocess=preprocess_mode,
    size=img_size,
)

animate_from_coeff = AnimateFromCoeff(sadtalker_paths, device)
print("Rendering the animated video...")
render_options = dict(
    enhancer="gfpgan",
    background_enhancer=None,
    preprocess=preprocess_mode,
    img_size=img_size,
)
result_video_path = animate_from_coeff.generate(
    data, save_dir, source_image, crop_info, **render_options
)

# Move the rendered file next to the run folder as "<save_dir>.mp4".
final_video = f"{save_dir}.mp4"
shutil.move(result_video_path, final_video)
print("The generated video is saved as:", final_video)
print("The generated video is saved as:", final_video)