<a href="https://colab.research.google.com/github/pedrogengo/3D_avatar_generator/blob/main/notebooks/Create_Your_own_3D_Avatar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [None]:
# Clone the TripoSR fork into the current working directory; a later cell changes into it with `%cd TripoSR`.
!git clone https://github.com/ritwikraha/TripoSR.git

In [None]:
import sys

# `from tsr.system import ...` resolves the `tsr` package from its *parent* directory, so the
# repo root has to be importable. The original `tsr/` entry is kept for backward compatibility.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
for _path in ('/content/TripoSR', '/content/TripoSR/tsr'):
    if _path not in sys.path:
        sys.path.append(_path)

In [None]:
# Make the cloned repo the working directory; later cells use paths relative to it
# (requirements.txt, examples/, output/).
%cd TripoSR

In [None]:
!pip install -r requirements.txt -q
!pip install -U diffusers accelerate -qq

In [None]:
# `torch` is imported here because this cell runs before the notebook's main import cell;
# without it a fresh "Restart & Run All" stops with a NameError on the line below.
import torch

# Adjust device based on CUDA availability
device = "cuda" if torch.cuda.is_available() else "cpu"

## Imports

In [None]:
import torch
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
from diffusers.utils import load_image
import os
import time
from PIL import Image
import numpy as np
from IPython.display import Video
from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, save_video
import rembg

## Load the Pipelines for Image Preprocessing


In [None]:
# Load the CLIP vision encoder published in the IP-Adapter repository, in half precision,
# then move it to the selected device.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16
)
image_encoder = image_encoder.to(device)

In [None]:
# Build the SDXL text-to-image pipeline in half precision around the `image_encoder`
# created in the previous cell.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    image_encoder=image_encoder,
    torch_dtype=torch.float16,
)
pipeline = pipeline.to(device)

# Replace the default scheduler with DDIM, reusing the existing scheduler's configuration.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Two IP-Adapter checkpoints are loaded: a general "plus" adapter and a "plus-face" adapter.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
# One scale per adapter; presumably matched by position to the weight list above
# (0.7 for the first adapter, 0.3 for the face adapter).
pipeline.set_ip_adapter_scale([0.7, 0.3])

# NOTE(review): the pipeline was already moved to `device` above; confirm that combining
# that with CPU offload is intended.
pipeline.enable_model_cpu_offload()

## Timer Class for Utility

In [None]:
# Define Timer class
from typing import Optional


class Timer:
    """Stopwatch for named steps; elapsed times are reported in milliseconds.

    Each step is identified by a name: ``start(name)`` records a timestamp and
    ``end(name)`` prints and returns the time elapsed since then. When CUDA is
    available, the device is synchronized before each timestamp is taken.
    """

    def __init__(self):
        self.items = {}  # step name -> start timestamp, in seconds
        self.time_scale = 1000.0  # ms
        self.time_unit = "ms"

    @staticmethod
    def _sync_cuda() -> None:
        """Synchronize CUDA (if available) before a timestamp is read."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    def start(self, name: str) -> None:
        """Start (or restart) the timer registered under ``name``."""
        self._sync_cuda()
        # perf_counter is monotonic, so the measurement is immune to system clock adjustments.
        self.items[name] = time.perf_counter()

    def end(self, name: str) -> Optional[float]:
        """Stop the timer ``name``, print the elapsed time and return it.

        Returns:
            The elapsed time scaled by ``time_scale`` (milliseconds by default),
            or ``None`` if ``name`` was never started or has already been ended.
            In a notebook, a cell whose last statement is ``timer.end(...)``
            will display this value as its output.
        """
        if name not in self.items:
            return None
        self._sync_cuda()
        elapsed = (time.perf_counter() - self.items.pop(name)) * self.time_scale
        print(f"{name} finished in {elapsed:.2f}{self.time_unit}.")
        return elapsed


timer = Timer()

## Upload your data and Generate Avatar Image

### Upload your picture

In [None]:
from google.colab import files

# Open Colab's upload dialog. The first entry of `uploaded` is later opened as the face
# reference image.
uploaded = files.upload()

In [None]:
# @title Enter a prompt { run: "auto", vertical-output: true, form-width: "10000px", display-mode: "form" }
# Text prompt passed as `prompt=` to the pipeline in the generation cell; it defaults to an
# empty string.
prompt = "" # @param {type:"string"}


### Upload the style images to guide the generation

Here we use a few Funko Pop–style images hosted on Hugging Face to guide the generation. You can replace them with images in any other style you like.

In [None]:
# The face reference is the first file from the upload cell. Fail with an actionable message
# when nothing was uploaded instead of surfacing an opaque IndexError.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run the upload cell and choose a picture first."
    )
face_image = Image.open(next(iter(uploaded)))

# Style references: funko1.jpeg .. funko4.jpeg from the dataset folder below, each resized
# to 1024x1024.
style_folder = "https://huggingface.co/datasets/pedrogengo/funkopop_images/resolve/main"
style_images = [load_image(f"{style_folder}/funko{i}.jpeg").resize((1024, 1024)) for i in range(1, 5)]

### Generate images using the pipeline

In [None]:
# Fixed seed so repeated runs start from the same random state.
generator = torch.Generator(device=device)
generator.manual_seed(42)

# One reference entry per loaded IP-Adapter: the list of style images first, then the face
# picture. Only the first generated image is kept.
image = pipeline(
    prompt=prompt,
    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
    ip_adapter_image=[style_images, face_image],
    num_inference_steps=50,
    num_images_per_prompt=1,
    generator=generator,
).images[0]

In [None]:
# Preview a 512x512 copy of the generated avatar; the result is not assigned, so `image`
# itself is left unchanged.
image.resize((512, 512))

In [None]:
# Save a 512x512 copy of the avatar as a JPEG; the path is relative to the current working
# directory.
image.resize((512, 512)).save("examples/avatar.jpg")

## The TripoSR model for 3D Avatar

In [None]:
# Parameters for running the TripoSR
# NOTE(review): `image_paths` is not read by the cells shown in this notebook; the in-memory
# `image` is used instead. Confirm whether it is still needed.
image_paths = "/content/TripoSR/examples/avatar.jpg"
# Fall back to CPU when no GPU is present instead of hard-coding "cuda:0", which would
# override the availability check made during setup.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pretrained_model_name_or_path = "stabilityai/TripoSR"
chunk_size = 8192  # passed to model.renderer.set_chunk_size
# NOTE(review): `no_remove_bg` is not read by the cells shown in this notebook; the
# preprocessing cell calls remove_background regardless of this value.
no_remove_bg = True
foreground_ratio = 0.85  # passed to resize_foreground
output_dir = "output/"  # root folder for the input copy, renders, video and mesh
model_save_format = "obj"  # file extension used for the exported mesh
render = True  # when True, views are rendered and saved as PNG frames plus a video

In [None]:
# Strip surrounding whitespace from the configured path and create the folder up front;
# exist_ok=True keeps the cell safe to re-run.
output_dir = output_dir.strip()
os.makedirs(output_dir, exist_ok=True)

### Generate Images for the 3D model

In [None]:
# ---- Load TripoSR and move it to the target device ----
timer.start("Initializing model")
model = TSR.from_pretrained(
    pretrained_model_name_or_path, config_name="config.yaml", weight_name="model.ckpt"
)
model.renderer.set_chunk_size(chunk_size)
model.to(device)
timer.end("Initializing model")

# ---- Prepare the generated avatar as model input ----
timer.start("Processing images")
images = []
rembg_session = rembg.new_session()

# Background removal followed by foreground resizing, both via the tsr.utils helpers.
image = remove_background(image, rembg_session)
image = resize_foreground(image, foreground_ratio)

if image.mode == "RGBA":
    # Scale to [0, 1], blend the RGB channels over a constant 0.5 background using the
    # alpha channel, then convert back to an 8-bit PIL image.
    image = np.array(image).astype(np.float32) / 255.0
    image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4]) * 0.5
    image = Image.fromarray((image * 255.0).astype(np.uint8))

# Keep a copy of the exact model input next to the outputs of image 0.
image_dir = os.path.join(output_dir, "0")
os.makedirs(image_dir, exist_ok=True)
image.save(os.path.join(image_dir, "input.png"))
images.append(image)
timer.end("Processing images")

In [None]:
# Visualise the preprocessed image that was appended to `images` for the model
image

### Render Video from Images

In [None]:
# Run TripoSR on every prepared image: infer scene codes, optionally render, export a mesh.
for i, image in enumerate(images):
    print(f"Running image {i + 1}/{len(images)} ...")
    # Every artefact of image i is written to <output_dir>/<i>/.
    image_dir = os.path.join(output_dir, str(i))

    # Inference only, so gradient tracking is disabled.
    timer.start("Running model")
    with torch.no_grad():
        scene_codes = model([image], device=device)
    timer.end("Running model")

    # Render 30 views, save each one as a PNG frame and assemble them into a 30 fps video.
    if render:
        timer.start("Rendering")
        render_images = model.render(scene_codes, n_views=30, return_type="pil")
        for view_idx, frame in enumerate(render_images[0]):
            frame.save(os.path.join(image_dir, f"render_{view_idx:03d}.png"))
        save_video(render_images[0], os.path.join(image_dir, "render.mp4"), fps=30)
        timer.end("Rendering")

    # Extract the mesh and write it in the configured format.
    timer.start("Exporting mesh")
    meshes = model.extract_mesh(scene_codes)
    mesh_file = os.path.join(image_dir, f"mesh.{model_save_format}")
    meshes[0].export(mesh_file)
    timer.end("Exporting mesh")

print("Processing complete.")

## Output Video

In [None]:
# Display the video rendered for the first image (index 0). The path is built from
# `output_dir` so it still points at the right file if the output folder is changed in the
# parameters cell; with the default "output/" it resolves to "output/0/render.mp4".
Video(os.path.join(output_dir, "0", "render.mp4"), embed=True)