### Given a script, generate and upscale an image for each scene

In [1]:
# Make the parent directory importable. Presumably that is where the project's
# `utils` package (imported below) lives -- confirm against the repo layout.
import sys
sys.path.append('..')

# Load environment variables from a local .env file, if one is present.
from dotenv import load_dotenv
load_dotenv()

# Project-local helpers; must be imported after the sys.path tweak above.
from utils import utils

In [2]:
# Point the shared utils module at the script file for this story and load
# its scenes. How get_scenes() parses the file is defined in utils (not
# visible from this notebook).
utils.SCRIPT = "outputs/silvio/scriptwriter_out.txt"
scenes = utils.get_scenes()

# Keep only the free-text description of each scene; these strings are used
# as the per-scene image prompts in the generation and upscaling cells.
scene_lines = [scene.description for scene in scenes]
scene_lines

['- "A simple, somber room with a portrait of Silvio Berlusconi on the wall. The camera slowly zooms in on the portrait."',
 '- "A montage of clips showcasing Berlusconi\'s political career and business achievements. Images of newspaper headlines detailing his scandals are interspersed."',
 '- "Footage of lowered flags across Italy and Europe. A crowd gathered for the funeral at Milan Cathedral."',
 '- "A series of photos and clips of world leaders offering tributes."',
 '- "Clips of Berlusconi\'s center-right party, Forza Italia, and its activities under Giorgia Meloni\'s leadership. Images of Berlusconi\'s public appearances before his death."']

In [3]:

import torch
from diffusers import StableDiffusionPipeline

# Load the text-to-image checkpoint with float16 weights (torch_dtype below).
# from_pretrained resolves the model id against the Hugging Face Hub, so the
# first run needs network access unless the model is already cached.
pipeline = StableDiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V2.0", torch_dtype=torch.float16)  

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [4]:
# Move the pipeline to the GPU and switch its attention layers to the
# xformers memory-efficient implementation (requires the xformers package).
pipeline = pipeline.to("cuda")
pipeline.enable_xformers_memory_efficient_attention()

In [5]:
# Show which scheduler classes can be swapped in for the pipeline's current one.
pipeline.scheduler.compatibles

[diffusers.schedulers.scheduling_ddim.DDIMScheduler,
 diffusers.schedulers.scheduling_lms_discrete.LMSDiscreteScheduler,
 diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler,
 diffusers.schedulers.scheduling_heun_discrete.HeunDiscreteScheduler,
 diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler,
 diffusers.schedulers.scheduling_unipc_multistep.UniPCMultistepScheduler,
 diffusers.schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteScheduler,
 diffusers.schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteScheduler,
 diffusers.utils.dummy_torch_and_torchsde_objects.DPMSolverSDEScheduler,
 diffusers.schedulers.scheduling_pndm.PNDMScheduler,
 diffusers.schedulers.scheduling_ddpm.DDPMScheduler,
 diffusers.schedulers.scheduling_dpmsolver_singlestep.DPMSolverSinglestepScheduler,
 diffusers.schedulers.scheduling_deis_multistep.DEISMultistepScheduler,
 diffusers.schedulers.scheduling_k_dpm_2_discrete.KDPM2Discret

In [6]:
from diffusers import DPMSolverMultistepScheduler

# Swap the pipeline's scheduler for DPM-Solver multistep. It is built from the
# current scheduler's config so the settings they share carry over.
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)

In [7]:
# Style tags appended after every scene description when building the prompt.
# NOTE: the recorded run shows CLIP truncating the combined prompt at 77
# tokens for the longest scene, which cuts off the tail of this string
# ("journalism )"). Tags placed last are therefore the first to be lost.
# NOTE(review): the "(tag:1.2)" weighting syntax comes from other Stable
# Diffusion front ends; plain diffusers pipelines appear to treat it as
# literal text unless a prompt-weighting helper is used -- confirm.
positive_prompt = """
(high detailed skin:1.2), 8k uhd, dslr, soft lighting, (high quality), film grain, Fujifilm XT3, (reportage, journalism)
"""

# Concepts to steer away from; passed unchanged as negative_prompt for every
# scene in both the generation and the upscaling cell.
negative_prompt="""
((text:1.4, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4, nudity)),
close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate,
morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation,
deformed, blurry, burns, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured,
gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers,
too many fingers, long neck
"""

In [8]:
# Settings for the text-to-image pass.
# NOTE: guidance_scale is currently NOT passed to the pipeline (the keyword
# argument is commented out below), so the pipeline's own default applies.
# Per the diffusers docs classifier-free guidance is only active for values
# greater than 1, so enabling it with 0.7 would also make negative_prompt
# have no effect -- pick a value > 1 before uncommenting.
guidance_scale        = 0.7
num_inference_steps   = 50
num_images_per_prompt = 5
# Base RNG seed. The upscaling cell picks candidate #2 of every scene by file
# name, which is only meaningful if a re-run reproduces the same candidates in
# the same order.
seed                  = 0

for i, line in enumerate(scene_lines):
    # One generator per scene (seed + scene index) so that a single scene can
    # be regenerated on its own and still yield the same images.
    generator = torch.Generator(device="cuda").manual_seed(seed + i)

    # `images` is intentionally kept as a notebook-level name: the cleanup
    # cell that frees GPU memory deletes it explicitly.
    images = pipeline(
        prompt="{} {}".format(line, positive_prompt),
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_images_per_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        #guidance_scale=guidance_scale,
    ).images

    # Files are numbered from 1: scene_<scene>_<candidate>.png
    for j, image in enumerate(images):
        image.save("outputs/silvio/scene_{}_{}.png".format(i+1, j+1))

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (79 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['journalism )']


  0%|          | 0/50 [00:00<?, ?it/s]

In [17]:
# Free GPU memory for upscaler
import gc

# Drop the notebook-level references to the text-to-image pipeline and its
# last batch of images. pop() is used instead of `del` so the cell can be
# re-run safely: `del` raises NameError when a name is already gone (second
# execution) or was never bound (`images` stays undefined when scene_lines
# is empty).
globals().pop("pipeline", None)
globals().pop("images", None)

# Collect unreachable objects first so tensors kept alive only by reference
# cycles are really freed, then release PyTorch's unused cached GPU memory.
gc.collect()
torch.cuda.empty_cache()

In [10]:
from PIL import Image
from diffusers import StableDiffusionUpscalePipeline

import torch

# Load the x4 upscaler with float16 weights. `pipeline` is re-bound here; the
# text-to-image pipeline that used this name was deleted in the preceding
# cell to free GPU memory.
# NOTE(review): revision="fp16" selects the repo's fp16 branch; newer
# diffusers releases prefer variant="fp16" -- confirm against the installed
# version before changing it.
pipeline = StableDiffusionUpscalePipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler",
    revision="fp16",
    torch_dtype=torch.float16
)

Downloading (…)p16/model_index.json:   0%|          | 0.00/485 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/300 [00:00<?, ?B/s]

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/825 [00:00<?, ?B/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/634 [00:00<?, ?B/s]

Downloading (…)60b/unet/config.json:   0%|          | 0.00/982 [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/348 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/681M [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)360b/vae/config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/111M [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/947M [00:00<?, ?B/s]

  deprecate("wrong scaling_factor", "1.0.0", deprecation_message, standard_warn=False)


In [11]:
from xformers.ops import MemoryEfficientAttentionFlashAttentionOp

# Move the upscaler to the GPU and use the Flash Attention operator for its
# attention layers.
pipeline = pipeline.to("cuda")
pipeline.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
# Workaround: Flash Attention does not accept the attention shape used by the
# VAE, so the VAE is reset to attention_op=None (no explicit operator).
# This call must come after the pipeline-wide call above.
pipeline.vae.enable_xformers_memory_efficient_attention(attention_op=None)

In [12]:
# Settings for the upscaling pass. guidance_scale is declared but not used:
# the matching keyword argument in the call below is left disabled.
guidance_scale        = 0.7
num_inference_steps   = 20
num_images_per_prompt = 1

for scene_no, prompt in enumerate(scene_lines, start=1):
    # Candidate #2 of each scene is the image taken forward to upscaling.
    # It is shrunk to 256x256 RGB before being handed to the x4 upscaler.
    source_path = f"outputs/silvio/scene_{scene_no}_2.png"
    low_res_image = Image.open(source_path).convert("RGB").resize((256, 256))

    # Same prompt construction as in the generation cell: scene description
    # followed by the shared style tags.
    result = pipeline(
        prompt=f"{prompt} {positive_prompt}",
        negative_prompt=negative_prompt,
        image=low_res_image,
        num_images_per_prompt=num_images_per_prompt,
        num_inference_steps=num_inference_steps,
        # guidance_scale=guidance_scale,
    )
    image = result.images[0]

    image.save(f"outputs/silvio/scene_{scene_no}_2_upscaled.png")

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (79 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['journalism )']


  0%|          | 0/20 [00:00<?, ?it/s]