## Image Generation

In [None]:
!pip install transformers==4.49 diffusers==0.32.2

In [None]:
!wget https://raw.githubusercontent.com/thiagohersan/media-landscape/refs/heads/main/python/utils.py
!wget https://raw.githubusercontent.com/thiagohersan/media-landscape/refs/heads/main/python/imgs/landscape_00.jpg

In [None]:
import os
from getpass import getpass

# API keys used by the helpers imported below. A key that is already set in the
# environment is kept as-is; a missing (or empty) one is asked for interactively,
# so secrets never have to be typed into the notebook source.
# NOTE(review): this runs before `from utils import ...`, as in the original cell
# order — presumably utils reads these variables; confirm.
for key_name in ("GEMINI_API_KEY", "NEWSDATA_API_KEY"):
  if not os.environ.get(key_name):
    os.environ[key_name] = getpass(f"{key_name}: ")

import json

from PIL import Image as PImage

from utils import get_articles, get_articles_with_top_words, get_article_images_by_size
from utils import get_img, get_img_description
from utils import Generator, get_pipeline, get_input_images

In [None]:
# Inpainting checkpoint ids kept for reference:
#   "runwayml/stable-diffusion-inpainting"
#   "stable-diffusion-v1-5/stable-diffusion-inpainting"
#   "stabilityai/stable-diffusion-2-inpainting"
model_id = "stable-diffusion-v1-5/stable-diffusion-inpainting"

# Build the pipeline object used by the generation cell below.
pipe = get_pipeline(model_id)

In [None]:
# Open the downloaded landscape and derive the image/mask pair passed to the pipeline.
landscape_path = "./landscape_00.jpg"
img00 = PImage.open(landscape_path)

img, mask = get_input_images(img00, size=(1440, 512), keep_width=256)

# Uncomment to inspect the pipeline inputs:
# display(img)
# display(mask)

In [None]:
# get_img_description() is indexed with "content" and "style" keys elsewhere in
# this notebook, so formatting its return value directly would put the whole
# mapping's repr into the prompt. Use the textual content description instead;
# a plain string result is still accepted unchanged.
description = get_img_description(img00)
prompt = description["content"] if isinstance(description, dict) else description

# Prompt for the inpainting pipeline: the described scene, restated as a disaster landscape.
anthropoprompt = f"post apocalyptic version of {prompt}, but with things on fire, floods and trash everywhere"
anthropoprompt

In [None]:
# Generator seeded with a fixed value (1010) and handed to the pipeline call.
generator = Generator(device="cuda").manual_seed(1010)

# Traits the pipeline is asked to avoid.
negative_prompt = "repetitive, distortion, glitch, borders, stretched, frames, breaks, multiple rows"

# Output dimensions follow the prepared input image.
input_size = img.size

output = pipe(
  prompt=anthropoprompt,
  negative_prompt=negative_prompt,
  image=img,
  mask_image=mask,
  width=input_size[0],
  height=input_size[1],
  guidance_scale=16.0,
  num_inference_steps=24,
  generator=generator,
)

# Show the first generated image.
display(output.images[0])


## Newsdata.io

In [None]:
# Request 200 articles with no query/category arguments and store the response as JSON.
res = get_articles(n_articles=200)

# Nothing else in this notebook creates ./data; make sure it exists so that
# open(..., "w") does not fail with FileNotFoundError on a fresh environment.
os.makedirs("./data", exist_ok=True)

with open("./data/newsdata_top_200.json", "w") as ofp:
  json.dump(res, ofp)

In [None]:
# Request 200 "environment" articles matching the query and store the response as JSON.
q = "catastrophe"
res = get_articles(q=q, cat="environment", n_articles=200)

# Nothing else in this notebook creates ./data; make sure it exists so that
# open(..., "w") does not fail with FileNotFoundError on a fresh environment.
os.makedirs("./data", exist_ok=True)

with open(f"./data/newsdata_{q}_200.json", "w") as ofp:
  json.dump(res, ofp)

In [None]:
# Request 200 "environment" articles matching the query and store the response as JSON.
q = "rain"
res = get_articles(q=q, cat="environment", n_articles=200)

# Nothing else in this notebook creates ./data; make sure it exists so that
# open(..., "w") does not fail with FileNotFoundError on a fresh environment.
os.makedirs("./data", exist_ok=True)

with open(f"./data/newsdata_{q}_200.json", "w") as ofp:
  json.dump(res, ofp)

## Read News JSON

In [None]:
# Load the article dump previously saved for the "rain" query.
news_json_path = "./data/newsdata_rain_200.json"
with open(news_json_path, "r") as ifp:
  newsdata_res = json.load(ifp)

# Table of article indices selected via the top-words helper.
art_idxs = get_articles_with_top_words(newsdata_res, n_articles=7, n_words=9)

display(art_idxs)

# Number of distinct values in the flattened table.
distinct_idxs = set(art_idxs.values.reshape(-1))
len(distinct_idxs)

In [None]:
import time

# Substrings that disqualify an image when found in its content or style description.
EXCLUDED_TERMS = (
  "logo",
  "branding",
  "line art",
  "typograph",
  "illustrat",
  "digital art",
  "graphic design",
  "graphic overlay",
)

imgs_by_size = get_article_images_by_size(newsdata_res, art_idxs.values.reshape(-1), limit=30)

# One record per accepted image: article metadata, the (possibly downscaled)
# image, and its content/style descriptions.
img_data = []

# The loop variable is deliberately not called `img`: that name holds the
# inpainting input image earlier in the notebook, and overwriting it here would
# break a later re-run of the generation cell.
for entry in imgs_by_size:
  picture = entry["image"]
  iw, ih = picture.size

  # Skip images that are taller than they are wide.
  if ih > iw:
    continue

  # Cap the height at 512 px, scaling the width by the same ratio.
  if ih > 512:
    img_512 = picture.resize((int(iw/ih*512), 512))
  else:
    img_512 = picture

  description = get_img_description(picture)
  img_content = description["content"]
  img_style = description["style"]
  # Pause after every description request.
  # NOTE(review): presumably rate limiting for the description service — confirm.
  time.sleep(7)

  # Drop images without a usable description.
  if img_content == "" or img_style == "":
    continue

  # Drop images described with any excluded term. Matching is case-insensitive
  # so a capitalised "Logo" or "Illustration" is filtered as well.
  searchable = (img_content.lower(), img_style.lower())
  if any(term in text for term in EXCLUDED_TERMS for text in searchable):
    continue

  display(img_512)

  article = newsdata_res[entry["idx"]]
  img_data.append({
    "article_id" : article["article_id"],
    "pubDate": article["pubDate"],
    "image": img_512,
    "content": img_content,
    "style": img_style,
  })

len(img_data)

## Push to HF

In [None]:
from datasets import Dataset

# Turn the collected records into a dataset and upload it as the "newsdata" split.
hub_repo = "thiagohersan/newsdata-images"
hub_split = "newsdata"

dataset = Dataset.from_list(img_data)
dataset.push_to_hub(hub_repo, split=hub_split)

In [None]:
from datasets import Dataset, concatenate_datasets, load_dataset

# Current contents of the "newsdata" split on the Hub.
dataset_hf = load_dataset("thiagohersan/newsdata-images", split="newsdata")

# Append only records whose article_id is not on the Hub yet. Without this check,
# re-running the cell (or running it right after an initial push of the same
# img_data) would upload every image a second time.
known_ids = set(dataset_hf["article_id"])
new_records = [rec for rec in img_data if rec["article_id"] not in known_ids]

if new_records:
  dataset = concatenate_datasets([dataset_hf, Dataset.from_list(new_records)])
  dataset.push_to_hub("thiagohersan/newsdata-images", split="newsdata")
else:
  # Nothing new to upload; keep `dataset` bound to the existing split.
  dataset = dataset_hf