<a href="https://colab.research.google.com/github/kakyo888/public/blob/main/genTiktok2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Mount google drive and define DIRs
from google.colab import drive
drive.mount('/content/drive')
OUTPUT_DIR = "/content/drive/MyDrive/ai_tiktok"   # 生成物フォルダ
AUDIO_DIR = "/content/drive/MyDrive/ai_tiktok/audio_segments" # audio output
import os, pathlib; pathlib.Path(OUTPUT_DIR).mkdir(exist_ok=True)
print("Files will be saved under", OUTPUT_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Files will be saved under /content/drive/MyDrive/ai_tiktok


In [2]:
#@title 🔑 Input API key for Gemini
import os
from google.colab import userdata

# Use Colab's Secrets Manager to store your API key
# Add your API key to the Secrets Manager under the name 'GOOGLE_API_KEY'
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# You can optionally use getpass for interactive input if not using Secrets Manager,
# but Secrets Manager is recommended for security and persistence.
# import getpass
# os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

In [3]:
#@title pip and apt-get
!pip -q install feedparser gTTS
!apt-get -y install ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [4]:
#@title 📰 Generate news script into script.json
import google.generativeai as genai, feedparser, datetime, re, json, textwrap, pathlib

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
MODEL = "gemini-2.0-flash"

RSS = [
  "https://www.theverge.com/rss/index.xml",
  "https://feeds.arstechnica.com/arstechnica/technology-lab",
  "https://generativeai.substack.com/feed",
  "https://news.yahoo.co.jp/rss/media/techcrunch/all.xml",
]
KEYWORDS = re.compile(r"(生成|generative|AI|LLM)", re.I)
WEEK = datetime.timedelta(days=7)
now  = datetime.datetime.now(datetime.timezone.utc)
entries = []

for url in RSS:
  for e in feedparser.parse(url).entries:
    if not hasattr(e,"published_parsed"): continue
    pub = datetime.datetime(*e.published_parsed[:6], tzinfo=datetime.timezone.utc)
    if now-pub>WEEK: continue
    if KEYWORDS.search(e.title) or KEYWORDS.search(getattr(e,"summary","")):
      entries.append(e)

titles = [e.title for e in entries][:10] or ["No hot AI news this week"]

prompt = textwrap.dedent(f"""
  以下の AI ニュース見出しを 5 トピック・各9秒ナレーションの
  TikTok 台本(JSON)にしてください。visual_suggestion も含めて。
  ニュース:
  {chr(10).join('- '+t for t in titles)}
""").strip()

resp = genai.GenerativeModel(MODEL).generate_content(
        prompt,
        generation_config={"temperature":0.7})

print("Raw model response text:")
print(resp.text)

# Attempt to extract and parse the JSON array part from the response text more robustly
script = None
try:
    # Remove markdown code block syntax
    json_string = resp.text.strip()
    if json_string.startswith("```json"):
        json_string = json_string[len("```json"):].strip()
    if json_string.endswith("```"):
        json_string = json_string[:-len("```")].strip()

    # Attempt to parse the cleaned string as JSON
    script = {"tiktok_script": json.loads(json_string)} # Assume it's a list and wrap it

except json.JSONDecodeError as e:
    print(f"Initial JSON decoding failed: {e}")
    print("Attempting to find and parse JSON array within the text.")
    # If initial parsing fails, try to find the JSON array structure
    json_start = json_string.find('[')
    json_end = json_string.rfind(']')
    if json_start != -1 and json_end != -1 and json_end > json_start:
        json_string_to_parse = json_string[json_start : json_end + 1]
        try:
            script = {"tiktok_script": json.loads(json_string_to_parse)} # Wrap the list in a dictionary
        except json.JSONDecodeError as inner_e:
             print(f"Error decoding extracted JSON array: {inner_e}")
             print("Extracted string attempting to parse:")
             print(json_string_to_parse)
             raise # Re-raise the inner exception
    else:
        print("Could not find a valid JSON array structure in the model response after cleaning.")
        raise # Re-raise the initial exception or a new ValueError

except ValueError as e:
    print(f"Error extracting JSON structure: {e}")
    print("Model response text (again for clarity):")
    print(resp.text)
    raise # Re-raise the exception

if script is None:
     raise ValueError("Failed to extract and parse JSON from model response.")


path_script = pathlib.Path(OUTPUT_DIR)/"script.json"
path_script.write_text(json.dumps(script,ensure_ascii=False,indent=2))
print("✅ script.json saved:", path_script)

Raw model response text:
```json
[
  {
    "topic": "電気自動車の価格変動",
    "segments": [
      {
        "duration": 3,
        "narration": "Slate Autoの電気ピックアップトラック、",
        "visual_suggestion": "Slate Autoの電気ピックアップトラックの画像。以前の価格を示すテキストオーバーレイも入れる。"
      },
      {
        "duration": 3,
        "narration": "以前は20,000ドル以下だったのが、",
        "visual_suggestion": "価格が上昇していることを示すグラフ。"
      },
      {
        "duration": 3,
        "narration": "なんと値上がり！その理由は…？",
        "visual_suggestion": "疑問符アニメーション。テキストで「トランプ？」と表示。"
      }
    ]
  },
  {
    "topic": "TikTok存続の裏側",
    "segments": [
      {
        "duration": 3,
        "narration": "TikTokがGoogleとAppleで存続！",
        "visual_suggestion": "TikTokのロゴ。Google PlayストアとApple App Storeのアイコンを背景に。"
      },
      {
        "duration": 3,
        "narration": "その舞台裏には、説得力のある",
        "visual_suggestion": "手紙のイメージ。重要部分をハイライト表示。"
      },
      {
        "duration": 3,
        "narration": "「手紙」の存在があった！",
        "visual_suggestion": "手紙が開封されるアニメー

In [5]:
#@title 📰 Load script.json and extract narration segments
# 📰 Load script.json and extract narration segments
import os, json, pathlib

# 1) Folder setup ─ change if needed
#OUTPUT_DIR = "/content/drive/MyDrive/ai_tiktok"
script_path = os.path.join(OUTPUT_DIR, "script.json")

if not pathlib.Path(script_path).is_file():
    raise FileNotFoundError(f"❌ {script_path} not found")

# 2) Read JSON
with open(script_path, "r", encoding="utf-8") as f:
    script_data = json.load(f)
print("✅ script_data loaded successfully")

# 3) Walk the structure: tiktok_script ➜ tiktok_script (list) ➜ segments
# Corrected access based on actual script_data structure and model output
# Access the inner list of topics correctly
topics = script_data.get("tiktok_script", {}).get("tiktok_script", [])

narration_texts = []
# Iterate through each topic in the list
for topic_idx, topic in enumerate(topics):
    # Access the segments list within each topic dictionary
    for seg_idx, seg in enumerate(topic.get("segments", [])):
        # The narration text is under the "narration" key, not "text"
        text = seg.get("narration")
        if text:
            narration_texts.append(text.strip())
        else:
            print(f"⚠️ Topic {topic_idx} / segment {seg_idx} missing narration text")


if not narration_texts:
    raise ValueError("❌ No narration strings found in script.json")

# 4) Save merged narration file
out_txt = os.path.join(OUTPUT_DIR, "narration_text.txt")
with open(out_txt, "w", encoding="utf-8") as f:
    # Join narration texts with a space for the combined file
    f.write(" ".join(narration_texts))

print(f"✅ Extracted {len(narration_texts)} narration segments → {out_txt}")

# 5) Quick preview
for i, t in enumerate(narration_texts[:5], 1):
    print(f"{i}. {t}")

✅ script_data loaded successfully


AttributeError: 'list' object has no attribute 'get'

In [6]:
#@title Generate audio for each text segment using gTTS and store the generated audio objects or paths.
from gtts import gTTS
import os

audio_segments = []
os.makedirs(AUDIO_DIR, exist_ok=True)

for i, text in enumerate(narration_texts):
    try:
        tts = gTTS(text=text, lang='ja', slow=False)
        audio_filename = f"segment_{i:03d}.mp3"
        audio_filepath = os.path.join(AUDIO_DIR, audio_filename)
        tts.save(audio_filepath)
        audio_segments.append(audio_filepath)
        print(f"✅ Generated audio for segment {i}: {audio_filename}")
    except Exception as e:
        print(f"Error generating audio for segment {i}: {text} - {e}")
        # Optionally, append None or handle the error as needed
        audio_segments.append(None)

print(f"\nGenerated {len(audio_segments)} audio files.")

NameError: name 'narration_texts' is not defined

In [None]:
#@title Concatenate Audio Files
import os
import glob
from pydub import AudioSegment

# Set working directory to folder containing audio segments
os.chdir(AUDIO_DIR)
print("🔁 Working directory:", os.getcwd())

# Load all .mp3 files
audio_segments = sorted(glob.glob("*.mp3"))
print(f"🎧 Found {len(audio_segments)} audio segments.")

combined_narration = AudioSegment.empty()
pause_duration = 500  # milliseconds

for i, audio_path in enumerate(audio_segments):
    if audio_path and os.path.exists(audio_path):
        try:
            segment_audio = AudioSegment.from_file(audio_path, format="mp3")
            combined_narration += segment_audio
            if i < len(audio_segments) - 1:
                combined_narration += AudioSegment.silent(duration=pause_duration)
        except Exception as e:
            print(f"❌ Error processing {audio_path}: {e}")
    else:
        print(f"⚠️ Skipping missing file: {audio_path}")

# Output combined audio
output_combined_audio_path = os.path.join(OUTPUT_DIR, "final_narration.mp3")

if len(combined_narration) == 0:
    print("⚠️ No audio was combined. Output will be empty.")
else:
    try:
        combined_narration.export(output_combined_audio_path, format="mp3")
        print(f"✅ Narration saved: {output_combined_audio_path} ({combined_narration.duration_seconds:.1f} sec)")
    except Exception as e:
        print(f"❌ Error exporting audio: {e}")


In [None]:
'''
Customising visuals
    Swap background: Replace ColorClip with VideoFileClip("your_stock.mp4").resize(height=1920) and, if needed, add .fx(mpy.vfx.crop, ...) to keep vertical framing.
    Text styles: Change font, fontsize, position ("center","top" etc.), or animate with .crossfadein() / .fadein().
    B-roll per segment: Instead of one long background, build a list of 15 s stock clips and use the same start/duration logic to line them up behind the captions.
'''

#@title generate final movie

# 1) ── USER CONFIG ---------------------------------------------------
# NOTE: `OUTPUT_DIR` and `AUDIO_DIR` must already be defined in the notebook:
# OUTPUT_DIR = "/content/drive/MyDrive/ai_tiktok"
# AUDIO_DIR  = "/content/drive/MyDrive/ai_tiktok/audio_segments"

# Install required libraries
!pip install moviepy openai-whisper pysrt pillow tqdm imageio imageio-ffmpeg -q
!apt-get -y install ffmpeg

try:
    OUTPUT_DIR
    AUDIO_DIR
except NameError as e:
    raise NameError("❌ Please run the cell that defines OUTPUT_DIR and AUDIO_DIR first.") from e

import pysrt, whisper, moviepy.editor as mpy
import os, pathlib, textwrap

MP3_PATH  = os.path.join(OUTPUT_DIR, "final_narration.mp3")     # adjust if it lives in AUDIO_DIR
WORK_DIR  = os.path.join(OUTPUT_DIR, "out")
pathlib.Path(WORK_DIR).mkdir(parents=True, exist_ok=True)

OUT_VIDEO = os.path.join(WORK_DIR, "tiktok_final.mp4")
SRT_PATH  = os.path.splitext(OUT_VIDEO)[0] + ".srt"
FONT_TTF  = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

# Appearance
FONT_SIZE, TXT_COLOR, STROKE_W = 70, "white", 2
BG_COLOR, FPS, WRAP_CHARS      = (0, 0, 0), 30, 30

# 2) ── Transcribe with Whisper --------------------------------------
model    = whisper.load_model("base")
result   = model.transcribe(MP3_PATH, fp16=False)
segments = result["segments"]

# Save captions as SRT
srts = pysrt.SubRipFile()
for i, seg in enumerate(segments, 1):
    srts.append(
        pysrt.SubRipItem(
            index = i,
            start = pysrt.SubRipTime(milliseconds=int(seg["start"]*1000)),
            end   = pysrt.SubRipTime(milliseconds=int(seg["end"]  *1000)),
            text  = seg["text"].strip()
        )
    )
srts.save(SRT_PATH, encoding="utf-8")
print("📄  SRT saved →", SRT_PATH)

# 3) ── Build vertical 1080×1920 video -------------------------------
audio_clip = mpy.AudioFileClip(MP3_PATH)
DURATION   = audio_clip.duration
BG_SIZE    = (1080, 1920)

bg_clip = mpy.ColorClip(size=BG_SIZE, color=BG_COLOR, duration=DURATION)

txt_layers = []
for seg in segments:
    caption = textwrap.fill(seg["text"].strip(), WRAP_CHARS)
    txt = (mpy.TextClip(
              caption,
              font_size   = FONT_SIZE,
              font        = FONT_TTF,
              color       = TXT_COLOR,
              stroke_color= "black",
              stroke_width= STROKE_W,
              method      = "pillow",
              size        = (int(BG_SIZE[0]*0.9), None))
           .set_position(("center", "bottom"))
           .set_start(seg["start"])
           .set_duration(seg["end"] - seg["start"]))
    txt_layers.append(txt)

final = mpy.CompositeVideoClip([bg_clip, *txt_layers]).set_audio(audio_clip)
final.write_videofile(
    OUT_VIDEO,
    fps         = FPS,
    codec       = "libx264",
    audio_codec = "aac",
    preset      = "medium",
    threads     = 4)

print("✅  Finished! TikTok-ready MP4 at:", OUT_VIDEO)