<a href="https://colab.research.google.com/github/kakyo888/public/blob/main/genTiktok2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Mount google drive and define DIRs
import os
import pathlib

from google.colab import drive

# Mount Google Drive so that everything written under OUTPUT_DIR lands on Drive.
drive.mount('/content/drive')

OUTPUT_DIR = "/content/drive/MyDrive/ai_tiktok"   # folder for generated artifacts
# Derived from OUTPUT_DIR (same value as before) so the two paths cannot drift apart.
AUDIO_DIR = os.path.join(OUTPUT_DIR, "audio_segments")  # audio output

# Creating AUDIO_DIR with parents=True also creates OUTPUT_DIR, and guarantees the
# folder exists before any later cell tries to os.chdir() into it.
pathlib.Path(AUDIO_DIR).mkdir(parents=True, exist_ok=True)
print("Files will be saved under", OUTPUT_DIR)

Mounted at /content/drive
Files will be saved under /content/drive/MyDrive/ai_tiktok


In [2]:
#@title 🔑 Input API key for Gemini
import os
from google.colab import userdata

# Use Colab's Secrets Manager to store your API key
# Add your API key to the Secrets Manager under the name 'GOOGLE_API_KEY'
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Fail early with a readable message: assigning None to os.environ raises an opaque
# TypeError, and an empty key would only surface later as an authentication failure.
if not GOOGLE_API_KEY:
    raise ValueError(
        "GOOGLE_API_KEY is empty. Add it to Colab's Secrets Manager and grant this "
        "notebook access to it."
    )
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# You can optionally use getpass for interactive input if not using Secrets Manager,
# but Secrets Manager is recommended for security and persistence.
# import getpass
# os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

In [3]:
#@title pip and apt-get
!pip -q install feedparser gTTS
!apt-get -y install ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [4]:
#@title 📰 Generate news script into script.json
import datetime
import json
import os
import pathlib
import re
import textwrap

import feedparser
import google.generativeai as genai

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
MODEL = "gemini-2.0-flash"

RSS = [
  "https://www.theverge.com/rss/index.xml",
  "https://feeds.arstechnica.com/arstechnica/technology-lab",
  "https://generativeai.substack.com/feed",
  "https://news.yahoo.co.jp/rss/media/techcrunch/all.xml",
]
# "生成", "generative" and "LLM" match anywhere (the latter two case-insensitively).
# "AI" is matched case-sensitively and must not be embedded in a longer Latin word:
# a case-insensitive bare "AI" also hits "said", "email", "available", ... and lets
# unrelated headlines through. "OpenAI", "genAI", "AIが" and "AIs" still match.
KEYWORDS = re.compile(r"生成|(?i:generative|LLM)|(?<![A-Z])AIs?(?![A-Za-z])")
WEEK = datetime.timedelta(days=7)
now  = datetime.datetime.now(datetime.timezone.utc)
entries = []

# Collect entries from the last 7 days whose title or summary mentions a keyword.
for url in RSS:
    for e in feedparser.parse(url).entries:
        # The attribute can exist but be None when the feed's date could not be parsed,
        # so test the value rather than mere presence.
        published = getattr(e, "published_parsed", None)
        if not published:
            continue
        pub = datetime.datetime(*published[:6], tzinfo=datetime.timezone.utc)
        if now - pub > WEEK:
            continue
        if KEYWORDS.search(getattr(e, "title", "")) or KEYWORDS.search(getattr(e, "summary", "")):
            entries.append(e)

titles = [e.title for e in entries][:10] or ["No hot AI news this week"]

# Dedent only the static instructions. Dedenting after the headlines are interpolated is
# a no-op, because the un-indented headline lines leave no common leading whitespace.
headlines = "\n".join("- " + t for t in titles)
prompt = textwrap.dedent("""
  以下の AI ニュース見出しを 5 トピック・各9秒ナレーションの
  TikTok 台本(JSON)にしてください。visual_suggestion も含めて。
  ニュース:
""").strip() + "\n" + headlines

resp = genai.GenerativeModel(MODEL).generate_content(
        prompt,
        generation_config={"temperature":0.7})

print("Raw model response text:")
print(resp.text)


def _extract_script_items(raw_text):
    """Parse the model's reply into the JSON value that holds the script.

    Strips an optional Markdown code fence (``` or ```json), then parses the rest as
    JSON. If that fails, falls back to parsing the outermost ``[...]`` span in the text.
    A dict that already carries a ``tiktok_script`` list is unwrapped so the caller
    does not wrap it twice.

    Raises:
        json.JSONDecodeError: if neither the cleaned text nor the extracted array parses.
    """
    cleaned = re.sub(r"^```[\w-]*\s*", "", raw_text.strip())
    cleaned = re.sub(r"\s*```$", "", cleaned)

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as e:
        print(f"Initial JSON decoding failed: {e}")
        print("Attempting to find and parse JSON array within the text.")
        json_start = cleaned.find('[')
        json_end = cleaned.rfind(']')
        if json_start == -1 or json_end <= json_start:
            print("Could not find a valid JSON array structure in the model response after cleaning.")
            raise  # re-raise the original decoding error
        json_string_to_parse = cleaned[json_start : json_end + 1]
        try:
            parsed = json.loads(json_string_to_parse)
        except json.JSONDecodeError as inner_e:
            print(f"Error decoding extracted JSON array: {inner_e}")
            print("Extracted string attempting to parse:")
            print(json_string_to_parse)
            raise

    if isinstance(parsed, dict) and isinstance(parsed.get("tiktok_script"), list):
        parsed = parsed["tiktok_script"]
    return parsed


script = {"tiktok_script": _extract_script_items(resp.text)}

path_script = pathlib.Path(OUTPUT_DIR)/"script.json"
# ensure_ascii=False keeps the Japanese text readable, so the file encoding must be explicit.
path_script.write_text(json.dumps(script,ensure_ascii=False,indent=2), encoding="utf-8")
print("✅ script.json saved:", path_script)

Raw model response text:
```json
[
  {
    "topic": "デザイン業界に激震！Figma上場へ",
    "narration": "デザインツールFigmaがついに上場！デザイン業界に更なる革新をもたらすか？今後の動向に注目。",
    "duration": 9,
    "visual_suggestion": "Figmaのロゴと株価チャートのアニメーション。興奮を表すBGM。"
  },
  {
    "topic": "X、AIボットによるコミュニティノート",
    "narration": "XがAIボットによるコミュニティノートを解禁！誤情報の拡散を防ぐ新たな試み。効果はいかに？",
    "duration": 9,
    "visual_suggestion": "XのロゴとAIボットのアイコン。意見が飛び交うようなイメージ。"
  },
  {
    "topic": "Ultra Mobile、データ容量増量！",
    "narration": "Ultra Mobileがデータ容量を増量！しかも価格据え置き！ユーザーにとっては嬉しいニュース。乗り換え検討のチャンス？",
    "duration": 9,
    "visual_suggestion": "Ultra Mobileのロゴとスマホの画面。通信速度が速くなるイメージ。"
  },
  {
    "topic": "Grammarly、AI生産性プラットフォームへ",
    "narration": "GrammarlyがAIを活用した生産性プラットフォームを目指す！文章校正だけじゃない、新たな可能性に期待。",
    "duration": 9,
    "visual_suggestion": "Grammarlyのロゴとキーボード。文章作成がスムーズになるイメージ。"
  },
  {
    "topic": "AnthropicとAI時代のFlashゲーム",
    "narration": "AnthropicがAIでFlashゲームの精神を再現！懐かしさと新しさが融合した、AIエンターテイメントの未来。",
    "duration": 9,
    "visual_suggestion

In [7]:
#@title 📰 Load script.json and extract narration segments
import os
import json
from pprint import pprint

# OUTPUT_DIR must already be defined by the Drive-mount cell.
#OUTPUT_DIR = "/content/drive/MyDrive/ai_tiktok"

# Step 1: Load script.json
script_file_path = os.path.join(OUTPUT_DIR, "script.json")
# The file holds non-ASCII (Japanese) text, so read it as UTF-8 explicitly instead of
# relying on the platform's default encoding.
with open(script_file_path, 'r', encoding="utf-8") as f:
    script_data = json.load(f)

print("✅ script_data loaded successfully")

# Step 2: Extract the 'tiktok_script' list
topics_dict = script_data.get("tiktok_script", {})

# If the structure is nested again (double-wrapped), unwrap one level
if isinstance(topics_dict, dict) and "tiktok_script" in topics_dict:
    topic_list = topics_dict["tiktok_script"]
elif isinstance(topics_dict, list):
    topic_list = topics_dict
else:
    raise ValueError("❌ Unexpected structure in script_data['tiktok_script'].")

# Step 3: Extract narration texts, skipping malformed topics with a warning
narration_texts = []

for topic_idx, topic in enumerate(topic_list):
    if not isinstance(topic, dict):
        print(f"⚠️ Topic {topic_idx} is not a dict — skipping")
        continue

    text = topic.get("narration")
    if text:
        narration_texts.append(text)
    else:
        print(f"⚠️ Missing narration in topic {topic_idx}")

# Save all narrations, space-separated, to a single text file.
# NOTE(review): "narratin_text.txt" looks like a typo for "narration_text.txt"; the name
# is kept because other code may read this exact path — confirm before renaming.
merged_narration_texts = " ".join(narration_texts)
narration_file_path = os.path.join(OUTPUT_DIR, "narratin_text.txt")
with open(narration_file_path, "w", encoding="utf-8") as file:
    file.write(merged_narration_texts)

print(f"✅ Extracted {len(narration_texts)} narration segments.")

# Optional: Preview the first three narrations
for i, t in enumerate(narration_texts[:3]):
    print(f"{i+1}. {t}")


✅ script_data loaded successfully
✅ Extracted 6 narration segments.
1. デザインツールFigmaがついに上場！デザイン業界に更なる革新をもたらすか？今後の動向に注目。
2. XがAIボットによるコミュニティノートを解禁！誤情報の拡散を防ぐ新たな試み。効果はいかに？
3. Ultra Mobileがデータ容量を増量！しかも価格据え置き！ユーザーにとっては嬉しいニュース。乗り換え検討のチャンス？


In [6]:
#@title Generate audio for each text segment using gTTS and store the generated audio objects or paths.
from gtts import gTTS
import glob
import os

# One entry per narration text: the saved mp3 path, or None if generation failed.
audio_segments = []
output_audio_dir = os.path.join(OUTPUT_DIR, "audio_segments")
os.makedirs(output_audio_dir, exist_ok=True)

for i, text in enumerate(narration_texts):
    try:
        tts = gTTS(text=text, lang='ja', slow=False)
        audio_filename = f"segment_{i:03d}.mp3"
        audio_filepath = os.path.join(output_audio_dir, audio_filename)
        tts.save(audio_filepath)
        audio_segments.append(audio_filepath)
        print(f"✅ Generated audio for segment {i}: {audio_filename}")
    except Exception as e:
        print(f"Error generating audio for segment {i}: {text} - {e}")
        # Keep positions aligned with narration_texts by recording the failure as None.
        audio_segments.append(None)

# The concatenation cell picks up every *.mp3 in this folder, so segment files that were
# not produced by this run (left over from an earlier script, or from a failed save)
# would end up in the final narration. Remove them.
fresh_paths = {path for path in audio_segments if path}
for existing_path in glob.glob(os.path.join(output_audio_dir, "segment_*.mp3")):
    if existing_path not in fresh_paths:
        os.remove(existing_path)
        print(f"🧹 Removed stale audio file: {os.path.basename(existing_path)}")

# Report only the files that were actually written; failures are counted separately.
failed_count = len(audio_segments) - len(fresh_paths)
print(f"\nGenerated {len(fresh_paths)} audio files.")
if failed_count:
    print(f"⚠️ {failed_count} segment(s) failed to generate.")

✅ Generated audio for segment 0: segment_000.mp3
✅ Generated audio for segment 1: segment_001.mp3
✅ Generated audio for segment 2: segment_002.mp3
✅ Generated audio for segment 3: segment_003.mp3
✅ Generated audio for segment 4: segment_004.mp3
✅ Generated audio for segment 5: segment_005.mp3

Generated 6 audio files.


In [7]:
#@title Concatenate Audio Files
import os
import glob
from pydub import AudioSegment

# Set working directory to folder containing audio segments.
# NOTE(review): os.chdir changes the working directory for every later cell as well;
# it is kept because other code may rely on it — consider globbing AUDIO_DIR directly.
os.chdir(AUDIO_DIR)
print("🔁 Working directory:", os.getcwd())

# Load all .mp3 files (relative names, sorted so segment_000, segment_001, ... stay in order)
audio_segments = sorted(glob.glob("*.mp3"))
print(f"🎧 Found {len(audio_segments)} audio segments.")

pause_duration = 500  # milliseconds of silence between consecutive segments

# First decode every readable file, then join. Deciding where pauses go only after
# loading means a file that fails to decode cannot leave a dangling pause at the end.
loaded_segments = []
for audio_path in audio_segments:
    if not (audio_path and os.path.exists(audio_path)):
        print(f"⚠️ Skipping missing file: {audio_path}")
        continue
    try:
        loaded_segments.append(AudioSegment.from_file(audio_path, format="mp3"))
    except Exception as e:
        print(f"❌ Error processing {audio_path}: {e}")

combined_narration = AudioSegment.empty()
for i, segment_audio in enumerate(loaded_segments):
    if i > 0:
        combined_narration += AudioSegment.silent(duration=pause_duration)
    combined_narration += segment_audio

# Output combined audio
output_combined_audio_path = os.path.join(OUTPUT_DIR, "final_narration.mp3")

if len(combined_narration) == 0:
    print("⚠️ No audio was combined. Output will be empty.")
else:
    try:
        combined_narration.export(output_combined_audio_path, format="mp3")
        print(f"✅ Narration saved: {output_combined_audio_path} ({combined_narration.duration_seconds:.1f} sec)")
    except Exception as e:
        print(f"❌ Error exporting audio: {e}")


🔁 Working directory: /content/drive/MyDrive/ai_tiktok/audio_segments
🎧 Found 6 audio segments.
✅ Narration saved: /content/drive/MyDrive/ai_tiktok/final_narration.mp3 (69.1 sec)
