# Setup Lengkap

In [None]:
# @title 👣 Buat struktur folder
import os

# Root of the project tree; the four sub-folder names below are module-level
# values that other cells read.
base_dir = "/content/transkrip_project"
video_dir, audio_dir, output_dir, log_dir = (
    os.path.join(base_dir, sub) for sub in ("video", "audio", "output", "logs")
)

# Create every working folder (no error when it already exists).
for folder in (video_dir, audio_dir, output_dir, log_dir):
    os.makedirs(folder, exist_ok=True)

# Report the resulting layout.
print("📁 Struktur folder dibuat:")
for label, folder in (
    ("Video", video_dir),
    ("Audio", audio_dir),
    ("Output", output_dir),
    ("Log", log_dir),
):
    print(f"- {label}: {folder}")

In [None]:
# @title 🧩 Install Library Tambahan
# @markdown 👈 klik untuk menginstall library
!pip install -q gdown
!pip install -q yt-dlp
!pip install -q tqdm
!pip install -q ffmpeg-python
!pip install -q m3u8downloader
!sudo apt-get install -y ffmpeg

# 🔧 Install Pyrogram dan TgCrypto (untuk kecepatan)
!pip install -U pyrogram tgcrypto

!pip install git+https://github.com/openai/whisper.git
!sudo apt-get update -y

# Download Video

In [None]:
# @title 🍪 [Manual] Upload Cookies
# @markdown - Upload file cookies (Untuk download twitter yang membutuhkan auth)
from google.colab import files
import os

print("📤 Silakan upload file cookies.txt (hasil dari Kiwi Browser)...")
uploaded = files.upload()

# First uploaded name ending in .txt, or None when there is none.
cookies_file = next(
    (candidate for candidate in uploaded.keys() if candidate.endswith(".txt")),
    None,
)

if not (cookies_file and os.path.exists(cookies_file)):
    print("❌ File .txt tidak valid ditemukan atau gagal upload.")
else:
    # Normalise the file name to cookies.txt.
    if cookies_file == "cookies.txt":
        print("✅ File sudah bernama cookies.txt")
    else:
        os.rename(cookies_file, "cookies.txt")
        print(f"📁 File '{cookies_file}' di-rename menjadi 'cookies.txt'")

    # Preview up to the first three lines of the file.
    print("📄 Contoh isi cookies.txt:")
    with open("cookies.txt", "r", encoding="utf-8") as f:
        for i in range(3):
            line = f.readline()
            if not line:
                continue
            print("  ", line.strip())

In [None]:
# @title 🔗 [A1] Video Downloader (non-twitter)
# @markdown - Nama file boleh kosong
download_type = "auto"  # @param ["auto", "google_drive", "direct", "m3u8"]
video_url = ""  # @param {type:"string"}
file_name = ""  # @param {type:"string"}

from datetime import datetime
import os

# Storage location for downloaded videos
video_dir = "/content/transkrip_project/video"
os.makedirs(video_dir, exist_ok=True)

# Use the given name (forcing an .mp4 suffix) or fall back to a timestamped one
if file_name.strip():
    if not file_name.endswith(".mp4"):
        file_name = file_name + ".mp4"
else:
    timestamp = datetime.now().strftime("video_%Y%m%d_%H%M%S")
    file_name = f"{timestamp}.mp4"

# Final output path read by the download cell
output_path = os.path.join(video_dir, file_name)

for summary_line in (
    f"🎯 Link: {video_url}",
    f"🧩 Jenis Unduhan: {download_type}",
    f"📁 File akan disimpan di: {output_path}",
):
    print(summary_line)

In [None]:
# @title 🚀 [A2] Proses Download Berdasarkan Jenis yang Dipilih
# @markdown 👈 klik untuk mengunduh

import subprocess
import re
import os

def extract_drive_id(url):
    """Return the Google Drive file ID embedded in *url*, or None.

    Recognised URL shapes:
      - ``drive.google.com/file/d/<ID>`` (optionally followed by ``/view``,
        a query string or a fragment)
      - ``drive.google.com/open?...id=<ID>``
      - ``drive.google.com/uc?...id=<ID>`` where ``id`` may come after other
        query parameters such as ``export=download``
    """
    patterns = [
        # Stop at "/", "?" or "#" so a trailing query string is not captured.
        r"drive\.google\.com/file/d/([^/?#]+)",
        # ``id`` may be any query parameter, not only the first one.
        r"drive\.google\.com/(?:open|uc)\?(?:[^#]*&)?id=([^&#]+)",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def is_m3u8(url):
    """Tell whether *url* looks like an HLS playlist link.

    True when the URL contains ``.m3u8?`` (playlist followed by a query
    string) or ends with ``.m3u8``.
    """
    if ".m3u8?" in url:
        return True
    return url.endswith(".m3u8")

def is_drive(url):
    """Tell whether *url* mentions the ``drive.google.com`` host name."""
    return url.find("drive.google.com") >= 0

# 🔧 Pastikan dependensi
print("🔧 Memastikan dependensi...")
!apt -qq install -y ffmpeg
!pip install -q --upgrade yt-dlp
!pip install -q --user gdown m3u8downloader

def _run_and_stream(cmd):
    """Run *cmd*, echo its output on one refreshing line, return its exit code."""
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    for line in process.stdout:
        print(f"\r{line.strip()[:150]}", end="", flush=True)
    return process.wait()


# Nothing can be downloaded without a URL; fail before invoking any tool.
if not video_url.strip():
    raise ValueError("❌ URL video belum diisi.")

# Pick the tool from the user's choice, or auto-detect it from the URL
tool = download_type

if tool == "auto":
    if is_drive(video_url):
        tool = "google_drive"
    elif is_m3u8(video_url):
        tool = "m3u8"
    else:
        tool = "direct"

print(f"🚀 Menggunakan alat: {tool}")

# ===================== Download =====================
exit_code = 0  # exit status of the external downloader, when one is used

if tool == "google_drive":
    print("📁 Mendownload dari Google Drive...")
    import gdown
    drive_id = extract_drive_id(video_url)
    if not drive_id:
        raise ValueError("❌ Tidak dapat mendeteksi ID file dari URL Google Drive.")
    gdown.download(f"https://drive.google.com/uc?id={drive_id}", output_path, quiet=False)

elif tool == "m3u8":
    print("📺 Mendownload dari M3U8 (streaming)...")
    exit_code = _run_and_stream([
        os.path.expanduser("~/.local/bin/downloadm3u8"),
        "-o", output_path,
        video_url
    ])

elif tool == "direct":
    print("📥 Mendownload dari Direct Link menggunakan yt-dlp...")
    exit_code = _run_and_stream([
        "yt-dlp",
        "-f", "best",
        "-o", output_path,
        "--no-warnings",
        "--retries", "3",
        video_url
    ])

else:
    raise ValueError("❌ Jenis download tidak dikenali!")

# Surface a failing downloader explicitly; the file check below still runs.
if exit_code != 0:
    print(f"\n❌ Proses download gagal (exit code {exit_code}).")

# Check the result
if os.path.exists(output_path):
    size = os.path.getsize(output_path) / (1024 * 1024)
    print(f"\n\n✅ Selesai! File disimpan di: {output_path}")
    print(f"📦 Ukuran file: {size:.2f} MB")
else:
    print("\n⚠️ Download selesai tapi file tidak ditemukan.")

In [None]:
# @title 🎞️ [B] Unduh Video Twitter
# @markdown - Masukkan URL tweet (single-video / multi-video)
tweet_url = ""  # @param {type: "string"}

import os, re, json, glob, subprocess, datetime
from tqdm import tqdm

# Prepare the working folders
base_dir = "/content/transkrip_project"
video_dir = os.path.join(base_dir, "video")
output_dir = os.path.join(base_dir, "output")
os.makedirs(video_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# Extract the numeric tweet ID from the URL
match = re.search(r"/status/(\d+)", tweet_url)
if not match:
    raise ValueError("❌ URL tidak valid: Tidak ditemukan Tweet ID.")
tweet_id = match.group(1)

# Metadata probe only (extract_info is called with download=False)
info = None
ydl_opts = {
    "quiet": True,
    "simulate": True,
    "extract_flat": True,
    "dump_single_json": True,
}
print(f"🔍 Mendeteksi metadata tweet... ({tweet_url})")

try:
    import yt_dlp
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(tweet_url, download=False)
except Exception as e:
    # Best effort: the download below is still attempted without metadata.
    print(f"❌ Gagal mendeteksi metadata: {e}")
    if "authentication" in str(e).lower():
        print("🔐 Tweet ini kemungkinan membutuhkan cookies.txt (login).")

# Save the metadata JSON when it is available
if info:
    json_path = os.path.join(output_dir, f"deteksi_tweet_{tweet_id}.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(info, f, ensure_ascii=False, indent=2)
    print(f"📄 Metadata disimpan ke: {json_path}")
else:
    print("⚠️ Metadata tidak tersedia atau tidak disimpan.")

# Use cookies.txt from the working directory when it exists
use_cookies = os.path.exists("cookies.txt")
print("🔐 Menggunakan cookies.txt" if use_cookies else "🔓 Tidak menggunakan cookies")

# Build the yt-dlp command line
print("📥 Mulai mengunduh video...\n")
command = ["yt-dlp"]
if use_cookies:
    command += ["--cookies", "cookies.txt"]
command += [
    "-f", "best",
    "-o", f"{video_dir}/%(id)s_video.%(ext)s",
    tweet_url
]

# Compiled once; matches progress figures such as "42.7%" in yt-dlp output.
percent_pattern = re.compile(r'(\d{1,3}\.\d)%')

progress_bar = tqdm(total=100, desc="📥 Download", unit="%")
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

for line in process.stdout:
    line = line.strip()
    if "ERROR" in line:
        # Error lines used to be dropped silently; show them to the user.
        print(line)
    elif "%" in line:
        match = percent_pattern.search(line)
        if match:
            percent = float(match.group(1))
            progress_bar.n = int(percent)
            progress_bar.refresh()
    elif "[download]" in line or "Destination" in line:
        print(line)
return_code = process.wait()

# Only show a full bar when yt-dlp actually succeeded.
if return_code == 0:
    progress_bar.n = 100
    progress_bar.refresh()
progress_bar.close()

if return_code == 0:
    print("✅ Download selesai.")
else:
    print(f"❌ yt-dlp gagal (exit code {return_code}).")

# Final status: every "*_video.*" file currently in the video folder
downloaded = glob.glob(os.path.join(video_dir, "*_video.*"))
total_size_mb = sum(os.path.getsize(f) for f in downloaded) / (1024 * 1024)
file_names = [os.path.basename(f) for f in downloaded]

print("\n📊 Ringkasan Status:")
print(f"- 📌 URL Tweet       : {tweet_url}")
print(f"- 🆔 ID Tweet        : {tweet_id}")
print(f"- 🔐 Cookies         : {'✅ Digunakan' if use_cookies else '❌ Tidak digunakan'}")
print(f"- 📄 Metadata JSON   : {'✅ Tersimpan' if info else '❌ Tidak ada'}")
print(f"- 📁 Total Video     : {len(downloaded)} file")
print(f"- 💾 Ukuran Total    : {total_size_mb:.2f} MB")
print(f"- 🕒 Selesai pada     : {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"- 📂 Lokasi Video    : {video_dir}")
print(f"- 📜 Daftar File     :")
for i, fname in enumerate(file_names, 1):
    print(f"   {i}. {fname}")

# Ekstrak Audio dari Video

In [None]:
# @title 🔉 Ekstrak Audio
# @markdown - Masukkan file Videonya
video_input_path = ""  # @param {type:"string"}

import os

# Destination folder for the extracted audio
audio_dir = "/content/transkrip_project/audio"
os.makedirs(audio_dir, exist_ok=True)

# Name the .wav after the video file, minus its extension
video_name, _video_ext = os.path.splitext(os.path.basename(video_input_path))
audio_path = os.path.join(audio_dir, video_name + ".wav")

# Fungsi konversi ukuran file
def human_readable_size(size_bytes):
    """Format a byte count with a binary unit (B, KB, MB, GB, TB or PB).

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PB) is reached, then rendered with two decimals.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    value = size_bytes
    idx = 0
    # "not (value < 1024)" mirrors the original comparison exactly.
    while not (value < 1024) and idx < len(units) - 1:
        value /= 1024
        idx += 1
    return f"{value:.2f} {units[idx]}"

import subprocess  # not imported at the top of this cell

# Remove a stale output so the check below reflects this run only
if os.path.exists(audio_path):
    os.remove(audio_path)

# Run the extraction
print("🔊 Menjalankan ekstraksi audio...")
print(f"🎬 Input:  {video_input_path}")
print(f"🎧 Output: {audio_path}")

# Argument list instead of a shell line: file names containing quotes, "$"
# or other shell metacharacters are passed to ffmpeg unchanged.
# Output is 16 kHz mono 16-bit PCM WAV.
ffmpeg_result = subprocess.run(
    [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", video_input_path,
        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
        audio_path,
    ],
    capture_output=True, text=True, errors="replace",
)

# ffmpeg reports problems on stderr; relay them to the cell output.
if ffmpeg_result.stderr.strip():
    print(ffmpeg_result.stderr.strip())

# Success requires both a zero exit status and an output file
if ffmpeg_result.returncode == 0 and os.path.exists(audio_path):
    size_str = human_readable_size(os.path.getsize(audio_path))
    print("\n✅ Audio berhasil diekstrak!")
    print(f"📍 Lokasi  : {audio_path}")
    print(f"📦 Ukuran  : {size_str}")
else:
    print("\n❌ Gagal mengekstrak audio.")

# Audio to Teks

In [None]:
# @title 👣 [1] Pilih Model Whisper { run: "auto" }
model_choice = "base"  # @param ["tiny", "base", "small", "medium", "large"]

print(f"Model : {model_choice}")

Model : base


In [None]:
# @title 👣 [2] Transkripsi Audio
# @markdown - Gunakan Model dari Cell 1

# @markdown - Otomatis menyimpan hasil transkripsi ke `/content/transkrip_project/output`

audio_input_path = "/content/transkrip_project/audio/PRED-144 Eimi Fukada.wav"  # @param {type:"string"}

import whisper
import time
import os
import json

# Output folder for transcription results
output_dir = "/content/transkrip_project/output"
os.makedirs(output_dir, exist_ok=True)

# Validate the audio path before loading the model
if not os.path.exists(audio_input_path):
    raise FileNotFoundError(f"❌ File tidak ditemukan: {audio_input_path}")

# Use the model name chosen in the model-selection cell (model_choice)
chosen_model = model_choice
print(f"📚 Memuat model Whisper: {chosen_model} ...")
# The timer starts before the model is loaded, so the reported duration
# covers both model loading and transcription.
start = time.time()

model = whisper.load_model(chosen_model)
result = model.transcribe(audio_input_path, verbose=True)

end = time.time()
print(f"⏱️ Transkripsi selesai dalam {end - start:.2f} detik")

# Save the "segments" entry of the result as JSON.
# The file name is fixed, so each run overwrites the previous segments.json.
json_output_path = os.path.join(output_dir, "segments.json")
with open(json_output_path, "w", encoding="utf-8") as f:
    json.dump(result["segments"], f, ensure_ascii=False, indent=2)

# Show where the result went plus the first 100 characters of the text
print(f"\n✅ Hasil transkripsi disimpan ke: {json_output_path}")
print("📝 Cuplikan teks:")
print(result["text"][:100] + "...")

In [None]:
# @title 👣 [3] Ekspor ke Subtitle .srt dengan Auto-Split Teks { run: "auto" }
# @markdown - Atur maksimal jumlah karakter per baris subtitle
json_path = "/content/transkrip_project/output/segments.json"  # @param {type:"string"}
max_chars_per_line = 95  # @param {type:"slider", min:40, max:120, step:5}

import os
import json
import textwrap
from datetime import timedelta

# 🔧 Fungsi bantu konversi waktu
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Hours are not wrapped at 24, so durations of a day or more still give a
    valid timestamp. Negative input is clamped to zero. Anything finer than
    a millisecond is truncated.
    """
    # timedelta rounds the float to whole microseconds; floor-dividing by one
    # millisecond then yields the total number of whole milliseconds.
    elapsed = max(timedelta(seconds=seconds), timedelta(0))
    total_ms = elapsed // timedelta(milliseconds=1)
    hours, rest = divmod(total_ms, 3600000)
    minutes, rest = divmod(rest, 60000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# 🔠 Fungsi bungkus teks per baris
def wrap_text(text, width=80):
    """Wrap *text* with ``textwrap.wrap`` at *width* and join the lines with newlines."""
    wrapped_lines = textwrap.wrap(text, width=width)
    return "\n".join(wrapped_lines)

# Load the transcript segments from JSON
if not os.path.exists(json_path):
    raise FileNotFoundError(f"❌ File tidak ditemukan: {json_path}")

with open(json_path, "r", encoding="utf-8") as f:
    segments = json.load(f)

# Build the subtitle body: index, time range, wrapped text, blank separator
srt_lines = []
for i, seg in enumerate(segments, 1):
    start = format_time(seg["start"])
    end = format_time(seg["end"])
    text = wrap_text(seg["text"].strip(), width=max_chars_per_line)
    srt_lines += [f"{i}", f"{start} --> {end}", text, ""]

# Write the .srt into the output folder, named after the JSON file
json_base_name = os.path.splitext(os.path.basename(json_path))[0]
srt_output_path = os.path.join("/content/transkrip_project/output", f"{json_base_name}.srt")

srt_content = "\n".join(srt_lines)
with open(srt_output_path, "w", encoding="utf-8") as f:
    f.write(srt_content)

print(f"✅ Subtitle disimpan ke: {srt_output_path}")

# Upload to Google Drive

In [None]:
# @title 🔌 [Mount] Sambungkan Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# @title ☁️ [Manual] Upload ke Google Drive
# @markdown - `drive_target_folder`: Masukkan path folder tujuan di Google Drive

# @markdown - `drive_filename`: Masukkan nama file di Google Drive (opsional, kosongkan untuk gunakan nama asli)

# @markdown - `file_to_upload`: Masukkan file lokal yang ingin diupload

drive_target_folder = ""  # @param {type:"string"}
drive_filename = ""  # @param {type:"string"}
file_to_upload = ""  # @param {type:"string"}

import os, shutil, sys, datetime

# Google Drive must be mounted before anything can be copied to it
if not os.path.exists("/content/drive"):
    print("❌ Google Drive belum di-mount.")
    print("🔧 Jalankan dulu cell: `from google.colab import drive; drive.mount('/content/drive')`")
    sys.exit()

# The source must be an existing regular file (shutil.copy cannot copy a folder)
if not os.path.isfile(file_to_upload):
    print(f"❌ File tidak ditemukan: {file_to_upload}")
    sys.exit()

# Source file info
file_size_mb = os.path.getsize(file_to_upload) / (1024 * 1024)
file_name_local = os.path.basename(file_to_upload)
print(f"🔍 File sumber ditemukan:")
print(f"   📄 Nama       : {file_name_local}")
print(f"   📦 Ukuran     : {file_size_mb:.2f} MB")
print(f"   📂 Lokasi     : {file_to_upload}")

# Keep the original name when no target name was given
if not drive_filename:
    drive_filename = file_name_local

# Build the full target path. It is normalised first so that ".." segments
# cannot slip past the prefix check below.
drive_target_path = os.path.normpath(os.path.join(drive_target_folder, drive_filename))

# The target must live inside the mounted Drive
if not drive_target_path.startswith("/content/drive/"):
    print("❌ Path tujuan harus berada di dalam `/content/drive/`")
    sys.exit()

# Create the target folder when it does not exist yet
if not os.path.exists(drive_target_folder):
    os.makedirs(drive_target_folder, exist_ok=True)
    print(f"📁 Folder baru dibuat: {drive_target_folder}")
else:
    print(f"📁 Folder tujuan ditemukan: {drive_target_folder}")

# Copy the file onto the mounted Drive
print("📤 Mengunggah file ke Google Drive...")
shutil.copy(file_to_upload, drive_target_path)

# Completion timestamp
upload_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Final summary
print("\n✅ File berhasil dikirim ke Google Drive:")
print(f"   📄 Nama File   : {drive_filename}")
print(f"   📦 Ukuran      : {file_size_mb:.2f} MB")
print(f"   📂 Lokasi GDrive : {drive_target_path}")
print(f"   🕒 Selesai pada  : {upload_time}")

In [None]:
# @title ☁️ [Manual] Upload Banyak File ke Google Drive (Auto Rename)
# @markdown - `upload_source_folder`: Folder lokal berisi file yang akan diupload

# @markdown - `drive_target_folder`: Tujuan upload di Google Drive

upload_source_folder = ""  # @param {type:"string"}
drive_target_folder = ""  # @param {type:"string"}

import os, shutil, sys, datetime

# Google Drive must be mounted before anything can be copied to it
if not os.path.exists("/content/drive"):
    print("❌ Google Drive belum di-mount.")
    print("🔧 Jalankan dulu cell: `from google.colab import drive; drive.mount('/content/drive')`")
    sys.exit()

# The source must be an existing folder (os.listdir fails on a plain file)
if not os.path.isdir(upload_source_folder):
    print(f"❌ Folder sumber tidak ditemukan: {upload_source_folder}")
    sys.exit()

# Same rule as the single-file upload cell: the target must live inside the
# mounted Drive. The path is normalised so ".." segments cannot bypass it.
normalized_target = os.path.normpath(drive_target_folder) if drive_target_folder else ""
if not (normalized_target + os.sep).startswith("/content/drive/"):
    print("❌ Path tujuan harus berada di dalam `/content/drive/`")
    sys.exit()

# Collect regular files only (sub-folders are skipped)
file_list = [f for f in os.listdir(upload_source_folder)
             if os.path.isfile(os.path.join(upload_source_folder, f))]

if not file_list:
    print("⚠️ Tidak ada file di dalam folder untuk diupload.")
    sys.exit()

# Create the target folder when it does not exist yet
if not os.path.exists(drive_target_folder):
    os.makedirs(drive_target_folder, exist_ok=True)
    print(f"📁 Folder tujuan baru dibuat: {drive_target_folder}")
else:
    print(f"📁 Folder tujuan ditemukan: {drive_target_folder}")

# Start uploading
print(f"\n📤 Mengunggah {len(file_list)} file ke Google Drive...\n")

for i, filename in enumerate(file_list, 1):
    local_path = os.path.join(upload_source_folder, filename)

    # Auto-rename: append _1, _2, ... until the target name is free
    name_only, ext = os.path.splitext(filename)
    drive_path = os.path.join(drive_target_folder, filename)
    counter = 1
    while os.path.exists(drive_path):
        new_name = f"{name_only}_{counter}{ext}"
        drive_path = os.path.join(drive_target_folder, new_name)
        counter += 1
    final_name = os.path.basename(drive_path)

    # Size info
    file_size_mb = os.path.getsize(local_path) / (1024 * 1024)

    # Copy the file
    shutil.copy(local_path, drive_path)
    upload_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Report; counter > 1 means at least one rename happened
    if counter > 1:
        print(f"⚠️  File dengan nama sama ditemukan. Diubah menjadi: {final_name}")
    print(f"✅ ({i}/{len(file_list)}) {final_name}")
    print(f"   📦 Ukuran   : {file_size_mb:.2f} MB")
    print(f"   📂 Tujuan   : {drive_path}")
    print(f"   🕒 Waktu    : {upload_time}\n")

print("🎉 Semua file berhasil diupload (dengan rename otomatis jika perlu).")

# Upload to Telegram

In [None]:
# @title 🎯 Ambil Thumbnail dan Durasi Video
video_path = ""  # @param {type:"string"}

import os
import subprocess
import json

# Create the metadata and thumbnail folders
meta_dir = "/content/transkrip_project/meta"
thumb_dir = "/content/transkrip_project/thumbnails"
os.makedirs(meta_dir, exist_ok=True)
os.makedirs(thumb_dir, exist_ok=True)

# Derive the thumbnail and metadata file names from the video's base name
basename = os.path.splitext(os.path.basename(video_path))[0]
thumbnail_path = os.path.join(thumb_dir, f"{basename}_thumb.jpg")
json_path = os.path.join(meta_dir, f"{basename}_meta.json")

# Grab a single frame at the 1-second mark as the thumbnail.
# ffmpeg's output is captured and its exit status is not inspected here, so
# a failed extraction is not reported by this step.
print("📸 Mengambil thumbnail...")
subprocess.run([
    "ffmpeg", "-y",
    "-i", video_path,
    "-ss", "00:00:01.000",
    "-vframes", "1",
    "-q:v", "2",
    thumbnail_path
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# ⏱️ Ambil durasi, format, dan ukuran file
def extract_video_info(path):
    """Probe *path* with ffprobe and return ``(duration, format_name, size_bytes)``.

    Returns:
        duration as float seconds, the format name string reported by
        ffprobe, and the file size in bytes as int.

    Raises:
        RuntimeError: ffprobe exited with an error, or its JSON output did
            not contain the expected ``format`` fields.
    """
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-select_streams", "v:0",
         "-show_entries", "format=duration,format_name,size", "-of", "json", path],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    # Report ffprobe's own message instead of a JSON decoding error.
    if result.returncode != 0:
        raise RuntimeError(f"❌ ffprobe gagal membaca {path}: {result.stderr.strip()}")
    try:
        fmt = json.loads(result.stdout)["format"]
        durasi = float(fmt["duration"])
        format_name = fmt["format_name"]
        size_bytes = int(fmt["size"])
    except (ValueError, KeyError, TypeError) as err:
        raise RuntimeError(f"❌ Output ffprobe tidak lengkap untuk {path}") from err
    return durasi, format_name, size_bytes

duration_sec, format_name, size_bytes = extract_video_info(video_path)
# Minutes:seconds, seconds zero-padded to two digits
duration_str = "{}:{:02d}".format(int(duration_sec // 60), int(duration_sec % 60))

# Metadata record read later by the Telegram upload cell
metadata = dict(
    video_path=video_path,
    thumbnail_path=thumbnail_path,
    duration=int(duration_sec),
    duration_str=duration_str,
    filename=os.path.basename(video_path),
    format=format_name,
    size_mb=round(size_bytes / (1024 * 1024), 2),
)
with open(json_path, "w") as f:
    f.write(json.dumps(metadata, indent=2))

# Print a summary of what was stored
for summary_line in (
    "\n📦 Metadata Disimpan:",
    f"📝 JSON       : {json_path}",
    f"🎞️ Video Asli : {metadata['filename']}",
    f"🖼️ Thumbnail  : {os.path.basename(thumbnail_path)}",
    f"⏱️ Durasi     : {duration_str}",
    f"💾 Ukuran     : {metadata['size_mb']} MB",
    f"📀 Format     : {format_name.upper()}",
):
    print(summary_line)

In [None]:
# @title ✈️ Batch Upload Video ke Bot Telegram

meta_dir = "/content/transkrip_project/meta"
CHAT_ID = 123      # @param {type:"number"}
API_ID = 123         # @param {type:"number"}
API_HASH = ""   # @param {type:"string"}
BOT_TOKEN = ""  # @param {type:"string"}

from pyrogram import Client
from pyrogram.enums import ParseMode
from tqdm import tqdm
import asyncio, json, os, glob, traceback, re, time

# Escape karakter Markdown
def escape_md(text):
    """Return *text* with a backslash placed before each Markdown-special character."""
    return re.sub(
        r'[_*\[\]()~`>#+\-=|{}.!]',
        lambda found: "\\" + found.group(0),
        text,
    )

# Progress bar di terminal
def progress(current, total):
    """Upload-progress callback: move the global tqdm ``bar`` forward to *current*.

    *total* is accepted for the callback signature but not used.
    """
    already_shown = bar.n
    bar.update(current - already_shown)

# Status awal upload
def status_awal(filename, filesize_mb, duration, current_index, total_count):
    """Build the "upload starting" chat message for one file."""
    message_lines = [
        f"🚀 Upload ({current_index}/{total_count})",
        "",
        "╭───────── Detail Upload ─────────╮",
        "",
        f"  📁 Nama   : {filename}",
        f"  📦 Ukuran : {filesize_mb:.2f} MB",
        f"  🕒 Durasi : {duration} detik",
        "  ⏳ Status : Mengunggah...",
        "",
        "╰───────────────────────────╯",
        "",  # the message ends with a newline
    ]
    return "\n".join(message_lines)

# Status sukses (termasuk waktu upload per file)
def status_sukses(filename, current_index, total_count, waktu_upload):
    """Build the "upload succeeded" chat message, including the upload time in seconds."""
    message_lines = [
        f"✅ Upload Berhasil! ({current_index}/{total_count})",
        "",
        "╭───────── Detail Upload ─────────╮",
        "",
        f"  🎬 File    : {filename}",
        "  📤 Status  : Sukses",
        "  🧹 Cleanup : File dihapus otomatis",
        f"  ⏱️ Waktu   : {waktu_upload:.2f} detik",
        "",
        "╰───────────────────────────╯",
        "",  # the message ends with a newline
    ]
    return "\n".join(message_lines)

# Status error
def status_error(filename, error_text, current_index, total_count):
    """Build the "upload failed" chat message carrying the error text."""
    message_lines = [
        f"❌ Upload Gagal! ({current_index}/{total_count})",
        "",
        "╭───────── Detail Upload ─────────╮",
        "",
        f"  📁 File   : {filename}",
        f"  ⚠️ Error  : {error_text}",
        "",
        "╰───────────────────────────╯",
        "",  # the message ends with a newline
    ]
    return "\n".join(message_lines)

# Kirim video per file
async def kirim_video(app, meta_path, current_index, total_count):
    """Upload the video described by one metadata JSON file and report to the chat.

    Reads ``video_path``, ``thumbnail_path``, ``filename`` and ``duration``
    from the JSON at *meta_path*, posts a "starting" message, sends the video
    with a tqdm progress bar, posts a success message and then deletes the
    thumbnail, metadata and video files.

    Any exception is caught, printed and reported to the chat as an error
    message; it is not re-raised, so the caller cannot tell success from
    failure.

    Args:
        app: client object providing ``send_message`` and ``send_video``.
        meta_path: path of the metadata JSON file.
        current_index: 1-based position of this file in the batch.
        total_count: number of files in the batch.
    """
    # ``progress`` reads the bar through this module-level name.
    global bar
    # Pre-bound so the error handler can read it even when loading the
    # metadata itself fails (previously an UnboundLocalError).
    meta = {}
    try:
        start_upload = time.time()

        with open(meta_path, "r") as f:
            meta = json.load(f)

        video_path = meta["video_path"]
        thumbnail_path = meta["thumbnail_path"]
        filename = escape_md(meta["filename"])
        duration = meta["duration"]
        filesize = os.path.getsize(video_path)
        filesize_mb = filesize / (1024 * 1024)

        print(f"\n📤 Mengunggah video {current_index} dari {total_count}: {filename}")
        await app.send_message(
            chat_id=CHAT_ID,
            text=status_awal(filename, filesize_mb, duration, current_index, total_count),
            parse_mode=ParseMode.MARKDOWN
        )

        bar = tqdm(total=filesize, unit='B', unit_scale=True, desc=f"📤 Upload {current_index}/{total_count}", dynamic_ncols=True)
        try:
            await app.send_video(
                chat_id=CHAT_ID,
                video=video_path,
                thumb=thumbnail_path,
                caption=filename,
                duration=duration,
                supports_streaming=True,
                progress=progress
            )
        finally:
            # Close the bar on failure too, so it does not linger on screen.
            bar.close()
        print("✅ Upload berhasil!")

        waktu_upload = round(time.time() - start_upload, 2)
        await app.send_message(
            chat_id=CHAT_ID,
            text=status_sukses(filename, current_index, total_count, waktu_upload),
            parse_mode=ParseMode.MARKDOWN
        )

        # Clean up only after a successful upload
        for f in [thumbnail_path, meta_path, video_path]:
            if os.path.exists(f):
                os.remove(f)
                print(f"🗑️ Dihapus: {f}")

    except Exception as e:
        error_msg = str(e) or traceback.format_exc()
        print(f"❌ Error saat upload video {current_index} dari {total_count}: {error_msg}")
        # Fall back to the metadata file name when no usable metadata was read.
        fallback_name = os.path.basename(meta_path)
        shown_name = meta.get("filename", fallback_name) if isinstance(meta, dict) else fallback_name
        await app.send_message(
            chat_id=CHAT_ID,
            text=status_error(
                escape_md(shown_name),
                escape_md(error_msg),
                current_index,
                total_count
            ),
            parse_mode=ParseMode.MARKDOWN
        )

# Proses batch upload
async def batch_upload():
    """Upload every video that has a ``*_video_meta.json`` file in ``meta_dir``.

    Opens the client session, sends each video in sorted metadata-file order
    via ``kirim_video`` with a 2-second pause between files, then posts a
    summary with the file count, total size and elapsed time. When no
    metadata file is found, a notice is sent to the chat instead.
    """
    async with Client("upload_bot", api_id=API_ID, api_hash=API_HASH, bot_token=BOT_TOKEN) as app:
        # Only metadata files whose name ends in "_video_meta.json" are picked up.
        meta_files = sorted(glob.glob(os.path.join(meta_dir, "*_video_meta.json")))
        total = len(meta_files)
        print(f"📦 Menemukan {total} file untuk diunggah.")

        if total == 0:
            await app.send_message(
                chat_id=CHAT_ID,
                text="⚠️ Tidak ada file video yang ditemukan untuk diupload.",
                parse_mode=ParseMode.MARKDOWN
            )
            return

        start_time = time.time()
        total_size_bytes = 0

        # Sizes are summed up front because kirim_video deletes the files.
        for meta_path in meta_files:
            try:
                with open(meta_path, "r") as f:
                    meta = json.load(f)
                total_size_bytes += os.path.getsize(meta["video_path"])
            except (OSError, ValueError, KeyError, TypeError):
                # Best effort: an unreadable entry just does not count toward
                # the total; kirim_video reports the actual failure later.
                continue

        for idx, meta_path in enumerate(meta_files, start=1):
            await kirim_video(app, meta_path, idx, total)
            await asyncio.sleep(2)

        elapsed = time.time() - start_time
        minutes, seconds = divmod(int(elapsed), 60)
        total_size_mb = total_size_bytes / (1024 * 1024)

        # NOTE(review): kirim_video swallows its own errors, so this summary is
        # sent even when individual uploads failed.
        await app.send_message(
            chat_id=CHAT_ID,
            text=f"""✅ Batch Upload Selesai!!

📁 Total File   : {total} video
📦 Total Ukuran : {total_size_mb:.2f} MB
⏱️ Total Waktu  : {minutes} menit {seconds} detik

🎉 Semua video berhasil diupload!
""",
            parse_mode=ParseMode.MARKDOWN
        )

# Jalankan
await batch_upload()

# Utility

In [None]:
# @title 📄 [View Log] Tampilkan isi log yt-dlp
# @markdown - Menampilkan isi file yt_dlp_log.txt (log download video)

import os  # this cell uses os but did not import it itself
from collections import deque

log_path = "/content/transkrip_project/logs/yt_dlp_log.txt"  # log location

# Number of trailing lines to show (change as needed)
num_lines = 50

if os.path.exists(log_path):
    print(f"📁 Menampilkan isi log dari: {log_path}\n")

    # A bounded deque keeps only the last num_lines lines while streaming
    # the file, instead of loading the whole log into memory.
    with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
        log_lines = deque(f, maxlen=num_lines)

    for line in log_lines:
        print(line.rstrip())
else:
    print("❌ File log belum tersedia atau belum dibuat.")

In [None]:
# @title 📑 Tampilkan Ringkasan Transkripsi

# @markdown - Menampilkan Ringkasan dari File JSON atau SRT

transcript_path = ""  # @param {type:"string"}

import os
import json

def _print_srt_cue(block):
    """Print one SRT cue as ``[timecode] text``; blocks with fewer than 3 lines are ignored."""
    if len(block) >= 3:
        timecode = block[1]
        text = " ".join(block[2:])
        print(f"[{timecode}] {text}")


# Validate the input file
if not os.path.exists(transcript_path):
    raise FileNotFoundError(f"❌ File tidak ditemukan: {transcript_path}")

print(f"📑 Menampilkan isi dari: {transcript_path}\n")

# ========= .json (list of Whisper segments) =========
if transcript_path.endswith(".json"):
    with open(transcript_path, "r", encoding="utf-8") as f:
        segments = json.load(f)

    for seg in segments:
        start = round(seg["start"], 2)
        end = round(seg["end"], 2)
        text = seg["text"].strip()
        print(f"[{start} - {end}] {text}")

# ========= .srt (plain subtitle) =========
elif transcript_path.endswith(".srt"):
    with open(transcript_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    block = []
    for line in lines:
        line = line.strip()
        if line == "":
            _print_srt_cue(block)
            block = []
        else:
            block.append(line)
    # Flush the final cue: a file that does not end with a blank line would
    # otherwise lose its last subtitle.
    _print_srt_cue(block)

else:
    raise ValueError("❌ Format file tidak didukung. Gunakan .json atau .srt.")

In [None]:
# @title 🔍 Deteksi Apakah Audio Lossless atau Lossy
# @markdown - Masukkan path lengkap file audio:
audio_file_path = ""  # @param {type:"string"}

import subprocess
import json
import os

def is_lossless(codec_name):
    """Return True when *codec_name* names a codec that is generally lossless.

    Every linear ``pcm_*`` variant counts as lossless, not only the 16- and
    24-bit little-endian ones. Companded PCM (A-law, mu-law, VIDC) is
    excluded. Anything that is not a string yields False.
    """
    if not isinstance(codec_name, str):
        return False
    if codec_name.startswith("pcm_"):
        # Logarithmically quantised "PCM" variants lose information.
        return codec_name not in {"pcm_alaw", "pcm_mulaw", "pcm_vidc"}
    return codec_name in {"flac", "alac", "ape", "wavpack", "tta", "mlp", "truehd"}

if not os.path.exists(audio_file_path):
    raise FileNotFoundError(f"❌ File tidak ditemukan: {audio_file_path}")

# Run ffprobe to get the stream metadata as JSON
cmd = [
    "ffprobe", "-v", "error", "-show_streams", "-print_format", "json", audio_file_path
]
result = subprocess.run(cmd, capture_output=True, text=True)

# Guard against empty or non-JSON ffprobe output; treat it as "no streams".
try:
    info = json.loads(result.stdout) if result.stdout.strip() else {}
except json.JSONDecodeError:
    info = {}

# Take the first audio stream, if any
audio_stream = next(
    (stream for stream in info.get("streams", []) if stream.get("codec_type") == "audio"),
    None,
)

if audio_stream:
    codec = audio_stream.get("codec_name", "unknown")
    sample_rate = audio_stream.get("sample_rate", "unknown")
    channels = audio_stream.get("channels", "unknown")
    bit_rate = int(audio_stream["bit_rate"]) if "bit_rate" in audio_stream else None
    duration = float(audio_stream.get("duration", 0.0))

    readable_rate = f"{bit_rate/1000:.1f} kbps" if bit_rate else "Unknown"

    print(f"📄 Informasi Audio:")
    print(f"🔹 Codec       : {codec}")
    print(f"🔹 Sample Rate : {sample_rate} Hz")
    print(f"🔹 Channels    : {channels}")
    print(f"🔹 Bitrate     : {readable_rate}")
    print(f"⏱️ Durasi      : {duration:.2f} detik")

    if is_lossless(codec):
        print("\n✅ Audio ini menggunakan codec lossless.")
    else:
        print("\n⚠️ Audio ini tampaknya **lossy** (kompresi dengan kehilangan data).")
else:
    print("❌ Tidak ada stream audio yang ditemukan dalam file.")
    # Relay ffprobe's own error text when it failed.
    if result.returncode != 0 and result.stderr.strip():
        print(result.stderr.strip())

In [None]:
# @title 🎬 [Insert] Embed Subtitle (.srt) → Output MKV
# @markdown - Masukkan path lengkap video dan subtitle (.srt)
video_input_path = ""  # @param {type:"string"}
subtitle_path = ""  # @param {type:"string"}

import os
import subprocess

# Same output folder the other cells use; defined here so this cell does not
# depend on another cell having set output_dir in the current session.
output_dir = "/content/transkrip_project/output"
os.makedirs(output_dir, exist_ok=True)

# Fail early with a clear message when an input is missing
for required_path in (video_input_path, subtitle_path):
    if not os.path.isfile(required_path):
        raise FileNotFoundError(f"❌ File tidak ditemukan: {required_path}")

# Video base name without its extension
video_basename = os.path.splitext(os.path.basename(video_input_path))[0]

# Output video (mkv)
output_video_path = os.path.join(output_dir, f"{video_basename}_with_subtitles.mkv")

# ffmpeg command
cmd = [
    "ffmpeg",
    "-y",                # overwrite an existing output so re-running the cell works
    "-i", video_input_path,
    "-i", subtitle_path,
    "-c", "copy",        # no re-encode
    "-c:s", "srt",       # keep the subtitle in .srt format
    "-map", "0",
    "-map", "1",
    output_video_path
]

print("🎞️ Menyisipkan subtitle ke video (.mkv, softsub)...")
subprocess.run(cmd, check=True)
print(f"✅ Video akhir disimpan di: {output_video_path}")

In [None]:
# @title 🗂️ [Manual] Kompres Folder Menjadi .zip
# @markdown - Masukkan path folder dan nama file zip output
folder_to_zip = ""  # @param {type:"string"}
zip_filename = ""  # @param {type:"string"}

import shutil
import os

# Validate the input folder first
if not os.path.isdir(folder_to_zip):
    raise FileNotFoundError(f"❌ Folder tidak ditemukan: {folder_to_zip}")

# Archive name: the given name, else the folder's own name. make_archive
# appends ".zip" itself, so exactly one trailing ".zip" is removed here
# (str.replace would also hit ".zip" elsewhere in the path).
zip_name = zip_filename.strip() or os.path.basename(os.path.normpath(folder_to_zip)) or "archive"
if zip_name.lower().endswith(".zip"):
    zip_name = zip_name[:-4]

# Compress the folder; make_archive returns the path of the archive it wrote,
# which is stored inside transkrip_project.
zip_path = shutil.make_archive(
    os.path.join("/content/transkrip_project", zip_name), 'zip', folder_to_zip
)
print(f"✅ Folder berhasil dikompres ke ZIP: {zip_path}")

# List the files of the compressed folder, relative to its root
print("\n📂 Daftar isi dalam folder:")
for root, dirs, files in os.walk(folder_to_zip):
    for file in files:
        rel_dir = os.path.relpath(root, folder_to_zip)
        rel_file = os.path.join(rel_dir, file) if rel_dir != "." else file
        print(f"  ├─ {rel_file}")

# Show the archive size
if os.path.exists(zip_path):
    size_mb = os.path.getsize(zip_path) / (1024 * 1024)
    print(f"\n📦 Ukuran file ZIP: {size_mb:.2f} MB")
else:
    print("⚠️ File ZIP tidak ditemukan.")

In [None]:
# @title 📤 [Manual] Cek & Ekstrak Subtitle Softsub dari Video
# @markdown - Masukkan path video yang ingin dicek
video_with_sub_path = ""  # @param {type:"string"}

import subprocess, os, json

# Lokasi output subtitle
subtitle_extracted_path = os.path.join(output_dir, "extracted_subtitle.srt")

# 1️⃣ Cek dengan ffprobe apakah subtitle stream tersedia
probe_cmd = [
    "ffprobe",
    "-v", "error",
    "-select_streams", "s",
    "-show_entries", "stream=index:stream_tags=language",
    "-of", "json",
    video_with_sub_path
]

probe_result = subprocess.run(probe_cmd, capture_output=True, text=True)

# Parse hasil JSON
streams = json.loads(probe_result.stdout).get("streams", [])

if not streams:
    print("⚠️ Video tidak memiliki stream subtitle (softsub).")
else:
    print(f"✅ Ditemukan {len(streams)} subtitle stream. Mengekstrak stream pertama...")

    # 2️⃣ Jalankan ffmpeg untuk ekstrak subtitle pertama
    extract_cmd = [
        "ffmpeg",
        "-y",
        "-i", video_with_sub_path,
        "-map", "0:s:0",
        subtitle_extracted_path
    ]
    result = subprocess.run(extract_cmd, capture_output=True, text=True)

    if result.returncode == 0 and os.path.exists(subtitle_extracted_path):
        print(f"✅ Subtitle berhasil diekstrak ke: {subtitle_extracted_path}")
    else:
        print("❌ Gagal mengekstrak subtitle.")
        print("📄 FFmpeg log:")
        print(result.stderr)

In [None]:
# @title 🧹 Bersihkan File Setelah Proses
# @markdown Pilih folder yang ingin dibersihkan:
hapus_video = True  # @param {type:"boolean"}
hapus_audio = True  # @param {type:"boolean"}
hapus_output = True  # @param {type:"boolean"}
hapus_logs = True  # @param {type:"boolean"}

import os

def delete_all_files(folder):
    """Delete every regular file directly inside *folder*.

    Sub-folders and their contents are left untouched. Each removal (or
    failure) is printed; a notice is printed when the folder does not exist
    or when nothing was deleted.
    """
    if not os.path.exists(folder):
        print(f"❌ Folder tidak ditemukan: {folder}")
        return

    removed_any = False
    for entry in os.listdir(folder):
        target = os.path.join(folder, entry)
        if not os.path.isfile(target):
            continue
        try:
            os.remove(target)
        except Exception as e:
            print(f"⚠️ Gagal hapus {target}: {e}")
        else:
            print(f"🗑️ Dihapus: {target}")
            removed_any = True

    if not removed_any:
        print(f"ℹ️ Tidak ada file yang dihapus di: {folder}")

# Clean each folder whose checkbox is ticked.
# video_dir, audio_dir, output_dir and log_dir are not defined in this cell;
# they must already exist as notebook globals set by earlier cells.
if hapus_video:
    delete_all_files(video_dir)

if hapus_audio:
    delete_all_files(audio_dir)

if hapus_output:
    delete_all_files(output_dir)

if hapus_logs:
    delete_all_files(log_dir)