In [1]:
# Cài Detectron2
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install -q setuptools
# Clone và setup DeepSolo++
!git clone https://github.com/kaitoud906/DeepSolo
!git checkout 85593db8b3bac0fc66ee77a95077c448caf08abc

%cd DeepSolo/DeepSolo++
!pip install -r requirements.txt
!python setup.py build develop
%cd ../../..  # Quay lại thư mục gốc






Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-lqhu69fb
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-lqhu69fb
  Resolved https://github.com/facebookresearch/detectron2.git to commit 18f69583391e5040043ca4f4bebd2c60f0ebfde0
  Preparing metadata (setup.py) ... [?25ldone
Collecting pycocotools>=2.0.2 (from detectron2==0.6)
  Downloading pycocotools-2.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?2

In [2]:
import sys
import torch
import subprocess

def check_versions():
    print("=" * 50)
    print("SYSTEM INFORMATION")
    print("=" * 50)
    
    # Python version
    print(f"Python version: {sys.version.split()[0]}")
    print(f"Python executable: {sys.executable}")
    
    # PyTorch version
    print(f"PyTorch version: {torch.__version__}")
    
    # CUDA information
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version (PyTorch): {torch.version.cuda}")
        print(f"CUDA device count: {torch.cuda.device_count()}")
        print(f"Current CUDA device: {torch.cuda.current_device()}")
        print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
        print(f"CUDA memory allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
        print(f"CUDA memory reserved: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
    
    # GPU information
    try:
        result = subprocess.run(['nvidia-smi', '--query-gpu=gpu_name,driver_version,memory.total', '--format=csv,noheader,nounits'], 
                              capture_output=True, text=True)
        if result.returncode == 0:
            gpu_info = result.stdout.strip().split(', ')
            print(f"GPU: {gpu_info[0]}")
            print(f"Driver version: {gpu_info[1]}")
            print(f"GPU memory: {gpu_info[2]} MB")
    except:
        print("Could not retrieve GPU information")
    
    print("=" * 50)

# Chạy kiểm tra
check_versions()

SYSTEM INFORMATION
Python version: 3.10.13
Python executable: /opt/conda/bin/python3.10
PyTorch version: 2.1.2
CUDA available: True
CUDA version (PyTorch): 12.1
CUDA device count: 2
Current CUDA device: 0
CUDA device name: Tesla T4
CUDA memory allocated: 0.00 MB
CUDA memory reserved: 0.00 MB
GPU: Tesla T4
Driver version: 560.35.03
GPU memory: 15360
Tesla T4 MB


In [None]:
# Cài đặt các thư viện hỗ trợ
!pip install moviepy \
            git+https://github.com/openai/whisper.git \
            opensearch-py \
            requests-aws4auth \
            boto3 \
            nbimporter \
            transformers \
            torch \
            pillow \
            open-clip-torch \
            pymilvus \
            lmdb

In [None]:
!pwd

In [None]:
# Clone và setup Parseq
!git clone https://github.com/baudm/parseq.git
%cd parseq
!make torch-cpu
!pip install -r requirements/core.cpu.txt -e .[train,test]

# (Tuỳ chọn) Cài pip-tools để quản lý yêu cầu phụ thuộc
!pip install pip-tools
!make clean-reqs reqs

In [None]:
!pwd

In [None]:
# In[3]:
# Cell 3: Setup - Import, Config và Các Lớp Quản lý
import cv2
import os
import torch
import open_clip
from PIL import Image
import numpy as np
from tqdm.notebook import tqdm
import time
import whisper
from moviepy.editor import VideoFileClip
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from opensearchpy import OpenSearch, helpers, RequestsHttpConnection
from kaggle_secrets import UserSecretsClient
from strhub.data.module import SceneTextDataModule
import sys
#sys.path.append("/kaggle/working/DeepSolo/DeepSolo++")
from scene_text_detection import SceneTextDetection

# --- LỚP CẤU HÌNH ---
class Config:
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    DATA_ROOT = "/kaggle/input/aic-l01-02/AIC"
    OUTPUT_DIR = "/kaggle/working/output"
    KEYFRAME_OUTPUT_DIR = os.path.join(OUTPUT_DIR, "temp_keyframes")
    KEYFRAME_THRESHOLD = 0.4
    
    user_secrets = UserSecretsClient()
    MILVUS_URI = user_secrets.get_secret("MILVUS_URI")
    MILVUS_TOKEN = user_secrets.get_secret("MILVUS_TOKEN")
    OPENSEARCH_HOST = user_secrets.get_secret("OPENSEARCH_HOST")
    OPENSEARCH_USERNAME = user_secrets.get_secret("OPENSEARCH_USERNAME")
    OPENSEARCH_PASSWORD = user_secrets.get_secret("OPENSEARCH_PASSWORD")
    
    CLIP_COLLECTION = 'arch_clip_v2'
    BEIT3_COLLECTION = 'arch_beit3_v2'
    OBJECT_COLLECTION = 'arch_object_v2'
    COLOR_COLLECTION = 'arch_color_v2'
    ASR_INDEX = "arch_asr_v2"
    OCR_INDEX = "arch_ocr_v2"
    OBJECT_INDEX = "arch_object_v2"
    COLOR_INDEX = "arch_color_v2"

In [None]:
import os

root_dir = "/kaggle/input/aloaic"
file_count = 0

for root, dirs, files in os.walk(root_dir):
    file_count += len(files)

print(f"Tổng số file: {file_count}")


In [None]:
import os

def print_directory_structure(root_dir, indent=""):
    for item in os.listdir(root_dir):
        path = os.path.join(root_dir, item)
        print(indent + "├── " + item)
        if os.path.isdir(path):
            print_directory_structure(path, indent + "│   ")

# Gọi hàm
root_path = "/kaggle/input/aloaic"
print(f"Cấu trúc thư mục của {root_path}:\n")
print_directory_structure(root_path)


In [None]:
import logging
logger = logging.getLogger("detectron2")
logger.setLevel(logging.INFO)  # hoặc DEBUG nếu muốn nhiều thông tin
for handler in logger.handlers[:]:
    logger.removeHandler(handler)

# Ghi ra file
file_handler = logging.FileHandler("mylog.txt")
file_handler.setLevel(logging.INFO)  # phù hợp với logger
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)



from pathlib import Path
import datetime

# Chỉ định thư mục chứa ảnh
input_folder = "/kaggle/input/aloaic/keyframes_output/"

print(f"Processing folder: {input_folder} at {datetime.datetime.now()}")
text_processor.process_folder(input_folder)

In [None]:
folder_name = Path(input_folder).name

command_zip_1 = f'zip -r "/kaggle/working/bbox_pred_{folder_name}.zip" $(find . -type d -wholename "./bbox_pred/{folder_name}*") > /dev/null 2>&1'
!{command_zip_1}

command_zip_2 = f'zip -r "/kaggle/working/bbox_cut_{folder_name}.zip" $(find . -type d -wholename "./bbox_cut/{folder_name}*") > /dev/null 2>&1'
!{command_zip_2}

In [None]:
# command_zip_1 = f'zip -r "/kaggle/working/bbox_pred_{prev_folder}.zip" $(find . -type d -wholename "./bbox_pred/{prev_folder}*") > /dev/null 2>&1'
# !{command_zip_1}
# command_zip_2 = f'zip -r "/kaggle/working/bbox_cut_{prev_folder}.zip" $(find . -type d -wholename "./bbox_cut/{prev_folder}*") > /dev/null 2>&1'
# !{command_zip_2}

In [None]:
# # %cd /kaggle/working
# # # !zip -r "/kaggle/working/L02.zip" $(find . -type d -wholename './bbox_cut/L02*')

# # !zip -r "/kaggle/working/bbox_cut-L08.zip" $(find . -type d -wholename './bbox_cut/L08*')
# # !rm -rf /kaggle/working/bbox_cut/L08
# # !zip -r "/kaggle/working/bbox_pred-L08.zip" $(find . -type d -wholename './bbox_pred/L08*')
# # !rm -rf /kaggle/working/bbox_pred/L08

# # !zip -r "/kaggle/working/bbox_cut-L09.zip" $(find . -type d -wholename './bbox_cut/L09*')
# # !rm -rf /kaggle/working/bbox_cut/L09
# # !zip -r "/kaggle/working/bbox_pred-L09.zip" $(find . -type d -wholename './bbox_pred/L09*')
# # !rm -rf /kaggle/working/bbox_pred/L09

# # !zip -r "/kaggle/working/bbox_cut-L10.zip" $(find . -type d -wholename './bbox_cut/L10*')
# # !rm -rf /kaggle/working/bbox_cut/L10
# # !zip -r "/kaggle/working/bbox_pred-L10.zip" $(find . -type d -wholename './bbox_pred/L10*')
# # !rm -rf /kaggle/working/bbox_pred/L10

# # !zip -r "/kaggle/working/bbox_cut-L11.zip" $(find . -type d -wholename './bbox_cut/L11*')
# # !rm -rf /kaggle/working/bbox_cut/L11
# # !zip -r "/kaggle/working/bbox_pred-L11.zip" $(find . -type d -wholename './bbox_pred/L11*')
# # !rm -rf /kaggle/working/bbox_pred/L11

# # !zip -r "/kaggle/working/bbox_cut-L12.zip" $(find . -type d -wholename './bbox_cut/L12*')
# # !rm -rf /kaggle/working/bbox_cut/L12
# # !zip -r "/kaggle/working/bbox_pred-L12.zip" $(find . -type d -wholename './bbox_pred/L12*')
# # !rm -rf /kaggle/working/bbox_pred/L12