In [None]:
###############################################
# Extract the circumstance from the image
################################################

import requests
import ollama
# Ollama 서버 설정
ollama_host = "http://sg029:11434"
client = ollama.Client(host=ollama_host)  # 클라이언트 인스턴스 생성
prompt = "Analyze the given image and describe the specific actions and interactions of the people in the scene. Focus on what they are doing, their gestures, expressions, and interactions, and provide general details about the environment or objects. Ignore any information about text."

try:
    response = requests.get(ollama_host)
    print("Server connected")
    print(response.text)
    
except requests.ConnectionError:
    print("Not connected")

In [None]:
###############################################
# Test llama vision working
# Must choose a100:2, for llama3.2-vision:90b
# a100:1 works for llama3.2-vision
################################################

import base64

with open("./image3.png", "rb") as img_file:
    base64_image = base64.b64encode(img_file.read()).decode("utf-8")
    
response = client.chat(
    model='llama3.2-vision', # need a100:1 gpu
    # model='llama3.2-vision:90b', # need a100:2 gpu
    # model='llama3.2-vision', # need a100:1 gpu
    messages=[{
        'role': 'user',
        'content': prompt,
        'images': [base64_image]  # Base64 문자열 전달
    }]
)

print(response['message']['content'])

In [16]:
###############################################
# Test llama vision working
# Must choose a100:2, for llama3.2-vision:90b
# a100:1 works for llama3.2-vision
################################################

import base64

with open("./image3.png", "rb") as img_file:
    base64_image = base64.b64encode(img_file.read()).decode("utf-8")
    
response = client.chat(
    # model='llama3.2-vision', # need a100:1 gpu
    model='llama3.2-vision:90b', # need a100:2 gpu
    # model='llama3.2-vision', # need a100:1 gpu
    messages=[{
        'role': 'user',
        'content': prompt,
        'images': [base64_image]  # Base64 문자열 전달
    }]
)

print(response['message']['content'])

The image shows a man and woman sitting on a deck with a dog lying between them. The purpose of the image is to depict a relaxed outdoor scene.

* A man:
	+ Sitting on the left side of the image
	+ Wearing a light-colored shirt
	+ Has dark hair
	+ Looking down at the dog
* A woman:
	+ Sitting on the right side of the image
	+ Wearing a floral dress
	+ Has long brown hair
	+ Petting the dog
* A dog:
	+ Lying between the man and woman
	+ Small to medium size
	+ Brown fur

The image suggests that the couple is enjoying a leisurely moment together, possibly during a picnic or outdoor gathering. The presence of the dog adds a sense of warmth and companionship to the scene.


In [80]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
drama_folder_name = "물가 급등한 발리, 집 구하고 살아본 솔직 후기"
image_dir = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Images/{drama_folder_name}')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

[2025-03-08 17:16:38] [INFO] Starting image processing for 306 images
[2025-03-08 17:16:38] [INFO] Processing image (1/306): frame_0155.png
[2025-03-08 17:16:38] [INFO] Image encoded in 0.06s
[2025-03-08 17:16:43] [INFO] API response received in 4.77s
[2025-03-08 17:16:43] [INFO] Processing image (2/306): frame_0062.png
[2025-03-08 17:16:43] [INFO] Image encoded in 0.03s
[2025-03-08 17:16:45] [INFO] API response received in 2.20s
[2025-03-08 17:16:45] [INFO] Processing image (3/306): frame_0079.png
[2025-03-08 17:16:45] [INFO] Image encoded in 0.04s
[2025-03-08 17:16:47] [INFO] API response received in 2.34s
[2025-03-08 17:16:47] [INFO] Processing image (4/306): frame_0076.png
[2025-03-08 17:16:48] [INFO] Image encoded in 0.09s
[2025-03-08 17:16:50] [INFO] API response received in 2.83s
[2025-03-08 17:16:50] [INFO] Processing image (5/306): frame_0117.png
[2025-03-08 17:16:50] [INFO] Image encoded in 0.06s
[2025-03-08 17:16:55] [INFO] API response received in 4.78s
[2025-03-08 17:16:55

In [81]:
#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

파일 제거: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/물가 급등한 발리, 집 구하고 살아본 솔직 후기.json
정렬 완료! 결과 파일: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/물가 급등한 발리, 집 구하고 살아본 솔직 후기_organized.json


In [82]:
################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path


output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')


# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

/Data_Final/물가 급등한 발리, 집 구하고 살아본 솔직 후기_final.json 파일이 생성되었습니다.


In [83]:
###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


/Data_Final_Reversed/물가 급등한 발리, 집 구하고 살아본 솔직 후기_reversed_final.json 파일이 생성되었습니다.


In [None]:
# second start

In [84]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
drama_folder_name = "미국 음식 먹는 브이로그"
image_dir = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Images/{drama_folder_name}')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

[2025-03-08 17:32:00] [INFO] Starting image processing for 335 images
[2025-03-08 17:32:00] [INFO] Processing image (1/335): frame_0221.png
[2025-03-08 17:32:00] [INFO] Image encoded in 0.05s
[2025-03-08 17:32:05] [INFO] API response received in 4.49s
[2025-03-08 17:32:05] [INFO] Processing image (2/335): frame_0019.png
[2025-03-08 17:32:05] [INFO] Image encoded in 0.12s
[2025-03-08 17:32:07] [INFO] API response received in 2.46s
[2025-03-08 17:32:07] [INFO] Processing image (3/335): frame_0024.png
[2025-03-08 17:32:07] [INFO] Image encoded in 0.03s
[2025-03-08 17:32:09] [INFO] API response received in 1.95s
[2025-03-08 17:32:09] [INFO] Processing image (4/335): frame_0295.png
[2025-03-08 17:32:09] [INFO] Image encoded in 0.07s
[2025-03-08 17:32:13] [INFO] API response received in 3.66s
[2025-03-08 17:32:13] [INFO] Processing image (5/335): frame_0142.png
[2025-03-08 17:32:13] [INFO] Image encoded in 0.05s
[2025-03-08 17:32:16] [INFO] API response received in 2.59s
[2025-03-08 17:32:16

In [85]:
#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

파일 제거: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/미국 음식 먹는 브이로그.json
정렬 완료! 결과 파일: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/미국 음식 먹는 브이로그_organized.json


In [86]:
################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path


output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')


# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

/Data_Final/미국 음식 먹는 브이로그_final.json 파일이 생성되었습니다.


In [87]:
###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


/Data_Final_Reversed/미국 음식 먹는 브이로그_reversed_final.json 파일이 생성되었습니다.


In [88]:
# third start

In [89]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
drama_folder_name = "미래에 간다면 이런 느낌일까"
image_dir = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Images/{drama_folder_name}')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

[2025-03-08 17:47:52] [INFO] Starting image processing for 471 images
[2025-03-08 17:47:52] [INFO] Processing image (1/471): frame_0385.png
[2025-03-08 17:47:52] [INFO] Image encoded in 0.04s
[2025-03-08 17:47:55] [INFO] API response received in 3.34s
[2025-03-08 17:47:55] [INFO] Processing image (2/471): frame_0040.png
[2025-03-08 17:47:55] [INFO] Image encoded in 0.05s
[2025-03-08 17:47:58] [INFO] API response received in 2.61s
[2025-03-08 17:47:58] [INFO] Processing image (3/471): frame_0132.png
[2025-03-08 17:47:58] [INFO] Image encoded in 0.06s
[2025-03-08 17:48:01] [INFO] API response received in 2.54s
[2025-03-08 17:48:01] [INFO] Processing image (4/471): frame_0042.png
[2025-03-08 17:48:01] [INFO] Image encoded in 0.05s
[2025-03-08 17:48:04] [INFO] API response received in 3.09s
[2025-03-08 17:48:04] [INFO] Processing image (5/471): frame_0378.png
[2025-03-08 17:48:04] [INFO] Image encoded in 0.11s
[2025-03-08 17:48:07] [INFO] API response received in 3.10s
[2025-03-08 17:48:07

In [90]:
#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

파일 제거: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/미래에 간다면 이런 느낌일까.json
정렬 완료! 결과 파일: /scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/미래에 간다면 이런 느낌일까_organized.json


In [91]:
################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path


output_file = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_llama_vision/{drama_folder_name}_organized.json')


# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

/Data_Final/미래에 간다면 이런 느낌일까_final.json 파일이 생성되었습니다.


In [92]:
###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'/scratch/jsong132/Can_LLM_Learn_New_Language/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


/Data_Final_Reversed/미래에 간다면 이런 느낌일까_reversed_final.json 파일이 생성되었습니다.


In [10]:
###############################################
# Don't need to do
# Extra, this is not a main process
# Make .json file readible
# json_pretty.py
# Make json file readible.
################################################

import json
import textwrap

# JSON 파일 로드
with open(final_output, 'r', encoding='utf-8') as f:
    data = json.load(f)

# response 값을 50자씩 나누어 배열로 분할
for item in data:
    if 'response' in item:
        # 50자씩 나누기
        wrapped_text = textwrap.wrap(item['response'], width=100)
        item['response'] = wrapped_text

# 수정된 JSON 파일 저장
with open(output_pretty, 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

print("JSON 파일이 수정되어 저장되었습니다.")


JSON 파일이 수정되어 저장되었습니다.
