In [1]:
###############################################
# Extract the circumstance from the image
################################################

import requests
import ollama
# Ollama 서버 설정
ollama_host = "http://sg027:11434"
client = ollama.Client(host=ollama_host)  # 클라이언트 인스턴스 생성
prompt = "Analyze the given image and describe the specific actions and interactions of the people in this circumstance.\
Focus on what they are doing, their gestures, expressions, and interactions, and provide general details about the environment or objects. \
Guess what kind of conversation might typically occur in this situation. Ignore any information about text."

try:
    response = requests.get(ollama_host)
    print("Server connected")
    print(response.text)
    
except requests.ConnectionError:
    print("Not connected")

Server connected
Ollama is running


In [3]:
###############################################
# Test llama vision working
# Must choose a100:2, for llama3.2-vision:90b
# a100:1 works for llama3.2-vision
################################################

import base64

with open("./Data_Images/고물가시대 혜자로운 코스트코 장보기/frame_0009.png", "rb") as img_file:
    base64_image = base64.b64encode(img_file.read()).decode("utf-8")
    
response = client.chat(
    model='llama3.2-vision', # need a100:1 gpu
    # model='llama3.2-vision:90b', # need a100:2 gpu
    messages=[{
        'role': 'user',
        'content': prompt,
        'images': [base64_image]  # Base64 문자열 전달
    }]
)

print(response['message']['content'])

In this serene image, a couple is situated within a cozy living room or studio apartment. The woman stands at the edge of an orange couch with a white blanket draped over it, while the man stands by the sliding glass door on the right side of the picture.

On the left wall, two chairs are positioned in front of a wooden table and a large, modern TV is mounted against the wall. A small coffee table sits between the sofa and the woman's feet, creating a comfortable seating area. The beige carpet adds warmth to the space. Outside the glass door, tall buildings can be seen through the sheer white curtains.

The overall atmosphere suggests that this couple may be enjoying some quality time together, possibly relaxing or watching TV in their home.


In [4]:
###############################################
# Test llama vision working
# Must choose a100:2, for llama3.2-vision:90b
# a100:1 works for llama3.2-vision
################################################
response = client.chat(
    # model='llama3.2-vision', # need a100:1 gpu
    model='llama3.2-vision:90b', # need a100:2 gpu
    messages=[{
        'role': 'user',
        'content': prompt,
        'images': [base64_image]  # Base64 문자열 전달
    }]
)

print(response['message']['content'])

The image shows a couple standing by an open balcony door in a modern apartment.

In the foreground, on the right side of the image, the woman is wearing a light green dress and has her hands outstretched towards the balcony. The man is to her left, wearing a dark t-shirt. 

To their left are two chairs and a small table with a bottle of champagne and glasses. In front of them is a large TV screen on a wall-mounted stand.

The background features a city skyline at night, with tall buildings and bright lights visible through the open balcony door. 

Overall, the image suggests that the couple is celebrating a special occasion or enjoying some time together in their luxurious apartment.


In [None]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
base_path = "/scratch/jsong132/Can_LLM_Learn_New_Language"
drama_folder_name = "100명, 100개의 동그라미, 50만 달러"
version = "v2"

image_dir = Path(f'{base_path}/Data_Images/{drama_folder_name}')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path

output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'{base_path}/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


[2025-03-16 19:11:21] [INFO] Starting image processing for 986 images
[2025-03-16 19:11:21] [INFO] Processing image (1/986): frame_0895.png
[2025-03-16 19:11:21] [INFO] Image encoded in 0.07s
[2025-03-16 19:11:53] [INFO] API response received in 31.53s
[2025-03-16 19:11:53] [INFO] Processing image (2/986): frame_0840.png
[2025-03-16 19:11:53] [INFO] Image encoded in 0.28s
[2025-03-16 19:11:57] [INFO] API response received in 4.60s
[2025-03-16 19:11:57] [INFO] Processing image (3/986): frame_0258.png
[2025-03-16 19:11:58] [INFO] Image encoded in 0.15s
[2025-03-16 19:12:00] [INFO] API response received in 2.40s
[2025-03-16 19:12:00] [INFO] Processing image (4/986): frame_0400.png
[2025-03-16 19:12:00] [INFO] Image encoded in 0.05s
[2025-03-16 19:12:03] [INFO] API response received in 3.09s
[2025-03-16 19:12:03] [INFO] Processing image (5/986): frame_0266.png
[2025-03-16 19:12:03] [INFO] Image encoded in 0.06s
[2025-03-16 19:12:07] [INFO] API response received in 3.43s
[2025-03-16 19:12:0

In [7]:
# second start

In [None]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
base_path = "/scratch/jsong132/Can_LLM_Learn_New_Language"
drama_folder_name = "house_of_cards"
version = "v2"

image_dir = Path(f'{base_path}/Data_Images/{drama_folder_name}')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path

output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'{base_path}/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


In [12]:
# third start

In [None]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
base_path = "/scratch/jsong132/Can_LLM_Learn_New_Language"
drama_folder_name = "Juilliard•NYC VLOG 석사 2학년 시이작!"
version = "v2"

image_dir = Path(f'{base_path}/Data_Images/{drama_folder_name}')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path

output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'{base_path}/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


In [17]:
# Fourth start

In [None]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
base_path = "/scratch/jsong132/Can_LLM_Learn_New_Language"
drama_folder_name = "test"
version = "v2"

image_dir = Path(f'{base_path}/Data_Images/{drama_folder_name}')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path

output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'{base_path}/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


In [None]:
# Fifth Start

In [None]:
#######################
# Check all the images in one drama folder
# Read and analyze through llama3.2-vision
#######################

import os
import json
import base64
import ollama
import time
import traceback
from pathlib import Path
from datetime import datetime

# 경로 설정
base_path = "/scratch/jsong132/Can_LLM_Learn_New_Language"
drama_folder_name = "고물가시대 혜자로운 코스트코 장보기"
version = "v2"

image_dir = Path(f'{base_path}/Data_Images/{drama_folder_name}')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')

#######################
# model choose
#######################
used_model = "llama3.2-vision"

# 출력 디렉토리 생성
output_file.parent.mkdir(parents=True, exist_ok=True)

# 로깅 설정
def log(message, level="INFO"):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}")

# 초기화
results = []
VALID_EXTENSIONS = ('.png', '.jpg', '.jpeg')
total_images = len([f for f in image_dir.iterdir() if f.suffix.lower() in VALID_EXTENSIONS])
processed = 0

log(f"Starting image processing for {total_images} images")

# 이미지 처리
for image_path in image_dir.iterdir():
    if not (image_path.is_file() and image_path.suffix.lower() in VALID_EXTENSIONS):
        continue

    processed += 1
    log(f"Processing image ({processed}/{total_images}): {image_path.name}")
    start_time = time.time()
    
    try:
        # 이미지 인코딩
        encode_start = time.time()
        with open(image_path, "rb") as img_file:
            base64_image = base64.b64encode(img_file.read()).decode("utf-8")
        encode_time = time.time() - encode_start
        log(f"Image encoded in {encode_time:.2f}s")

        # API 요청
        api_start = time.time()
        used_model = "llama3.2-vision"
        response = client.chat(
        ###################### Choose Model ###################
            model="llama3.2-vision",
            # "llama3.2-vision:90b"
            # llama3.2-vision"
            messages=[{
                'role': 'user',
                'content': prompt,
                'images': [base64_image]
            }]
        )
        api_time = time.time() - api_start
        log(f"API response received in {api_time:.2f}s")

        # 결과 저장
        results.append({
            'image': str(image_path),
            'response': response['message']['content'],
            'processing_time': {
                'encoding': encode_time,
                'api_call': api_time,
                'total': time.time() - start_time
            },
            'status': 'success'
        })

    except Exception as e:
        error_msg = f"Error processing {image_path.name}: {str(e)}"
        error_trace = traceback.format_exc()
        log(error_msg, "ERROR")
        log(f"Error details:\n{error_trace}", "DEBUG")
        
        results.append({
            'image': str(image_path),
            'error': error_msg,
            'error_trace': error_trace,
            'status': 'failed'
        })

# 결과 저장
save_start = time.time()
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
save_time = time.time() - save_start

log("=========================================")
log(f"Results saved to {output_file} in {save_time:.2f}s")
log(f"Processing completed. Success: {len([x for x in results if x['status']=='success'])}, Failed: {len([x for x in results if x['status']=='failed'])}")
log("=========================================")

#######################
# Organize by frame number
#######################

import json
from pathlib import Path

# 파일 경로 설정
input_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}.json')
output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# JSON 파일 읽기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 아이템 처리 함수
def process_item(item):
    try:
        filename = Path(item['image']).name
        
        # 파일명 구조: frame_0001.png → ['frame', '0001.png']
        parts = filename.split('_')
        
        # 숫자 부분 추출 (frame_0001.png → 0001)
        frame_number = int(parts[1].split('.')[0])  # frame_0001.png → 0001
        
        return {
            'image': filename,
            'frame_number': frame_number,
            'response': item['response'],
            'status': item['status']
        }
    except Exception as e:
        print(f"파일명 형식 오류: {filename} → {str(e)}")
        return None

# 데이터 처리 (오류 항목 필터링)
processed_data = [item for item in (process_item(i) for i in data) if item is not None]

# 숫자 순으로 정렬 (frame_number 기준)
sorted_data = sorted(processed_data, key=lambda x: x['frame_number'])

# 최종 출력 형식
final_data = [
    {
        'used_model': 'llama3.2-vision',  # 모델 이름을 하드코딩 (필요 시 수정)
        'image': sorted_data[0]['image'],
        'response': sorted_data[0]['response'],
    }
] + [
    {
        'image': item['image'],
        'response': item['response'],
    }
    for item in sorted_data[1:]
]

# JSON 파일로 저장
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(final_data, f, ensure_ascii=False, indent=4)
    
# 파일 삭제
if input_file.exists():  # 파일이 존재하는지 확인
    input_file.unlink()  # 파일 삭제
    print(f"파일 제거: {input_file}")
else:
    print(f"{input_file} 파일이 존재하지 않습니다.")
    
print(f"정렬 완료! 결과 파일: {output_file}")

################################################
# Merging llama-vision result + subtitle + timestamp
################################################
import json
from pathlib import Path

output_file = Path(f'{base_path}/Refined_Datas/{version}/Data_llama_vision/{drama_folder_name}_organized.json')

# llama_vision_data.json 파일 로드
with open(output_file, "r", encoding="utf-8") as f:
    llama_vision_data = json.load(f)

subtitle = Path(f'{base_path}/Data_Subtitles/{drama_folder_name}_organized.ko.json')
# subtitle.json 파일 로드
with open(subtitle, "r", encoding="utf-8") as f:
    subtitle_data = json.load(f)

# final_output.json으로 저장할 데이터 리스트 초기화
dataset = []

# 두 파일의 데이터를 매칭하여 dataset 생성
for result_item, subtitle_item in zip(llama_vision_data, subtitle_data):
    input_text = subtitle_item.get("context", "")  # subtitle_data.json의 "context"를 input으로
    timestamp = subtitle_item.get("timestamp", "")  # subtitle_data.json의 "timestamp"를 timestamp
    output_text = result_item.get("response", "")  # llama_vision_data.json의 "response"를 output으로
    
    # input, output, timestamp가 모두 비어있지 않은 경우만 추가
    if input_text and output_text:
        dataset.append({"timestamp": timestamp, "input": input_text, "output": output_text})


final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')
# dataset.json 파일로 저장
with open(final_output, "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json 파일이 생성되었습니다.")

###############################################
# Reverse input <-> output
# Professor suggestion
################################################

import json
from pathlib import Path

# final_output 파일 경로 설정
final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final/{drama_folder_name}_final.json')

# final_output에서 데이터 읽기
with open(final_output, "r", encoding="utf-8") as f:
    dataset = json.load(f)

# input과 output 값을 교환하는 작업
reversed_dataset = []
for item in dataset:
    # input과 output을 서로 바꾸기
    reversed_item = {
        'timestamp': item['timestamp'],
        'input': item['output'],  # output을 input으로
        'output': item['input'],  # input을 output으로
    }
    reversed_dataset.append(reversed_item)

# 기존 dataset과 reversed_dataset을 합치기
combined_dataset = dataset + reversed_dataset

# reversed_final_output 파일 경로 설정
reversed_final_output = Path(f'{base_path}/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json')

# combined dataset을 새로운 JSON 파일로 저장
with open(reversed_final_output, "w", encoding="utf-8") as f:
    json.dump(combined_dataset, f, ensure_ascii=False, indent=4)

print(f"/Refined_Datas/{version}/Data_Final_Reversed/{drama_folder_name}_reversed_final.json 파일이 생성되었습니다.")


In [10]:
###############################################
# Don't need to do
# Extra, this is not a main process
# Make .json file readible
# json_pretty.py
# Make json file readible.
################################################

import json
import textwrap

# JSON 파일 로드
with open(final_output, 'r', encoding='utf-8') as f:
    data = json.load(f)

# response 값을 50자씩 나누어 배열로 분할
for item in data:
    if 'response' in item:
        # 50자씩 나누기
        wrapped_text = textwrap.wrap(item['response'], width=100)
        item['response'] = wrapped_text

# 수정된 JSON 파일 저장
with open(output_pretty, 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

print("JSON 파일이 수정되어 저장되었습니다.")


JSON 파일이 수정되어 저장되었습니다.
