In [1]:
# Install the Ultralytics package (source of the YOLO class imported in a later
# cell), capped at version 8.3.40 by the requirement specifier below.
!pip install "ultralytics<=8.3.40"

Collecting ultralytics<=8.3.40
  Downloading ultralytics-8.3.40-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics<=8.3.40)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics<=8.3.40)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics<=8.3.40)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics<=8.3.40)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics<=8.3.40)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (

In [2]:
# Mount Google Drive at /content/drive; every data, model and output path used
# in this notebook lives under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from ultralytics import YOLO
import os
import random
import shutil
import glob
import matplotlib.pyplot as plt
import base64
import cv2
import numpy as np
from PIL import Image
import io
import openai

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [4]:
# All entries (files and sub-folders alike) directly under the project's data directory.
# NOTE(review): glob.glob does not document any ordering guarantee, yet a later
# cell slices this list positionally (data_folders[3:]) -- confirm which entries
# are meant to be skipped.
data_folders = glob.glob("/content/drive/MyDrive/nlp_project_elte/data/*")

In [5]:
def parse_markdown_sections(lines):
    """Split markdown text into a ``{heading: body}`` dict on level-2 headings.

    Args:
        lines: Iterable of text lines (e.g. an open file object).

    Returns:
        dict mapping each ``## `` heading (prefix removed, surrounding
        whitespace stripped) to the stripped text that follows it, up to the
        next ``## `` heading. Text before the first heading is discarded. If a
        heading occurs more than once, the last occurrence wins.
    """
    parsed = {}
    heading = None
    body = []

    for line in lines:
        if line.startswith("## "):
            # Close the section that was being collected, if any.
            if heading is not None:
                parsed[heading] = ''.join(body).strip()
            # Strip after removing the prefix so that extra spaces around the
            # heading cannot break the later lookup by folder name.
            heading = line[3:].strip()
            body = []
        else:
            body.append(line)

    # The final section has no following heading to trigger its save.
    if heading is not None:
        parsed[heading] = ''.join(body).strip()

    return parsed


# glob.glob gives no ordering guarantee, so do not rely on the descriptions
# file being entry [0]; pick the first regular file explicitly instead.
# NOTE(review): assumes the descriptions markdown is the first (ideally only)
# regular file directly under data/ -- confirm against the Drive layout.
_data_entries = glob.glob("/content/drive/MyDrive/nlp_project_elte/data/*")
_descriptions_path = next((p for p in _data_entries if os.path.isfile(p)), None)
if _descriptions_path is None:
    raise FileNotFoundError(
        "No regular file found under /content/drive/MyDrive/nlp_project_elte/data/"
    )

with open(_descriptions_path, 'r', encoding='utf-8') as f:
    sections = parse_markdown_sections(f)

In [6]:
def convert_yolo_result_to_base64(yolo_result):
    """Render a YOLO result as a PNG image and return it base64-encoded.

    The result is plotted with ``labels=False``, the plotted array is converted
    from BGR to RGB channel order, and the image is serialised to PNG in memory.

    Args:
        yolo_result: A single prediction result exposing ``.plot(labels=...)``.

    Returns:
        str: The base64 text of the PNG bytes, without any
        ``data:image/png;base64,`` prefix.
    """
    annotated_bgr = yolo_result.plot(labels=False)
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

    png_stream = io.BytesIO()
    try:
        Image.fromarray(annotated_rgb).save(png_stream, format="PNG")
        encoded = base64.b64encode(png_stream.getvalue())
    finally:
        png_stream.close()

    return encoded.decode("utf-8")


In [7]:
# Read the OpenAI API key from the Colab secret named 'OPENAI_API_KEY' (so the
# key is never written into the notebook) and build the client that describe()
# uses for its chat-completion requests.
from google.colab import userdata
api = userdata.get('OPENAI_API_KEY')
client = openai.OpenAI(api_key=api)

In [10]:
# Loaded YOLO models keyed by weights path, so that describe() reads the
# weights file once instead of once per image.
_YOLO_MODEL_CACHE = {}


def _get_yolo_model(weights_path):
    """Return the YOLO model for ``weights_path``, loading it on first use only."""
    if weights_path not in _YOLO_MODEL_CACHE:
        _YOLO_MODEL_CACHE[weights_path] = YOLO(weights_path)
    return _YOLO_MODEL_CACHE[weights_path]


def describe(
    img_path,
    text,
    model_path="/content/drive/MyDrive/nlp_project_elte/od/od/saved_model.pt",
    conf=0.25,
    max_tokens=500,
):
    """Generate a textual vehicle description for one image.

    Runs YOLO prediction on the image, renders the first result to a
    base64-encoded PNG via ``convert_yolo_result_to_base64`` and sends it,
    together with ``text``, to the ``gpt-4o`` chat-completions endpoint using
    the module-level ``client``.

    Args:
        img_path: Path of the image passed to YOLO as ``source``.
        text: Extra text appended to the user prompt (for example a technical
            description); pass ``""`` when there is none.
        model_path: Path of the YOLO weights file. The loaded model is cached
            per path and reused by later calls.
        conf: Confidence threshold forwarded to ``predict``.
        max_tokens: Upper bound on the tokens generated for the reply.

    Returns:
        The message content of the first completion choice.
    """
    yolo_model = _get_yolo_model(model_path)

    results = yolo_model.predict(source=img_path, save=False, conf=conf)
    # Get base64-encoded string
    base64_data = convert_yolo_result_to_base64(results[0])
    system_msg = """
    You are a highly observant assistant trained in visual and technical analysis of vehicles. You will receive an image showing a detected vehicle (car, truck, or bus), sometimes accompanied by a technical description.

    Your goal is to generate a rich, accurate, and informative textual description using the following structure:

    ---

    1. **Vehicle Identification and Description**
      - If a technical description is provided, integrate it with your visual analysis. Verify details (e.g., brand, model, year, trim) and provide additional context from the image.
      - If no description is available, generate a detailed narrative based on visual clues alone.
      - Identify:
        - Make and model (e.g., Ford F-150, Toyota Prius)
        - Body type (e.g., sedan, SUV, pickup, minibus)
        - Color and paint condition
        - Visible damage, rust, or wear
        - Modifications (e.g., custom wheels, added accessories, decals)
        - License plate (blur if privacy required)

    2. **Condition Assessment**
      - Estimate the vehicle's condition (e.g., excellent, fair, poor) based on visible wear, cleanliness, or damage.
      - Note any distinguishing features (e.g., dents, broken headlights, missing parts).

    3. **(Optional) Market Price Estimation**
      - Provide an estimated price range based on the make, model, year, and condition.
      - Use a reference source like Kelley Blue Book, Edmunds, or used vehicle marketplaces.
      - Include source of estimation if applicable.

    4. **(Optional) Technical Commentary**
      - If a technical description is present, explain or correct any inconsistencies using visual evidence.
      - Add context (e.g., fuel type, engine class, emissions rating) if visible or inferred.

    5. **(Optional) Suggested Applications**
      - Recommend possible use cases (e.g., private transport, commercial delivery, off-road use).
      - If the vehicle appears damaged or old, suggest resale, repair, parts salvage, or donation.

    ---

    **Output Format Example:**

    Vehicle Summary:
    A silver 2019 Toyota Hiace minibus with commercial decals and minor front bumper damage.

    Detailed Description:
    The vehicle is a mid-sized passenger van with a high roof, silver body paint, and commercial markings on both sides. The front bumper appears slightly dented, and the paint shows signs of wear near the wheel arches. The tires are visibly used but intact. The license plate is partially visible, and the side mirrors are intact. No obvious aftermarket modifications.

    Condition Assessment:
    Good overall condition, with minor cosmetic damage. Likely used for commercial passenger transport.

    Market Price Estimation:
    Estimated value: $17,000 – $20,000
    Based on listings from [Autotrader, OLX, or local market].

    """
    # Send to OpenAI with correct MIME type
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {"type": "text", "text": system_msg},
                ]
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Describe this image. {text}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{base64_data}"
                        }
                    }
                ]
            }
        ],
        max_tokens=max_tokens
    )

    # Return the generated description
    return response.choices[0].message.content


In [20]:
# Parallel lists: data["descriptions"][k] is the text generated for data["images"][k].
data = {"images": [], "descriptions": []}

# The first three entries of data_folders are skipped; each remaining entry is
# scanned for .jpg files and every image is described.
for folder in data_folders[3:]:
    folder_name = folder.split("/")[-1]
    # A section whose heading equals the folder name supplies the extra text.
    has_section = folder_name in sections
    for image_path in glob.glob(f"{folder}/*.jpg"):
        if has_section:
            print(image_path)
            extra_text = sections[folder_name]
        else:
            extra_text = ""
        description = describe(image_path, extra_text)
        data["images"].append(image_path)
        data["descriptions"].append(description)


image 1/1 /content/drive/MyDrive/nlp_project_elte/data/165671/IMG_20241112_133308_anonimized.jpg: 480x640 2 Automobiles, 243.8ms
Speed: 5.7ms preprocess, 243.8ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/drive/MyDrive/nlp_project_elte/data/165671/IMG_20241112_133319_anonimized.jpg: 480x640 3 Automobiles, 257.8ms
Speed: 5.7ms preprocess, 257.8ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/drive/MyDrive/nlp_project_elte/data/165671/IMG_20241112_133327_anonimized.jpg: 480x640 1 Automobile, 260.4ms
Speed: 7.2ms preprocess, 260.4ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/drive/MyDrive/nlp_project_elte/data/165671/IMG_20241112_133336_anonimized.jpg: 480x640 1 Automobile, 168.8ms
Speed: 4.4ms preprocess, 168.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /content/drive/MyDrive/nlp_project_elte/data/165671/IMG_20241112_133333_anonimi

In [21]:
# Turn the collected results into a two-column table ("images", "descriptions"),
# one row per described image.
import pandas as pd
df = pd.DataFrame(data)

In [22]:
# Persist the table to Drive as CSV.
# NOTE(review): to_csv writes the row index as an extra first column by
# default -- confirm whether downstream readers expect it.
df.to_csv("/content/drive/MyDrive/nlp_project_elte/data2.csv")