In [58]:
import os
import json
import time
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
from typing import List
import instructor
from pydantic import BaseModel

class Category(BaseModel):
    primary: str
    secondary: str

class ResultDetail(BaseModel):
    exist: str
    category: Category | None = None
    freshness: str | None = None
    description: str | None = None

class ResultList(BaseModel):
    result: List[ResultDetail] = []
    description: str

import dashscope

import re
def extract_json_content_multiline(text):
    matched_content = re.search(r"```json\s([\s\S]+?)\s```", text)
    return matched_content.group(1) if matched_content else None

gpt_prompt1='''任务简介：
- 识别每张图片中的主要物品，判断其分类并评估新旧程度。

分类标准：
- 服装：大衣、皮衣、半袖、裤子、裙子、内衣裤、秋衣类、毛衣类、工装、校服。
- 家电：空气净化器、厨房家电、家居家电。
- 乐器：电子乐器、琴类。
- 玩具：小件（手办）、大件、毛绒玩具。
- 图书：儿童绘本、课外书、小说、套装书籍、课本。
- 手机：智能手机、功能机。
- 笔记本电脑：品牌电脑、非品牌电脑。

对于没有给出分类的物品，需要你自行判断一级分类（primary）和二级分类（secondary），此时exist设置为"否"。

回答格式（JSON）,切勿返回除JSON格式外的其他内容：
{
    "exist": "是/否",
    "category": {
        "primary": "一级分类",
        "secondary": "二级分类"
    },
    "freshness": "新/旧/中等",
    "description": "具体描述"
}
'''

def describe_multiple(images, prompt=gpt_prompt1):
    response = dashscope.MultiModalConversation.call(
        model="qwen-vl-plus",
        messages=[{
            "role": "user",
            "content": [{"text": prompt},] +
            [{"image": image} for image in images]
        }]
    )
    result = response.output.choices[0].message.content[0]["text"]
    print(result)
    text = extract_json_content_multiline(result)
    print(text)
    if text is None: return ResultDetail(exist="否", description=result)
    data = json.loads(text)
    if type(data) == dict:
        item = ResultDetail(**data)
        return item
    #elif type(data) == list and len(data) > 0:
    return data
        

In [50]:
urls = [f"https://milo-test.oss-cn-zhangjiakou.aliyuncs.com/hdd/batch1/image{i+1:03}.png" for i in range(241)]

In [51]:
from IPython.display import Image, display, HTML

In [52]:
%%time
items = []
for i in tqdm(range(241)):
    ret = describe_multiple(urls[i:i+1])
    items.append(ret)
    #time.sleep(1)

  7%|█████▋                                                                           | 17/241 [01:08<15:08,  4.05s/it]


JSONDecodeError: Extra data: line 9 column 2 (char 149)

In [53]:
print(ret, i)

exist='是' category=Category(primary='衣物', secondary='条纹衫') freshness='新' description='一件红黑相间的竖条纹短袖衬衫。' 17


In [59]:
 describe_multiple(urls[i:i+1])

```json
{
    "exist": "是",
    "category": {
        "primary": "衣物",
        "secondary": "上衣"
    },
    "freshness": "新",
    "description": "灰色高领长袖衬衫"
},
```
{
    "exist": "是",
    "category": {
        "primary": "衣物",
        "secondary": "上衣"
    },
    "freshness": "新",
    "description": "灰色高领长袖衬衫"
},


JSONDecodeError: Extra data: line 9 column 2 (char 149)

In [9]:
table = "<table><tr><th>Image</th><th>Result</th></tr>"
for i in range(1):
    table += f"<tr><td><img src='{urls[i]}' width='100'></td><td>{ret}</td></tr>"    

In [10]:
display(HTML(table))

Image,Result
,"exist='是' category=Category(primary='鞋子', secondary='运动鞋') freshness='旧' description='一双白色带有蓝色条纹装饰的运动鞋。'"


In [11]:
from tqdm import tqdm

In [12]:
for i in tqdm(range(100)):
    pass

100%|████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 197100.75it/s]


In [14]:
%%time
import time
time.sleep(1)

CPU times: total: 15.6 ms
Wall time: 1 s


In [26]:
display(Image(url=f"https://milo-test.oss-cn-zhangjiakou.aliyuncs.com/hdd/batch1/image{i+1:03}.png"))

In [27]:
describe_multiple(urls[i:i+1])

```json
[
    {
        "exist": "是",
        "category": {
            "primary": "图书",
            "secondary": "儿童绘本"
        },
        "freshness": "新",
        "description": "中国儿童百科全书科学与技术篇"
    },
    {
        "exist": "是",
        "category": {
            "primary": "图书",
            "secondary": "儿童绘本"
        },
        "freshness": "新",
        "description": "中国儿童百科全书文化生活篇"
    }
]
```

根据提供的信息，两张图都是关于《中国儿童百科全书》系列的两册。这两本书都属于“儿童绘本”类别，并且它们的新旧程度均为“新”。


JSONDecodeError: Extra data: line 21 column 1 (char 392)

In [28]:
s = '''```json
[
    {
        "exist": "是",
        "category": {
            "primary": "图书",
            "secondary": "儿童绘本"
        },
        "freshness": "新",
        "description": "中国儿童百科全书科学与技术篇"
    },
    {
        "exist": "是",
        "category": {
            "primary": "图书",
            "secondary": "儿童绘本"
        },
        "freshness": "新",
        "description": "中国儿童百科全书文化生活篇"
    }
]
```

根据提供的信息，两张图都是关于《中国儿童百科全书》系列的两册。这两本书都属于“儿童绘本”类别，并且它们的新旧程度均为“新”。'''

In [33]:
import re
def extract_json_content_multiline(text):
    matched_content = re.search(r"```json\s([\s\S]+?)\s```", text)
    return matched_content.group(1) if matched_content else None

In [35]:
text = extract_json_content_multiline(s)

In [37]:
item1 = json.loads(text)

In [41]:
type(item1) == dict

False

In [39]:
type(item1[0])

dict

In [48]:
ResultDetail(exist="否")

ResultDetail(exist='否', category=None, freshness=None, description=None)