In [4]:
import streamlit as st
from PIL import Image
import torch
from transformers import AutoModel, AutoTokenizer

# Model path
model_path = "openbmb/MiniCPM-Llama3-V-2_5"

# User and assistant names
U_NAME = "User"
A_NAME = "Assistant"

# Set page configuration
st.set_page_config(
    page_title="MiniCPM-Llama3-V-2_5 Streamlit",
    page_icon=":robot:",
    layout="wide"
)


# Load model and tokenizer
@st.cache_resource
def load_model_and_tokenizer():
    print(f"load_model_and_tokenizer from {model_path}")
    model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.float16).to(device="cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    return model, tokenizer


# Initialize session state
if 'model' not in st.session_state:
    st.session_state.model, st.session_state.tokenizer = load_model_and_tokenizer()
    st.session_state.model.eval()
    print("model and tokenizer had loaded completed!")

# Initialize session state
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# Sidebar settings
sidebar_name = st.sidebar.title("MiniCPM-Llama3-V-2_5 Streamlit")
max_length = st.sidebar.slider("max_length", 0, 4096, 2048, step=2)
repetition_penalty = st.sidebar.slider("repetition_penalty", 0.0, 2.0, 1.05, step=0.01)
top_p = st.sidebar.slider("top_p", 0.0, 1.0, 0.8, step=0.01)
top_k = st.sidebar.slider("top_k", 0, 100, 100, step=1)
temperature = st.sidebar.slider("temperature", 0.0, 1.0, 0.7, step=0.01)

# Clear chat history button
buttonClean = st.sidebar.button("Clear chat history", key="clean")
if buttonClean:
    st.session_state.chat_history = []
    st.session_state.response = ""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    st.rerun()

# Display chat history
for i, message in enumerate(st.session_state.chat_history):
    if message["role"] == "user":
        with st.chat_message(name="user", avatar="user"):
            if message["image"] is not None:
                st.image(message["image"], caption='User uploaded image', width=448, use_column_width=False)
                continue
            elif message["content"] is not None:
                st.markdown(message["content"])
    else:
        with st.chat_message(name="model", avatar="assistant"):
            st.markdown(message["content"])

# Select mode
selected_mode = st.sidebar.selectbox("Select mode", ["Text", "Image"])
if selected_mode == "Image":
    # Image mode
    uploaded_image = st.sidebar.file_uploader("Upload image", key=1, type=["jpg", "jpeg", "png"],
                                              accept_multiple_files=False)
    if uploaded_image is not None:
        st.image(uploaded_image, caption='User uploaded image', width=468, use_column_width=False)
        # Add uploaded image to chat history
        st.session_state.chat_history.append({"role": "user", "content": None, "image": uploaded_image})

# User input box
user_text = st.chat_input("Enter your question")
if user_text:
    with st.chat_message(U_NAME, avatar="user"):
        st.session_state.chat_history.append({"role": "user", "content": user_text, "image": None})
        st.markdown(f"{U_NAME}: {user_text}")

    # Generate reply using the model
    model = st.session_state.model
    tokenizer = st.session_state.tokenizer
    imagefile = None

    with st.chat_message(A_NAME, avatar="assistant"):
        # If the previous message contains an image, pass the image to the model
        if len(st.session_state.chat_history) > 1 and st.session_state.chat_history[-2]["image"] is not None:
            uploaded_image = st.session_state.chat_history[-2]["image"]
            imagefile = Image.open(uploaded_image).convert('RGB')

        msgs = [{"role": "user", "content": user_text}]
        res = model.chat(image=imagefile, msgs=msgs, context=None, tokenizer=tokenizer,
                         sampling=True, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,
                         temperature=temperature, stream=True)

        # Collect the generated_text str
        generated_text = st.write_stream(res)

        st.session_state.chat_history.append({"role": "model", "content": generated_text, "image": None})

    st.divider()

2024-09-13 23:45:58.326 
  command:

    streamlit run /Users/paakhim10/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


load_model_and_tokenizer from openbmb/MiniCPM-Llama3-V-2_5




config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

configuration_minicpm.py:   0%|          | 0.00/4.06k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5:
- configuration_minicpm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_minicpmv.py:   0%|          | 0.00/13.9k [00:00<?, ?B/s]

resampler.py:   0%|          | 0.00/35.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5:
- resampler.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5:
- modeling_minicpmv.py
- resampler.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/62.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/2.42G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/2.42G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/2.30G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]



AssertionError: Torch not compiled with CUDA enabled

In [3]:
import pytesseract
from pytesseract import Output
import cv2
img = cv2.imread('sampleimage2.png')

d = pytesseract.image_to_data(img, output_type=Output.DICT)
n_boxes = len(d['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow('img', img)
cv2.waitKey(0)

In [9]:
import pandas as pd

# Read the CSV files
df1 = pd.read_csv("../dummy.csv")
df2 = pd.read_csv("../resource/dataset/test.csv")

# Ensure both DataFrames have 'index' columns and are reset
df1 = df1.reset_index(drop=True)
df2 = df2.reset_index(drop=True)

# Find how many rows are missing in df1 compared to df2
rows_to_add = len(df2) - len(df1)

# If df1 is smaller than df2, add empty rows
if rows_to_add > 0:
    # Create a DataFrame of empty rows with the same 'index' as df2's extra rows
    empty_rows = pd.DataFrame({
        'index': df2['index'][-rows_to_add:],  # take the last few indices from df2
        'prediction': '' * rows_to_add     # fill with None (or use NaN)
    })
    
    # Append the empty rows to df1
    df1 = pd.concat([df1, empty_rows], ignore_index=True)

# Print the updated df1
print(df1)

         index prediction
0            0           
1            1           
2            2           
3            3           
4            4           
...        ...        ...
131182  131283           
131183  131284           
131184  131285           
131185  131286           
131186  131287           

[131187 rows x 2 columns]


In [5]:
df1 = pd.read_csv("../output_final.csv")
df2 = pd.read_csv("../resource/dataset/test.csv")

ParserError: Error tokenizing data. C error: Expected 2 fields in line 21, saw 3


In [39]:
print(df2[df1['prediction']!=df2['prediction']]['index'])
print(df1[df1['prediction']!=df2['prediction']]['index'])

Series([], Name: index, dtype: int64)
Series([], Name: index, dtype: int64)


In [10]:
df1.to_csv("output_3.csv", index = False)

In [5]:
import pandas as pd

df1 = pd.read_csv("./training/training_ocr_out.csv")
df2 = pd.read_csv("./../resource/dataset/train.csv")

df2 = df2.iloc[:3930]

In [10]:
df2['index'] = df2.index

In [13]:
df = pd.merge(df1, df2, on=['index'])
df.head()

Unnamed: 0,index,prediction,image_link,group_id,entity_name,entity_value
0,0,"['PROPOS', 'NATURE', 'INGREDIENT MENAGER', 'MU...",https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,1,"['TEARRIFIC', 'LEBENSMITTELECHT', 'HDAY', 'GEP...",https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,2,"['COMPOSITION', 'Serving Size:1 Tablet 0.709 g...",https://m.media-amazon.com/images/I/61BZ4zrjZX...,459516,item_weight,0.709 gram
3,3,['WarningConsult your physician before using t...,https://m.media-amazon.com/images/I/612mrlqiI4...,459516,item_weight,0.709 gram
4,4,"['Horbaach', 'HIGHSTRENGTH', 'PSYLLIOM', 'HUSK...",https://m.media-amazon.com/images/I/617Tl40LOX...,731432,item_weight,1400 milligram


In [15]:
df = df.drop(['group_id', 'image_link'], axis=1)

In [17]:
df.to_csv("train_ocr.csv", index=False)

In [18]:
import pandas as pd

In [20]:
data = pd.read_csv('./predictions.csv')

In [21]:
data.head()

Unnamed: 0,index,entity_name,prediction,actual_entity_value,text
0,0,item_weight,500.0 gram,500.0 gram,"['PROPOS', 'NATURE', 'INGREDIENT MENAGER', 'MU..."
1,1,item_volume,,1.0 cup,"['TEARRIFIC', 'LEBENSMITTELECHT', 'HDAY', 'GEP..."
2,2,item_weight,0.709 gram,0.709 gram,"['COMPOSITION', 'Serving Size:1 Tablet 0.709 g..."
3,3,item_weight,0.51 gram,0.709 gram,['WarningConsult your physician before using t...
4,4,item_weight,,1400 milligram,"['Horbaach', 'HIGHSTRENGTH', 'PSYLLIOM', 'HUSK..."


In [25]:
df = data[data['prediction']!=data['actual_entity_value']]

In [29]:
print(df['text'])

1       ['TEARRIFIC', 'LEBENSMITTELECHT', 'HDAY', 'GEP...
4       ['Horbaach', 'HIGHSTRENGTH', 'PSYLLIOM', 'HUSK...
5       ['Horbaach', 'HIGHSTRENGTH', 'Naturally-Source...
6       ['Horbaach', 'Directions: For adults, take two...
                              ...                        
3914    ['36KG', 'NETWEIGHT', '45KG', 'GROSS WEIGHT', ...
3915    ['36KG', 'NETWEIGHT', '45KG', 'GROSS WEIGHT', ...
3916    ['U.S.FOOD&DRUG', 'FDA', 'REGD.No.18823072222'...
3917    ['MUSCLE BUILDING', 'ISOLATE', 'PREMIUM', 'GOL...
3918    ['25GM', 'SOLATE', 'PROTEIN', 'GOLD', 'MWHE', ...
Name: text, Length: 2437, dtype: object


In [5]:
import pandas as pd

all_data = pd.read_csv("./../resource/dataset/test.csv")

In [6]:
df1 = all_data[:30000]
df2 = all_data[30001:60000]
df3 = all_data[60001:90000]

In [8]:
df1.to_csv("./split/data1.csv", index= False)
df2.to_csv("./split/data2.csv", index=False)
df3.to_csv("./split/data3.csv", index=False)