In [1]:
!pip install transformers
!pip install huggingface_hub
!pip install datasets
!pip install gradio


Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import gradio as gr
import torch
import joblib
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import time





In [3]:
# Load the trained model and processor
processor = joblib.load('/content/drive/MyDrive/fine_tuned_processor.pkl')  # Load trained model
model = joblib.load('/content/drive/MyDrive/fine_tuned_model.pkl')  # Load processor
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # Set model to evaluation mode

BlipForConditionalGeneration(
  (vision_model): BlipVisionModel(
    (embeddings): BlipVisionEmbeddings(
      (patch_embedding): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (encoder): BlipEncoder(
      (layers): ModuleList(
        (0-11): 12 x BlipEncoderLayer(
          (self_attn): BlipAttention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (projection): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): BlipMLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((768,), eps=1e-0

In [None]:
# Generate caption and simulate product details
def generate_description(image):
    # Preprocess the image for the model
    start_time = time.time()  # Start latency timer
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Generate a description for the image
    with torch.no_grad():  # No need to compute gradients for inference
        output = model.generate(**inputs, max_length=50)
        generated_caption = processor.decode(output[0], skip_special_tokens=True)

    if "|" in generated_caption:
        parts = generated_caption.split("|")
        product_description = parts[0].strip()  # The first part contains the main description
    else:
        product_description = generated_caption.strip()

    # Calculate latency
    latency = time.time() - start_time
    return product_description, latency, gr.update(visible=True)

# Collect feedback and rating
def collect_feedback(product_description, feedback, rating):
    feedback_summary = f"Feedback: {feedback}" if feedback else "No feedback provided."
    rating_summary = f"Rating: {rating}/5" if rating else "No rating provided."
    return feedback_summary, rating_summary

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### VisionClarity- Upload an image to generate a product description"),
    gr.Markdown("##### Provide optional feedback and rate the generated description.")

    # Step 1: Image Upload and Description Generation
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")  # Removed 'live' argument
        with gr.Column():
            description_output = gr.Textbox(label="Product Name & Description", interactive=False)
            latency_output = gr.Textbox(label="Latency", interactive=False)

    # Step 2: Feedback and Rating
    with gr.Row(visible=False) as feedback_section:
        with gr.Column():
            feedback_input = gr.Textbox(label="Your Feedback")
            rating_input = gr.Slider(minimum=1, maximum=5, step=1, label="Rate the Description (1-5)")
        with gr.Column():
            feedback_output = gr.Textbox(label="User Feedback", interactive=False)
            rating_output = gr.Textbox(label="User Rating", interactive=False)

    # Logic for Step 1: Generate Description and Show Feedback Section
    image_input.change(
        fn=generate_description,
        inputs=image_input,
        outputs=[description_output, latency_output, feedback_section]
    )

    # Logic for Step 2: Collect Feedback and Rating
    feedback_input.change(
        fn=collect_feedback,
        inputs=[description_output, feedback_input, rating_input],
        outputs=[feedback_output, rating_output]
    )
    rating_input.change(
        fn=collect_feedback,
        inputs=[description_output, feedback_input, rating_input],
        outputs=[feedback_output, rating_output]
    )

# Launch the Gradio interface

demo.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://5f7d9b6d3339053b25.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
