In [None]:
!pip install -q streamlit transformers torch torchvision pillow

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m71.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip install -q pyngrok

In [None]:
cd /content/drive/MyDrive/GenAI_Project/

/content/drive/MyDrive/GenAI_Project


In [None]:
%%writefile utils.py
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Hugging Face checkpoint identifiers used by this module.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
FLAN_T5_CHECKPOINT = "google/flan-t5-base"

# BLIP (image captioning): processor + model, created once when the module is
# imported and shared by generate_caption().
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# FLAN-T5 (text generation): tokenizer + model, created once when the module is
# imported and shared by generate_stylized_text().
t5_tokenizer = T5Tokenizer.from_pretrained(FLAN_T5_CHECKPOINT)
t5_model = T5ForConditionalGeneration.from_pretrained(FLAN_T5_CHECKPOINT)

def generate_caption(image: Image.Image) -> str:
    """Return a BLIP-generated caption for ``image``.

    The image is turned into PyTorch tensors by ``blip_processor``, passed to
    ``blip_model.generate`` with its default generation settings, and the first
    generated sequence is decoded with special tokens removed.

    Args:
        image: The PIL image to describe.

    Returns:
        The decoded caption text.
    """
    model_inputs = blip_processor(image, return_tensors="pt")
    generated_ids = blip_model.generate(**model_inputs)
    return blip_processor.decode(generated_ids[0], skip_special_tokens=True)

def classify_scene(caption: str) -> str:
    """Classify a caption into a coarse scene category by keyword lookup.

    The caption is lower-cased and split into alphabetic words; a keyword
    matches only when it appears as a whole word (regular plurals such as
    "trees", "beaches" or "cities" are accepted). Whole-word matching avoids
    accidental hits inside longer words - e.g. "tree" inside "street", "car"
    inside "scarf" or "man" inside "many". As a consequence, derived words such
    as "colorful" do not count as a mention of "color".

    Categories are tested in a fixed priority order (landscape, people,
    urban or rural, abstract or artistic) and the first one with a matching
    keyword wins.

    Args:
        caption: Free-text image caption.

    Returns:
        One of "landscape", "people", "urban or rural",
        "abstract or artistic", or "general scene" when nothing matches.
    """
    import re  # local import: only this function needs it

    # (label, keywords) pairs in priority order. Irregular plurals are listed
    # explicitly because the plural rules below cannot derive them.
    scene_keywords = (
        ("landscape", ("mountain", "river", "forest", "sunset", "beach",
                       "sky", "nature", "landscape", "tree")),
        ("people", ("person", "people", "man", "men", "woman", "women",
                    "child", "children", "face", "crowd")),
        ("urban or rural", ("building", "street", "city", "village", "car",
                            "bridge")),
        ("abstract or artistic", ("abstract", "pattern", "shape", "color",
                                  "texture")),
    )

    words = set(re.findall(r"[a-z]+", caption.lower()))

    def _mentions(keyword: str) -> bool:
        """Return True if the keyword or one of its regular plurals is a caption word."""
        forms = [keyword, keyword + "s"]
        if keyword.endswith(("ch", "sh", "s", "x", "z")):
            forms.append(keyword + "es")          # beach -> beaches
        if keyword.endswith("y"):
            forms.append(keyword[:-1] + "ies")    # city -> cities
        return any(form in words for form in forms)

    for label, keywords in scene_keywords:
        if any(_mentions(keyword) for keyword in keywords):
            return label
    return "general scene"

def style_prompt(caption: str, style: str, tone: str, length: str) -> str:
    """Build the text-generation prompt for a caption and the chosen options.

    The prompt has the shape::

        This is likely <a/an> <scene type>. <style instruction>'<Caption>'<tone>. <length hint>

    Unrecognised option values never raise: an unknown ``style`` uses the
    generic "Describe the image creatively" instruction, an unknown ``tone``
    is treated as "Neutral" (no tone suffix) and an unknown ``length`` is
    treated as "Medium".

    Args:
        caption: Image caption; it is stripped, lower-cased and capitalised
            before being quoted in the prompt.
        style: "Poetic", "Fictional" or "Interpretive"; anything else selects
            the generic description instruction.
        tone: One of the keys of ``tone_map`` below.
        length: "Short", "Medium" or "Long".

    Returns:
        The assembled prompt string.
    """
    tone_map = {
        "Neutral": "",
        "Dark": " with a dark or mysterious tone",
        "Dreamy": " in a dreamy, surreal style",
        "Humorous": " with a humorous, witty touch",
        "Sci-Fi": " imagining a futuristic, sci-fi world",
        "Romantic": " with a romantic and emotional feel",
    }

    length_map = {
        "Short": "Keep it brief.",
        "Medium": "Keep it moderately detailed.",
        "Long": "Make it detailed and elaborate.",
    }

    # Opening instruction per style; styles not listed here use the default.
    style_leads = {
        "Poetic": "Write a poem about the scene: ",
        "Fictional": "Write a fictional story inspired by: ",
        "Interpretive": "Interpret this image in a philosophical way based on: ",
    }
    default_lead = "Describe the image creatively: "

    scene_type = classify_scene(caption)
    # Pick the article that agrees with the label ("an urban or rural",
    # "an abstract or artistic", "a landscape", ...).
    article = "an" if scene_type[:1].lower() in "aeiou" else "a"
    scene_context = f"This is likely {article} {scene_type}. "

    caption_clean = caption.strip().lower().capitalize()

    lead = style_leads.get(style, default_lead)
    tone_suffix = tone_map.get(tone, tone_map["Neutral"])
    length_hint = length_map.get(length, length_map["Medium"])

    return f"{scene_context}{lead}'{caption_clean}'{tone_suffix}. {length_hint}"

def generate_stylized_text(prompt: str) -> str:
    """Generate text for ``prompt`` with the FLAN-T5 model.

    The prompt is tokenized into PyTorch tensors and passed to
    ``t5_model.generate`` with sampling enabled (``top_k=50``, ``top_p=0.95``)
    and ``max_length=150``. Because sampling is enabled, repeated calls with
    the same prompt may return different text.

    Args:
        prompt: The instruction text to feed to the model.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    encoded_prompt = t5_tokenizer(prompt, return_tensors="pt")
    sampling_options = dict(max_length=150, do_sample=True, top_k=50, top_p=0.95)
    generated_ids = t5_model.generate(encoded_prompt.input_ids, **sampling_options)
    return t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)


Overwriting utils.py


In [None]:
%%writefile app.py
import streamlit as st
from PIL import Image
from utils import generate_caption, style_prompt, generate_stylized_text

# Choices offered by the three selectors shown after an image is uploaded.
STYLE_OPTIONS = ["Standard", "Poetic", "Interpretive", "Fictional"]
TONE_OPTIONS = ["Neutral", "Dark", "Dreamy", "Humorous", "Sci-Fi", "Romantic"]
LENGTH_OPTIONS = ["Short", "Medium", "Long"]

# Page chrome.
st.set_page_config(page_title="ArtPrompt", layout="centered")
st.title("🎨 ArtPrompt")
st.subheader("Turn images into stories, poems, or interpretations using AI!")

uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

# Everything below is only rendered once the user has provided an image.
if uploaded_file:
    # Convert to RGB before displaying and captioning the image.
    image = Image.open(uploaded_file).convert("RGB")
    # NOTE(review): newer Streamlit releases may deprecate `use_column_width`;
    # confirm against the installed version.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    style = st.selectbox("Choose a base style", STYLE_OPTIONS)
    tone = st.selectbox("Pick a tone or mood", TONE_OPTIONS)
    length = st.selectbox("Select output length", LENGTH_OPTIONS)

    if st.button("Generate Description"):
        # Pipeline: image -> caption -> styled prompt -> generated text.
        with st.spinner("Generating..."):
            output = generate_stylized_text(
                style_prompt(generate_caption(image), style, tone, length)
            )

        st.markdown("### 🧾 Generated Description")
        st.write(output)


Overwriting app.py


In [None]:
import os
from getpass import getpass

from pyngrok import conf, ngrok

# Never paste the ngrok authtoken into the notebook: it would be saved with the
# file and shared along with it. Take it from the NGROK_AUTHTOKEN environment
# variable when set, otherwise ask for it interactively without echoing it.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass(
    "ngrok authtoken: "
)


In [None]:
# Clean up anything left over from a previous run before relaunching:
# signal running `streamlit` processes, then shut down ngrok via pyngrok.
!pkill streamlit
from pyngrok import ngrok
ngrok.kill()

In [None]:
# Start the Streamlit app in the background (trailing `&`) so the notebook stays
# usable; stdout and stderr are both redirected (`&>`) to logs.txt in the
# project folder - check that file if the app does not come up.
!streamlit run app.py &>/content/drive/MyDrive/GenAI_Project/logs.txt &

In [None]:
from pyngrok import ngrok

# Open an HTTP tunnel to local port 8501, where the Streamlit app is expected
# to be listening.
tunnel = ngrok.connect("8501", "http")

# connect() returns an NgrokTunnel object; its `public_url` attribute holds the
# address to share, so print that instead of the object's repr.
public_url = tunnel.public_url
print(f"App is live at 👉 {public_url}")

App is live at 👉 NgrokTunnel: "https://493c-34-125-96-155.ngrok-free.app" -> "http://localhost:8501"
