added genre and story style input params

sssingh · Oct 4, 2023 · 3253a7e · 3253a7e
1 parent 65f6ddd
commit 3253a7e
Show file tree

Hide file tree

Showing 6 changed files with 140 additions and 46 deletions.
diff --git a/README.md b/README.md
@@ -30,14 +30,16 @@ BLIP Image2Text model details can be found [here](https://huggingface.co/Sof22/i
 
 * It's important to note that this sample demonstration app is hosted on the free tiers of Huggingface Spaces, which means it is functional but may exhibit slower performance.
 * Additionally, when using the app for the first time or after an extended period (more than 1 hour), you might encounter an "Internal Error" message or receive a story unrelated to the provided image. This is a normal occurrence during the model loading process. Please wait a few seconds and try again; it should function as intended.
-* Please be aware that due to cost and resource constraints, the app currently has a maximum story length limit of 100 words per request.
+* Please be aware that due to cost and resource constraints, the app currently has a maximum story length limit of 200 words per request.
 
 App UI is shown below:
 
 <img src="https://github.com/sssingh/pic-to-story/blob/main/assets/story-teller-app.png?raw=true" width="1000" height="450"/><br><br> 
 
 **Dark Mode Toggle**: Activate it to switch between dark and light mode.  
 **Image Selector**: Click on it to pick an image from your computer, or drag and drop an image onto it directly. Click the 'X' to clear the selection and reset the app.  
+**Story Genre Dropdown**: Select the desired story genre from the dropdown list.  
+**Story Writing Style Dropdown**: Select the desired story writing style from the dropdown list.  
 **Story Length (in words) Slider**: Adjust the slider to specify the desired length of the generated story.  
 **Creativity Index Slider**: Modify the slider to indicate the desired level of creativity for the generated story. A range between 0.5 and 0.7 is recommended. Setting it to 1.0 results in highly creative, sometimes amusing output.  
 **Generate Story Button**: Press this button to initiate the story generation process.  

diff --git a/app.py b/app.py
@@ -30,21 +30,25 @@ def create_interface():
                 api_name=False,
             )
         with gr.Row():
-            with gr.Column(scale=5):
+            with gr.Column():
                 gr.Markdown(
                     """
-                    # Storyteller
+                    # The Storyteller
                     **This app can craft captivating narratives from captivating images, 
-                    potentially surpassing even Shakespearean standards. Select an image 
-                    that inspires a story, choose a story length (up to 100 words), and 
-                    adjust the creativity index to enhance its creative flair.**  
+                    potentially surpassing even Shakespearean standards.  
+                    <br>
+                    Select an `Image` that inspires a story, choose a `Story Genre`, 
+                    `Story Writing Style`, `Story Length (up to 200 words)`, and 
+                    adjust the `Creativity Index` to enhance its creative flair. Then 
+                    hit `Generate Story` button.
+                    Alternatively, just select one of the pre-configured `Examples`**  
                     <br>
-                    ***Please exercise patience, as the models employed are extensive and may
-                    require a few seconds to load. If you encounter an unrelated story, 
-                    it is likely still loading; wait a moment and try again.***
+                    ***Please exercise patience, as the models employed are extensive 
+                    and may require a few seconds to load. If you encounter an unrelated 
+                    story, it is likely still loading; wait a moment and try again.***
                     """
                 )
-            with gr.Column(scale=2):
+            with gr.Column():
                 max_count = gr.Textbox(
                     label="Max allowed OpenAI requests:",
                     value=app_config.openai_max_access_count,
@@ -63,21 +67,34 @@ def create_interface():
                 image = gr.Image(
                     type="filepath",
                 )
-                # Word Count Slider
-                word_count = gr.Slider(
-                    label="Story Length (words):",
-                    minimum=25,
-                    maximum=100,
-                    value=50,
-                    step=5,
-                )
-                creativity = gr.Slider(
-                    label="Creativity Index:",
-                    minimum=0.3,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1,
-                )
+                with gr.Row():
+                    with gr.Column():
+                        genre = gr.Dropdown(
+                            label="Story Genre: ",
+                            value="Poetry",
+                            choices=app_config.genre,
+                        )
+                        style = gr.Dropdown(
+                            label="Story Writing Style:",
+                            value="Cinematic",
+                            choices=app_config.writing_style_list,
+                        )
+                    with gr.Column():
+                        # Word Count Slider
+                        word_count = gr.Slider(
+                            label="Story Length (words):",
+                            minimum=30,
+                            maximum=200,
+                            value=50,
+                            step=10,
+                        )
+                        creativity = gr.Slider(
+                            label="Creativity Index:",
+                            minimum=0.3,
+                            maximum=1.0,
+                            value=0.7,
+                            step=0.1,
+                        )
                 with gr.Row():
                     submit_button = gr.Button(
                         value="Generate Story", elem_classes="orange-button"
@@ -87,24 +104,56 @@ def create_interface():
                 story = gr.Textbox(
                     label="Story:",
                     placeholder="Generated story will appear here.",
-                    lines=12,
+                    lines=21,
                 )
         with gr.Row():
             with gr.Accordion("Expand for examples:", open=False):
                 gr.Examples(
                     examples=[
-                        ["assets/examples/cheetah-deer.jpg", 50, 0.5],
-                        ["assets/examples/man-child-pet-dog.jpg", 100, 0.6],
-                        ["assets/examples/man-child.jpeg", 60, 1.0],
-                        ["assets/examples/men-fighting.jpg", 50, 0.4],
-                        ["assets/examples/teacher-school.jpg", 80, 0.7],
+                        [
+                            "assets/examples/cheetah-deer.jpg",
+                            "Horror",
+                            "Narrative",
+                            80,
+                            0.5,
+                        ],
+                        [
+                            "assets/examples/man-child-pet-dog.jpg",
+                            "Fiction",
+                            "Formal",
+                            100,
+                            0.6,
+                        ],
+                        [
+                            "assets/examples/man-child.jpeg",
+                            "Children Literature",
+                            "Symbolic",
+                            120,
+                            1.0,
+                        ],
+                        [
+                            "assets/examples/men-fighting.jpg",
+                            "Comedy",
+                            "Experimental",
+                            60,
+                            0.4,
+                        ],
+                        [
+                            "assets/examples/teacher-school.jpg",
+                            "Surrealism",
+                            "Non-linear",
+                            80,
+                            0.7,
+                        ],
                     ],
-                    inputs=[image, word_count, creativity],
-                    outputs=[story],
+                    fn=model.generate_story,
+                    inputs=[image, genre, style, word_count, creativity],
+                    outputs=[story, max_count, curr_count, available_count],
+                    run_on_click=True,
                 )
         submit_button.click(
             fn=model.generate_story,
-            inputs=[image, word_count, creativity],
+            inputs=[image, genre, style, word_count, creativity],
             outputs=[story, max_count, curr_count, available_count],
         )
         clear_button.click(

diff --git a/assets/story-teller-app.png b/assets/story-teller-app.png
diff --git a/assets/story-teller-examples.png b/assets/story-teller-examples.png
diff --git a/config.py b/config.py
@@ -17,6 +17,41 @@ class AppConfig:
     OPENAI_KEY = os.getenv("OPENAI_KEY")
     I2T_API_URL = os.getenv("I2T_API_URL")
     MONGO_CONN_STR = os.getenv("MONGO_CONN_STR")
+    genre_list = genre = [
+        "Adventure",
+        "Children Literature",
+        "Comedy",
+        "Drama",
+        "Fantasy",
+        "Fiction",
+        "Horror",
+        "Mystery",
+        "Non-fiction",
+        "Poetry",
+        "Romance",
+        "Satire",
+        "Surrealism",
+        "Urban Fantasy",
+    ]
+    writing_style_list = [
+        "Cinematic",
+        "Conversational",
+        "Descriptive",
+        "Experimental",
+        "First-Person",
+        "Formal",
+        "Informal",
+        "Metaphorical",
+        "Minimalist",
+        "Narrative",
+        "Non-linear",
+        "Objective",
+        "Sensory",
+        "Stream of Consciousness",
+        "Symbolic",
+        "Third-Person Limited",
+        "Third-Person Omniscient",
+    ]
 
 
 app_config = AppConfig()
diff --git a/model.py b/model.py
@@ -22,7 +22,7 @@ def __image2text(image):
     return response
 
 
-def __text2story(image_desc, word_count, creativity):
+def __text2story(image_desc, genre, style, word_count, creativity):
     """ "Generates a short story based on image description text prompt"""
     ## chat LLM model
     story_model = ChatOpenAI(
@@ -32,25 +32,29 @@ def __text2story(image_desc, word_count, creativity):
     )
     ## chat message prompts
     sys_prompt = PromptTemplate(
-        template="You are an expert storyteller that can generate"
-        + " imaginative {word_count} words long story from the input text",
-        input_variables=["word_count"],
+        template="""You are an expert story writer, write a maximum of {word_count} 
+        words long story in {genre} genre in {style} writing style, based on the user 
+        provided story-context.
+        """,
+        input_variables=["word_count", "genre", "style"],
     )
     system_msg_prompt = SystemMessagePromptTemplate(prompt=sys_prompt)
     human_prompt = PromptTemplate(
-        template="{image_desc}", input_variables=["image_desc"]
+        template="story-context: {context}", input_variables=["context"]
     )
     human_msg_prompt = HumanMessagePromptTemplate(prompt=human_prompt)
     chat_prompt = ChatPromptTemplate.from_messages(
         [system_msg_prompt, human_msg_prompt]
     )
     ## LLM chain
     story_chain = LLMChain(llm=story_model, prompt=chat_prompt)
-    response = story_chain.run(image_desc=image_desc, word_count=word_count)
+    response = story_chain.run(
+        genre=genre, style=style, word_count=word_count, context=image_desc
+    )
     return response
 
 
-def generate_story(image_file, word_count, creativity):
+def generate_story(image_file, genre, style, word_count, creativity):
     """Generates a story given an image"""
     # read image as bytes array
     with open(image_file, "rb") as f:
@@ -59,7 +63,11 @@ def generate_story(image_file, word_count, creativity):
     image_desc = __image2text(image=input_image)
     # generate story from caption
     story = __text2story(
-        image_desc=image_desc, word_count=word_count, creativity=creativity
+        image_desc=image_desc,
+        genre=genre,
+        style=style,
+        word_count=word_count,
+        creativity=creativity,
     )
     # increment the openai access counter and compute count stats
     mongo.increment_curr_access_count()