From 9d3915d2a82ada023cd6f6f3b0f8ad381dc3641f Mon Sep 17 00:00:00 2001 From: Jessica Garson Date: Tue, 19 Aug 2025 10:52:45 -0400 Subject: [PATCH] Update supporting blog content (#481) --- .../using-ragas-with-elasticsearch/README.md | 51 ++ .../using-ragas-with-elasticsearch/books.json | 27 + .../ragas-elasticsearch-demo.ipynb | 801 ++++++++++++++++++ .../ragas_evaluation.csv | 5 + 4 files changed, 884 insertions(+) create mode 100644 supporting-blog-content/using-ragas-with-elasticsearch/README.md create mode 100644 supporting-blog-content/using-ragas-with-elasticsearch/books.json create mode 100644 supporting-blog-content/using-ragas-with-elasticsearch/ragas-elasticsearch-demo.ipynb create mode 100644 supporting-blog-content/using-ragas-with-elasticsearch/ragas_evaluation.csv diff --git a/supporting-blog-content/using-ragas-with-elasticsearch/README.md b/supporting-blog-content/using-ragas-with-elasticsearch/README.md new file mode 100644 index 00000000..f288b9ec --- /dev/null +++ b/supporting-blog-content/using-ragas-with-elasticsearch/README.md @@ -0,0 +1,51 @@ +# Using Ragas with Elasticsearch +Ragas is an evaluation framework that enables you to gain a deeper understanding of how your LLM application performs. Using evaluation methods, such as the Ragas framework, can help you determine if your LLM application is performing as intended and provide a sense of its accuracy. It can enable data-driven decisions about model selection, prompt engineering effectiveness, and retrieval system optimization. + +This repository contains a demo using a sample book dataset and Elasticsearch. + +## Setting up +- The version of Python that is used is Python 3.12.1 but you can use any version of Python higher than 3.10. +- This demo uses Elasticsearch version 9.0.3, but you can use any version of Elasticsearch that is higher than 8.10. +- You will also need an OpenAI API key for LLM-based metrics. 
You will want to configure an environment variable for your OpenAI API Key, which you can find on the API keys page in [OpenAI's developer portal](https://platform.openai.com/api-keys). + +## Structure of the example +- **[books.json](books.json)**: The dataset used, containing a sample of books. This is a subset of 25 books from Goodreads including the book title, the author's name, book description, publication year, and a Goodreads URL. +- **[ragas-elasticsearch-demo.ipynb](ragas-elasticsearch-demo.ipynb)**: Main Jupyter notebook for running ragas evaluations. It sets up the environment, loads data, runs sample queries, and computes evaluation metrics (context precision, faithfulness, answer relevancy) using ragas. +- **[ragas_evaluation.csv](ragas_evaluation.csv)**: Output file generated by the notebook, containing detailed results for each evaluation query, including metrics for context precision, faithfulness, and answer relevancy. + +## Changing the model +This example uses `GPT-4o`; you can easily change the model to another by adjusting the parameter `model="model name"`. + +```python +chat_llm = ChatOpenAI( + model="gpt-4o", + temperature=0.1, + api_key=API_KEY +) +``` + +## Usage + +1. **Install Dependencies** + The notebook will install required dependencies automatically, but you can also install them manually: + + ```bash + pip install -q ragas datasets langchain elasticsearch openai langchain-openai + ``` + +2. **Run the Notebook** + Open `ragas-elasticsearch-demo.ipynb` in Jupyter and follow the instructions to run each cell. The notebook will: + + - Query your book index (via Elasticsearch) + - Run sample RAG queries + - Evaluate the responses using Ragas + - Output results to `ragas_evaluation.csv` + +3. **View Results** + + The results file contains detailed metrics for each test query. Use it to analyze the quality of your RAG pipeline and compare different configurations. The results will also be in a `csv` file. 
+ + +## Troubleshooting +- Your model will need to be deployed before the following code can run. To learn more about this, be sure to check out our [documentation on the subject](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-deploy-model). +- If you encounter any problems running this in Colab or locally, it might be due to the dataset requiring a separate download. It can be found in the same folder as this example. diff --git a/supporting-blog-content/using-ragas-with-elasticsearch/books.json b/supporting-blog-content/using-ragas-with-elasticsearch/books.json new file mode 100644 index 00000000..5bc2fbea --- /dev/null +++ b/supporting-blog-content/using-ragas-with-elasticsearch/books.json @@ -0,0 +1,27 @@ +[ + {"book_title": "Lucky 7", "author_name": "Rae D. Magdon", "rating_score": 4.34, "rating_votes": 206, "review_number": 57, "book_description": "Elena Nevares is on the run. She\u2019s a jacker, someone who connects to virtual reality with their brain, and everyone else on her crew was murdered during a mission gone wrong. Sasha Young is planning a rescue. She\u2019s a handler, a team leader whose crew has been scattered by an evil corporation: Axys Generations. Together, they must find the rest of Sasha\u2019s crew: Cherry, the engineer and explosives expert; Rami, the master of disguise; Doc, the wunderkind Medical Officer; and Rock, the mechanically modified muscle. But Axys Generations has bigger plans than taking down Sasha\u2019s crew. Elena, Sasha, and the rest of the Lucky 7 must go on their most dangerous mission yet\u2014not for credits or tech, but to save the world.", "year_published": 2018, "genres": ["Science Fiction", "LGBT", "GLBT (Lesbian)", "Science Fiction (Cyberpunk)", "Romance", "Action", "Adult Fiction (Erotica)", "GLBT (Queer)", "Mystery (Crime)", "Adventure"], "url": "https://www.goodreads.com/book/show/38390922-lucky-7"}, + {"book_title": "Salvation Lost", "author_name": "Peter F. 
Hamilton", "rating_score": 4.34, "rating_votes": 5131, "review_number": 342, "book_description": "The comparative utopia of twenty-third century Earth is about to go dreadfully awry when a seemingly benign alien race is abruptly revealed to be one of the worst threats humanity has ever faced. Driven by an intense religious extremism, the Olyix are determined to bring everyone to their version of god as they see it. But they may have met their match in humanity, who are not about to go gently into that good night or spend the rest of their days cowering in hiding. As human ingenuity and determination rises to the challenge, collective humanity has only one goal--to wipe this apparently undefeatable enemy from the face of creation. Even if it means playing a ridiculously long game indeed.But in a chaotic universe, it is hard to plan for every eventuality, and it is always darkest before the dawn.", "year_published": 2019, "genres": ["Science Fiction", "Space (Space Opera)", "Fiction", "Science Fiction Fantasy", "Science Fiction (Aliens)", "Audiobook", "Space", "Novels", "Fantasy", "Science Fiction (Hard Science Fiction)"], "url": "https://www.goodreads.com/book/show/44020966-salvation-lost"}, + {"book_title": "Alien Warrior's Mate", "author_name": "Vi Voxley", "rating_score": 3.74, "rating_votes": 492, "review_number": 54, "book_description": "He's damn impossible and he won't stop at anything to claim her... Deliya is one of the best of the best. An elite warrior, few things have ever phased her. She works hard and plays hard, all for the day when fates decide it's time for her to meet her fated mate. But what if there's one man in this blasted universe who just won't stop getting under her skin? What if she can't help but lust after him? Fates be damned, isn't that what's important? Driven to always succeed, Darien is one of the commander's chosen few. Above all his fellow warriors, he serves at the leisure of his general. 
But lately, someone else has been commanding his time... Mad with need to claim her sexy body as his own, Darien will not stop at anything in his quest to keep Deliya safe. And more importantly, as his own. Fates be damned, he will have that woman. When death lurks behind every stone and in the hands of every enemy they encounter, can Darien and Deliya find their love, or will death claim them first? Alien Warrior's Mate is a standalone book with a happy ever after ending and no cliffhanger. There's plenty of steam in this book and unless you love pantymeltingly hot sex, you might want to steer clear... Or at least brace yourself for a hell of a ride!", "year_published": 2015, "genres": ["Science Fiction (Aliens)", "Romance", "Science Fiction", "Romance (Science Fiction Romance)", "Futuristic", "Adult Fiction (Erotica)", "War (Military Fiction)", "Space (Space Opera)", "Adult", "Short Stories"], "url": "https://www.goodreads.com/book/show/27411139-alien-warrior-s-mate"}, + {"book_title": "On the Steel Breeze", "author_name": "Alastair Reynolds", "rating_score": 4.03, "rating_votes": 5909, "review_number": 353, "book_description": "It is a thousand years in the future. Mankind is making its way out into the universe on massive generation ships.On the Steel Breeze is the follow-up to Blue Remembered Earth. It is both a sequel and a standalone novel, which just happens to be set in the same universe and revolves around members of the Akinya family.The central character, Chiku, is totally new, although she is closely related to characters in the first book. The action involves a 220-year expedition to an extrasolar planet aboard a caravan of huge iceteroid 'holoships', the tension between human and artificial intelligence... 
and, of course, elephants.Lots of elephants.", "year_published": 2013, "genres": ["Science Fiction", "Fiction", "Space (Space Opera)", "Audiobook", "Science Fiction (Hard Science Fiction)", "Science Fiction Fantasy", "Adventure", "Space", "Speculative Fiction", "Science Fiction (Aliens)"], "url": "https://www.goodreads.com/book/show/15999018-on-the-steel-breeze"}, + {"book_title": "Salvage Marines", "author_name": "Sean-Michael Argo", "rating_score": 3.56, "rating_votes": 316, "review_number": 29, "book_description": "To protect his young family from merciless corporate tyranny, Samuel Hyst signs up for the hazardous life of a salvage marine, the foot soldiers of a militarized deep space operation. Hazard pay is high for a reason, and Samuel must endure grueling tours of duty in the abandoned scrapyard quadrants of a war-torn universe. Endless wars rage across every galaxy, leaving damaged starships, rusted factories, and broken planets in their wake, all ripe for plunder by the desperate and the daring. Samuel must seize his destiny in frenzied combat against murderous scavengers, horrific monsters, and the elite mercenaries of rival corporations. Left with no choice but to fight, he must win freedom with a combat rifle and his blowtorch. ", "year_published": 2015, "genres": ["Science Fiction", "War (Military Fiction)", "Science Fiction (Military Science Fiction)"], "url": "https://www.goodreads.com/book/show/29952204-salvage-marines"}, + {"book_title": "Trade Secret", "author_name": "Sharon Lee", "rating_score": 4.07, "rating_votes": 1285, "review_number": 125, "book_description": "The Liaden Universe \u00ae Saga Continues! Star-spanning galactic trader Jethri Gobelyn adventures are the focus in book 17 by master storytellers Sharon Lee and Steve Miller. 
Sequel to Trade Pact.In a universe full of interstellar intrigue and burgeoning commerce novice Terran trader Jethri Gobelyn, adopted by a Liaden clan after an ill-directed bow of honor insulted the scion of a major Liaden house, is alive and whole to tell the tale. Convinced that the adoption has saved his life and made his future he settles into a comfortable and even elite routine, a Trader's Ring his goal.Even as Jethri's initiation into the mysteries and joys of Liaden Festival bring him to manhood he's forced to face Necessity and the facts of life: his adoption has also invigorated a net of unfinished Balance far more complex and potentially deadly than a simple Terran blood feud. He must embrace his Terran birthright as well as his Liaden connections while leaving behind the safety of the great Liaden trade ship Elthoria to defend his honor and that of shipmates past and present. Forced to sit Second Board as a back-up pilot on a Liaden Scout ship, Jethri's convinced he's already at wit's end\u2014when several familiar faces threaten all that he knows of himself, and all that he wishes to do.Celebrating 25 Years of the Liaden Universe\u00ae", "year_published": 2012, "genres": ["Science Fiction", "Space (Space Opera)", "Fiction", "Fantasy", "Science Fiction Fantasy", "Space", "Audiobook", "Romance", "Adventure", "Speculative Fiction"], "url": "https://www.goodreads.com/book/show/13544081-trade-secret"}, + {"book_title": "There Will Be Time", "author_name": "Poul Anderson", "rating_score": 3.67, "rating_votes": 1058, "review_number": 79, "book_description": " Time travel is impossible! There is no machine that can take you into time, past or future. But what if you are born a time traveler? Jack Havig did not know how he could cross the centuries merely by willing himself to. But the fact remained, he could. 
And, thought Jack, if I can travel through time, there must be others!So Jack Havig, human being extraordinaire, set out to see the world--the world of ancient Rome, of the Byzantine empire, of the American Indian tribes & ultimately the world of the future.Seeing the future, Jack found meaning in his life and a reason for his gift. He must seek out others like himself throughout the centuries and together they must try to affect the future of humankind. For that future threatened the extinction of humanity's entire civilization...", "year_published": 1972, "genres": ["Science Fiction", "Science Fiction (Time Travel)", "Fiction", "Fantasy", "Science Fiction Fantasy", "Literature (American)", "Speculative Fiction", "Anthologies (Collections)"], "url": "https://www.goodreads.com/book/show/602758.There_Will_Be_Time"}, + {"book_title": "Only in Death", "author_name": "Dan Abnett", "rating_score": 4.35, "rating_votes": 2109, "review_number": 55, "book_description": "As the crusade to liberate the Sabbat Worlds continues, Colonel-Commissar Gaunt leads the Tanith First-and-Only into an unforgiving new warzone - the fortress world of Jago.", "year_published": 2007, "genres": ["Science Fiction", "40k", "Fiction", "Fantasy", "Science Fiction (Military Science Fiction)", "War", "War (Military Fiction)", "Horror", "Science Fiction Fantasy", "Space (Space Opera)"], "url": "https://www.goodreads.com/book/show/1052306.Only_in_Death"}, + {"book_title": "His To Claim", "author_name": "Taylor Vaughn", "rating_score": 4.05, "rating_votes": 2201, "review_number": 335, "book_description": "\n\n\nMy alien overlord is huge, all-powerful, and determined to claim me!\n\n\nEvery human girl growing up on New Terrhan knows one thing\u2026 that she will be taken by one or several Xalthurian males when she turns twenty-one. The Xalthurians are rough, domineering, and huge all over. 
But, this is the deal our leaders struck, so that they would provide our human colony with desperately needed supplies...\nIf the baby born from our taking is a girl, we will be allowed to keep it. If the baby is a boy, it will be taken away, never to be seen again. That is the way it has always been, on our small planet and that is the way it will always be...\nI should accept my fate, but I can\u2019t! When they come for my sister\u2019s baby boy, I dare to defy the alien, they call \u201cTel\u201d. As it turns out, \u201cTel\u201d means prince. But rather than killing me for my insolence, the future overlord vows to return to the planet. When I am twenty-one...\n\n\nHe\u2019s huge, all-powerful, and determined to claim me!\n\n\n\n\nCan I stop him?\n\n\n\n\nAnd how will I answer when the question becomes, do I want to?\n\n\n", "year_published": 2019, "genres": ["Science Fiction (Aliens)", "Romance", "Science Fiction", "Adult Fiction (Erotica)", "Romance (Science Fiction Romance)", "Fantasy (Paranormal)", "Fantasy", "Romance (Paranormal Romance)", "Romance (Interracial Romance)", "Romance (Erotic Romance)"], "url": "https://www.goodreads.com/book/show/44645149-his-to-claim"}, + {"book_title": "Savage Drift", "author_name": "Emmy Laybourne", "rating_score": 4.07, "rating_votes": 6226, "review_number": 710, "book_description": "The stunningly fierce conclusion to Emmy Laybourne's Monument 14 trilogy.The survivors of the Monument 14 have finally made it to the safety of a Canadian refugee camp. Dean and Alex are cautiously starting to hope that a happy ending might be possible.But for Josie, separated from the group and trapped in a brutal prison camp for exposed Type Os, things have gone from bad to worse. 
Traumatized by her experiences, she has given up all hope of rescue or safety.Meanwhile, scared by the government's unusual interest in her pregnancy, Astrid (with her two protectors, Dean and Jake in tow) joins Niko on his desperate quest to be reunited with his lost love Josie.Author Emmy Laybourne reaches new heights of tension and romance in this action-packed conclusion to the Monument 14 trilogy.", "year_published": 2014, "genres": ["Young Adult", "Science Fiction (Dystopia)", "Apocalyptic (Post Apocalyptic)", "Science Fiction", "Adventure (Survival)", "Science Fiction (Apocalyptic)", "Fiction", "Romance", "Adventure", "Young Adult (Teen)"], "url": "https://www.goodreads.com/book/show/18215086-savage-drift"}, + {"book_title": "Light of the Jedi", "author_name": "Charles Soule", "rating_score": 4.2, "rating_votes": 3127, "review_number": 597, "book_description": "Two hundred years before the events of Star Wars: The Phantom Menace, in the era of the glorious High Republic, the noble and wise Jedi Knights must face a frightening threat to themselves, the galaxy, and the Force itself....From Book Sleeve:It is a golden age. Intrepid hyperspace scouts expand the reach of the Republic to the farthest stars, worlds flourish under the benevolent leadership of the Senate, and peace reigns, enforced by the wisdom and strength of the renowned order of Force users known as the Jedi. With the Jedi at the height of their power, the free citizens of the galaxy are confident in their ability to weather any storm. But even the brightest light can cast a shadow, and some storms defy any preparation.When a shocking catastrophe in hyperspace tears a ship to pieces, the flurry of shrapnel emerging from the disaster threatens an entire system. No sooner does the call for help go out than the Jedi race to the scene. The scope of the emergence, however, is enough to push even Jedi to their limit. 
As the sky breaks open and destruction rains down upon the peaceful alliance they helped to build, the Jedi must trust in the Force to see them through a day in which a single mistake could cost billions of lives.Even as the Jedi battle valiantly against calamity, something truly deadly grows beyond the boundary of the Republic. The hyperspace disaster is far more sinister than the Jedi could ever suspect. A threat hides in the darkness, far from the light of the age, and harbors a secret that could strike fear into even a Jedi's heart.", "year_published": 2021, "genres": ["Media Tie In (Star Wars)", "Science Fiction", "Fiction", "Fantasy", "Audiobook", "Space (Space Opera)", "Novels", "Space", "Canon", "Action"], "url": "https://www.goodreads.com/book/show/51637946-light-of-the-jedi"}, + {"book_title": "Mega Robo Bros", "author_name": "Neill Cameron", "rating_score": 3.85, "rating_votes": 140, "review_number": 37, "book_description": "London! The Future! Alex and Freddy are just like any other brothers. They squabble. They drive their parents crazy. There's only one difference...they're the most powerful robots on earth! But Alex and Freddy will soon discover that they're not the only super-powered robots around. An evil robotic mind is making sinister plans. And their lives won't stay peaceful for much longer...", "year_published": 2016, "genres": ["Sequential Art (Graphic Novels)", "Science Fiction", "Childrens (Middle Grade)", "Sequential Art (Comics)", "Adventure", "Childrens", "Fiction", "Humor", "Science Fiction (Robots)", "Family"], "url": "https://www.goodreads.com/book/show/30181785-mega-robo-bros"}, + {"book_title": "Transmetropolitan, Vol. 
1: Back on the Street", "author_name": "Warren Ellis", "rating_score": 4.21, "rating_votes": 41612, "review_number": 1125, "book_description": "After years of self-imposed exile from a civilization rife with degradation and indecency, cynical journalist Spider Jerusalem is forced to return to a job that he hates and a city that he loathes. Working as an investigative reporter for the newspaper The Word, Spider attacks the injustices of his surreal 23rd Century surroundings. Combining black humor, life-threatening situations, and moral ambiguity, this book is the first look into the mind of an outlaw journalist and the world he seeks to destroy.", "year_published": 1998, "genres": ["Sequential Art (Comics)", "Sequential Art (Graphic Novels)", "Science Fiction", "Fiction", "Science Fiction (Cyberpunk)", "Graphic Novels Comics", "Comics (Comic Book)", "Science Fiction (Dystopia)", "Sequential Art (Comix)", "Humor"], "url": "https://www.goodreads.com/book/show/22416.Transmetropolitan_Vol_1"}, + {"book_title": "The Queen of Traitors", "author_name": "Laura Thalassa", "rating_score": 4.09, "rating_votes": 9247, "review_number": 614, "book_description": "They say I am a queen.They say I married a monster. That I did so willingly.They say he cannot die, that he does not age. That together, we rule a ravaged world.They say I used to be one of them, but now I am a traitor. They say many things, all which I cannot remember, but \u2026 I fear what they say is true.Then there is what they don\u2019t say, what I see in their eyes\u2014The king terrifies them. He cannot be stopped. 
And, most worrisome of all \u2026He is coming for me.", "year_published": 2016, "genres": ["New Adult", "Romance", "Science Fiction (Dystopia)", "Fantasy", "Apocalyptic (Post Apocalyptic)", "Science Fiction", "Fiction", "Dark", "War", "Fantasy (Paranormal)"], "url": "https://www.goodreads.com/book/show/23867347-the-queen-of-traitors"}, + {"book_title": "The Island of Doctor Moreau", "author_name": "H.G. Wells", "rating_score": 3.73, "rating_votes": 99517, "review_number": 4423, "book_description": "Ranked among the classic novels of the English language and the inspiration for several unforgettable movies, this early work of H. G. Wells was greeted in 1896 by howls of protest from reviewers, who found it horrifying and blasphemous. They wanted to know more about the wondrous possibilities of science shown in his first book, The Time Machine, not its potential for misuse and terror. In The Island of Dr. Moreau, a shipwrecked gentleman named Edward Prendick, stranded on a Pacific island lorded over by the notorious Dr. Moreau, confronts dark secrets, strange creatures, and a reason to run for his life.While this riveting tale was intended to be a commentary on evolution, divine creation, and the tension between human nature and culture, modern readers familiar with genetic engineering will marvel at Wells\u2019s prediction of the ethical issues raised by producing \u201csmarter\u201d human beings or bringing back extinct species. These levels of interpretation add a richness to Prendick\u2019s adventures on Dr. 
Moreau\u2019s island of lost souls without distracting from what is still a rip-roaring good read.", "year_published": 1896, "genres": ["Classics", "Science Fiction", "Fiction", "Horror", "Fantasy", "Literature", "Science Fiction Fantasy", "Literature (19th Century)", "Novels", "Adventure"], "url": "https://www.goodreads.com/book/show/29981.The_Island_of_Doctor_Moreau"}, + {"book_title": "Human Nature", "author_name": "Jonathan Green", "rating_score": 3.6, "rating_votes": 108, "review_number": 6, "book_description": "In the closing years of the 20th century the British Empire's rule is still going strong. Queen Victoria is about to celebrate her 160th birthday, kept alive by advanced steam technology. London is a fantastical sprawling metropolis where dirigibles roam the skies, robot bobbies enforce the law and dinosaurs are on display in London zoo. Welcome to Magna Britannia, a steam driven world full of fantastical creations and shady villains. Here dashing dandies and mustachioed villains battle for supremacy while below the city strange things stir in the flooded tunnels of the old London Underground. The Whitby Mermaid has been stolen from Cruickshank's Cabinet of Curiosities and consulting detective Gabriel Wraith is on the case. And he's not the only one, for wherever there is a mystery to be solved, Ulysses Quicksilver is never far away. What does the theft of the mermaid have to do with the mysterious House of Monkeys? And what of the enigmatic criminal known as the Magpie? Ulysses embarks upon an adventure that takes him to Whitby where the mermaid was supposedly caught. But there are worse things awaiting him there than mermaids. The moors of Ghestdale are haunted by the savage Barghest beast, while in the abandoned mines beneath the Umbridge estate, abominations lurk in the darkness. 
And Ulysses Quicksilver is about to discover that the worst horrors are those spawned by Man's own selfish nature.", "year_published": 2009, "genres": ["Science Fiction (Steampunk)", "Science Fiction", "Fiction", "Adventure", "Science Fiction (Alternate History)", "Fantasy", "Mystery"], "url": "https://www.goodreads.com/book/show/6072232-human-nature"}, + {"book_title": "Legion", "author_name": "Dan Abnett", "rating_score": 4.11, "rating_votes": 7023, "review_number": 299, "book_description": "A Great War is coming, and it will engulf the Imperium of Man. The Space Marines of the Alpha Legion, the last and most secretive of all the Astartes brotherhoods, arrive on a heathen world to support the Imperial Army in a pacification campaign against strange and uncanny forces. But what drives the Alpha Legion? Can they be trusted, and what side will they choose when the Great War begins? Loyalties are put to the test, and the cunning schemes of an alien intelligence revealed in this latest instalment of the ground-breaking SF series by Dan Abnett, as the fate of mankind hangs in the balance.", "year_published": 2008, "genres": ["Science Fiction", "40k", "Fiction", "Fantasy", "War", "Audiobook", "Science Fiction Fantasy", "Science Fiction (Military Science Fiction)", "Space (Space Opera)", "Novels"], "url": "https://www.goodreads.com/book/show/1678745.Legion"}, + {"book_title": "Wolfsbane", "author_name": "Guy Haley", "rating_score": 4.21, "rating_votes": 628, "review_number": 44, "book_description": "The time has come. 
Leman Russ, primarch of the Space Wolves, withdraws his Legion from Terra and makes all haste for Horus\u2019s position, to try and end the traitor once and for all.The time has come for Leman Russ, primarch of the Space Wolves, to fulfil his vow and attempt to stop Warmaster Horus before he breaks through to the Segmentum Solar.\u00a0\u00a0In the face of opposition from three of his brother primarchs, Russ withdraws the Space Wolves legion from Terra and makes all haste for Horus\u2019s position. Reports from Malcador the Sigillite\u2019s agents suggest that Horus is utterly changed, and infused with a diabolical power so great that no man can stand against him. A warrior of Fenris would never willingly abandon his oaths, but with Horus beyond the touch of mortal blades, the Lord of Winter and War may have doomed himself for the sake of honour\u2026", "year_published": 2018, "genres": ["Science Fiction", "40k", "Fiction", "Audiobook", "Action", "Fantasy", "Science Fiction (Military Science Fiction)"], "url": "https://www.goodreads.com/book/show/38747895-wolfsbane"}, + {"book_title": "Lamb: The Gospel According to Biff, Christ's Childhood Pal", "author_name": "Christopher Moore", "rating_score": 4.23, "rating_votes": 155779, "review_number": 11979, "book_description": "The birth of Jesus has been well chronicled, as have his glorious teachings, acts, and divine sacrifice after his thirtieth birthday. But no one knows about the early life of the Son of God, the missing years\u2014except Biff, the Messiah's best bud, who has been resurrected to tell the story in the divinely hilarious yet heartfelt work \"reminiscent of Vonnegut and Douglas Adams\" (Philadelphia Inquirer).Verily, the story Biff has to tell is a miraculous one, filled with remarkable journeys, magic, healings, kung fu, corpse reanimations, demons, and hot babes. Even the considerable wiles and devotion of the Savior's pal may not be enough to divert Joshua from his tragic destiny. 
But there's no one who loves Josh more\u2014except maybe \"Maggie,\" Mary of Magdala\u2014and Biff isn't about to let his extraordinary pal suffer and ascend without a fight.", "year_published": 2002, "genres": ["Fiction", "Humor", "Historical (Historical Fiction)", "Fantasy", "Religion", "Humor (Comedy)", "Historical", "Audiobook", "Adult", "Novels"], "url": "https://www.goodreads.com/book/show/28881.Lamb"}, + {"book_title": "Our Pet", "author_name": "S.M. Matthews", "rating_score": 3.82, "rating_votes": 719, "review_number": 78, "book_description": "Abducted by aliens? Yes. Mistakenly bought as a family pet? Yep. Actually, unexpectedly starting to enjoy the ride? I just might be. I've been abducted, sold, and then sold again, all before I even knew what was going on. Luckily for me, I might just have been bought by the three nicest....guys....in the universe. Maisy has to adapt to her new life traveling through space, lucky for her she has a set of three alien brothers to help her along...big, furry and very affectionate, these brothers only come as a set. *Contains themes of a very adult nature, meant for mature readers only. Warning - alien biology.* ", "year_published": 2019, "genres": ["Polyamorous (Reverse Harem)", "Science Fiction (Aliens)", "Romance", "Science Fiction", "Romance (Science Fiction Romance)", "Erotica (Menage)", "Romance (Paranormal Romance)", "Adult Fiction (Erotica)"], "url": "https://www.goodreads.com/book/show/51127899-our-pet"}, + {"book_title": "The Aylesford Skull", "author_name": "James P. Blaylock", "rating_score": 3.59, "rating_votes": 366, "review_number": 75, "book_description": "It is the summer of 1883 and Professor Langdon St. Ives - brilliant but eccentric scientist and explorer - is at home in Aylesford with his family. However, a few miles to the north a steam launch has been taken by pirates above Egypt Bay; the crew murdered and pitched overboard. In Aylesford itself a grave is opened and possibly robbed of the skull. 
The suspected grave robber, the infamous Dr. Ignacio Narbondo, is an old nemesis of Langdon St. Ives.\u00a0When Dr. Narbondo returns to kidnap his four-year-old son Eddie and then vanishes into the night, St. Ives and his factotum Hasbro race to London in pursuit...", "year_published": 2013, "genres": ["Science Fiction (Steampunk)", "Fantasy", "Science Fiction", "Fiction", "Fantasy (Magic)", "Historical (Historical Fiction)", "Mystery", "Fantasy (Supernatural)", "Aircraft (Airships)", "Science Fiction (Alternate History)"], "url": "https://www.goodreads.com/book/show/13531598-the-aylesford-skull"}, + {"book_title": "Ghosts of War", "author_name": "George Mann", "rating_score": 3.58, "rating_votes": 391, "review_number": 50, "book_description": "Ghosts of War picks up the story a month after the end of Ghosts of Manhattan. New York City is being plagued by a pack of ferocious brass raptors \u2013 strange, skeleton-like creations with bat-like wings that swoop out of the sky, attacking people and carrying them away into the night. The Ghost has been tracking these bizarre machines, and is close to finding their origin: a deranged military scientist who is slowly rebuilding himself as a machine. However, this scientist is not working alone, and his scheme involves more than a handful of abductions. He is part of a plot to escalate the cold war with Britain into a full-blown conflict, and he is building a weapon \u2013 a weapon that will fracture dimensional space and allow the monstrous creatures that live on the other side to spill through. He and his co-conspirators \u2013 a cabal of senators and businessmen who seek to benefit from the war \u2013 intend to harness these creatures and use them as a means to crush the British. But the Ghost knows only too well how dangerous these creatures can be, and the threat they represent not just to Britain, but the world. 
The Ghost\u2019s efforts to put an end to the conspiracy bring him into an uneasy alliance with a male British spy, who is loose in Manhattan, protecting the interests of his country. He also has the unlikely assistance of Ginny, a drunken ex-lover and sharpshooter, who walks back into his life, having disappeared six years earlier in mysterious circumstances. Suffering from increasingly lucid flashbacks to WWI and subjected to rooftop chases, a battle with a mechanized madman, and the constant threat of airborne predators, and with the fate of the world hanging in the balance, can the Ghost derail the conspiracy and prevent the war with the British from escalating beyond control?", "year_published": 2011, "genres": ["Science Fiction (Steampunk)", "Fantasy", "Science Fiction", "Mystery", "Mystery (Noir)", "Science Fiction (Alternate History)", "Thriller", "Science Fiction Fantasy", "Mystery (Detective)", "Horror"], "url": "https://www.goodreads.com/book/show/9727683-ghosts-of-war"}, + {"book_title": "The Book of Time", "author_name": "Guillaume Pr\u00e9vost", "rating_score": 3.75, "rating_votes": 2127, "review_number": 237, "book_description": "Scholastic introduces the next name in YA adventure: Guillaume Prevost, whose Book of Time trilogy is one part DA VINCI CODE, one part DRACULA, one part THE TIME-TRAVELER'S WIFE, and all thrilling.A statue; a coin; an old book. They look as dusty as everything else in the Faulkner Antiquarian Bookstore, where 14-year-old Sam Faulkner seeks his father, who's been missing for days. But when Sam slips the coin into the statue, he's swept back in time -- to Scotland in 800 A.D. -- where he must find both the statue and another coin in order to return to the present. It's the first step in an adventure that will take him to ancient Egypt, World War I, even Dracula's castle -- and a mystery that will end only when Sam saves his father, or loses him in time . . 
.", "year_published": 2006, "genres": ["Fantasy", "Science Fiction (Time Travel)", "Young Adult", "Fiction", "Adventure", "Science Fiction", "Childrens", "Historical (Historical Fiction)", "Mystery", "Childrens (Middle Grade)"], "url": "https://www.goodreads.com/book/show/1383797.The_Book_of_Time"}, + {"book_title": "Because It Is My Blood", "author_name": "Gabrielle Zevin", "rating_score": 3.91, "rating_votes": 4611, "review_number": 524, "book_description": "\u201cEvery time I think I\u2019m out, they pull me back in.\u201d- Michael Corleone, The GodfatherFreed from jail, Anya hopes that things will get back to normal. But life on the outside is even more dangerous than life behind bars. Some of her gangland family want revenge for the crime for which she has done time: the shooting of her uncle. Forced to flee the country, Anya hides out in a cacao plantation in Mexico. There she learns the secrets of the chocolate trade, a trade that is illegal and deadly in her native New York. There too she discovers that seemingly random acts of violence carried out across the world have a single target: her family. As innocent bystanders get caught in the crossfire Anya must act fast and decisively to stop it, no matter what the danger to herself.", "year_published": 2012, "genres": ["Young Adult", "Science Fiction (Dystopia)", "Romance", "Science Fiction", "Fiction", "Mystery (Crime)", "Young Adult (Teen)", "Mystery", "Audiobook", "Fantasy"], "url": "https://www.goodreads.com/book/show/13163011-because-it-is-my-blood"}, + {"book_title": "The Annals of the Heechee", "author_name": "Frederik Pohl", "rating_score": 3.75, "rating_votes": 3928, "review_number": 111, "book_description": "Advanced Heechee technology had enabled Robinette Broadhead to live after death as a machine-stored personality, enjoying his life by flitting along the wires from party to party with a host of other machine-people. 
But suddenly his decadent existence ends when an all powerful alien race intent on the utter destruction of all intelligent life reappears after eons of silence, and threatens the lives of all heechee and humans. Even Robin, virtually immortal and with unlimited access to millennia of accumulated data, cannot discover how to stop these aliens. It began to seem that only a face to face meeting could determine the future of the entire universe....", "year_published": 1987, "genres": ["Science Fiction", "Fiction", "Space (Space Opera)", "Science Fiction Fantasy", "Science Fiction (Aliens)", "Speculative Fiction", "Audiobook", "Science Fiction (Hard Science Fiction)", "Novels", "Space"], "url": "https://www.goodreads.com/book/show/77463.The_Annals_of_the_Heechee"} +] \ No newline at end of file diff --git a/supporting-blog-content/using-ragas-with-elasticsearch/ragas-elasticsearch-demo.ipynb b/supporting-blog-content/using-ragas-with-elasticsearch/ragas-elasticsearch-demo.ipynb new file mode 100644 index 00000000..f1a70411 --- /dev/null +++ b/supporting-blog-content/using-ragas-with-elasticsearch/ragas-elasticsearch-demo.ipynb @@ -0,0 +1,801 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a5bfb7d8", + "metadata": {}, + "source": [ + "# Evaluating your Elasticsearch LLM applications with Ragas\n", + "\n", + "## Installing the required packages\n", + "\n", + "You will first want to install the packages required for this application. These include the following:\n", + "\n", + "- The [Elasticsearch Python client](https://www.elastic.co/docs/reference/elasticsearch/clients/python) will be helpful for authenticating and connecting to Elasticsearch and for vector search. 
\n", + "\n", + "- [Ragas](https://docs.ragas.io/en/stable/) is used for evaluating the quality of the LLM applications using standard metrics\n", + "\n", + "- The Hugging Face library [datasets](https://huggingface.co/docs/datasets/en/index) is used to create a robust evaluation dataset \n", + "\n", + "- [Langchain-OpenAI](https://python.langchain.com/api_reference/openai/llms/langchain_openai.llms.base.OpenAI.html) is used both for generating answers to user questions and for evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b21e3c09-f269-4c42-adc2-35f429d05ed1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.1.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install -q elasticsearch ragas datasets langchain-openai" + ] + }, + { + "cell_type": "markdown", + "id": "b3ccc0c0", + "metadata": {}, + "source": [ + "## Import modules\n", + "\n", + "After installing the required packages, import modules for handling environment variables, parsing JSON data, securely entering sensitive values, connecting to Elasticsearch, evaluating RAG applications and their metrics, creating evaluation datasets, and enabling chat model capabilities." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fd48912-9200-4fe9-8162-b0aefe697055", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "from getpass import getpass\n", + "from elasticsearch import Elasticsearch\n", + "from ragas import evaluate\n", + "from ragas.metrics import faithfulness, context_recall, context_precision\n", + "from datasets import Dataset\n", + "from langchain_openai import ChatOpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "006ad4a6-9cf7-4506-82d2-992000f049fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Host: ········\n", + "API Key: ········\n" + ] + } + ], + "source": [ + "# Create an Elasticsearch client by securely prompting for the host URL and API key\n", + "\n", + "es = Elasticsearch(\n", + " getpass(\"Host: \"),\n", + " api_key=getpass(\"API Key: \"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e0cd5f5-5cad-4ad1-ab9d-4c260faeaff1", + "metadata": {}, + "outputs": [], + "source": [ + "# Name of the Elasticsearch index where book data is stored\n", + "\n", + "index_name = \"ragas-books\"" + ] + }, + { + "cell_type": "markdown", + "id": "b4ea039c", + "metadata": {}, + "source": [ + "Your model will need to be deployed before the following code can run. To learn more, be sure to check out our [documentation on the subject](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-deploy-model)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d02ee988-1b08-4c43-b39d-88c1a6bacf72", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert a text query into an embedding vector using e5.\n", + "\n", + "\n", + "def embed_query(text: str):\n", + " res = es.ml.infer_trained_model(\n", + " model_id=\".multilingual-e5-small_linux-x86_64\",\n", + " body={\"docs\": [{\"text_field\": f\"query: {text}\"}]},\n", + " )\n", + "\n", + " vec = res[\"inference_results\"][0][\"predicted_value\"]\n", + "\n", + " return vec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "507e64cb-40a3-4657-9984-d9ab264014a7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted existing index 'ragas-books'\n", + "Created index 'ragas-books'\n" + ] + } + ], + "source": [ + "# If the Elasticsearch index already exists, delete it to start fresh\n", + "\n", + "if es.indices.exists(index=index_name):\n", + " es.indices.delete(index=index_name)\n", + " print(f\"Deleted existing index '{index_name}'\")\n", + "\n", + "\n", + "# Create a new index with a mapping that defines the fields and their types\n", + "\n", + "es.indices.create(\n", + " index=index_name,\n", + " body={\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"book_title\": {\"type\": \"text\"},\n", + " \"author_name\": {\"type\": \"text\"},\n", + " \"book_description\": {\"type\": \"text\"},\n", + " \"rating_score\": {\"type\": \"float\"},\n", + " \"embedding\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 384,\n", + " \"index\": True,\n", + " \"similarity\": \"cosine\",\n", + " },\n", + " }\n", + " }\n", + " },\n", + ")\n", + "print(f\"Created index '{index_name}'\")" + ] + }, + { + "cell_type": "markdown", + "id": "e300a8e0", + "metadata": {}, + "source": [ + "If you encounter any problems running this in Colab or locally, it might be due to the dataset requiring a separate download. 
It can be found in the same folder as this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ace8e9fe-c5b8-45c0-bbf7-f6b845fadd61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexed 1: Lucky 7\n", + "Indexed 2: Salvation Lost\n", + "Indexed 3: Alien Warrior's Mate\n", + "Indexed 4: On the Steel Breeze\n", + "Indexed 5: Salvage Marines\n", + "Indexed 6: Trade Secret\n", + "Indexed 7: There Will Be Time\n", + "Indexed 8: Only in Death\n", + "Indexed 9: His To Claim\n", + "Indexed 10: Savage Drift\n", + "Indexed 11: Light of the Jedi\n", + "Indexed 12: Mega Robo Bros\n", + "Indexed 13: Transmetropolitan, Vol. 1: Back on the Street\n", + "Indexed 14: The Queen of Traitors\n", + "Indexed 15: The Island of Doctor Moreau\n", + "Indexed 16: Human Nature\n", + "Indexed 17: Legion\n", + "Indexed 18: Wolfsbane\n", + "Indexed 19: Lamb: The Gospel According to Biff, Christ's Childhood Pal\n", + "Indexed 20: Our Pet\n", + "Indexed 21: The Aylesford Skull\n", + "Indexed 22: Ghosts of War\n", + "Indexed 23: The Book of Time\n", + "Indexed 24: Because It Is My Blood\n", + "Indexed 25: The Annals of the Heechee\n" + ] + } + ], + "source": [ + "# Load the book data from a local JSON file into a Python list of dictionaries\n", + "\n", + "with open(\"books.json\") as f:\n", + " books = json.load(f)\n", + "\n", + "# Loop through each book record and index it into Elasticsearch\n", + "\n", + "for i, book in enumerate(books, 1):\n", + " try:\n", + " book[\"embedding\"] = embed_query(book[\"book_description\"])\n", + " es.index(index=index_name, document=book)\n", + " print(f\"Indexed {i}: {book['book_title']}\")\n", + " except Exception as e:\n", + " print(f\"Failed to index '{book.get('book_title', 'Unknown')}': {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b431670a-86d2-4226-a4bf-8b6aeaa54cab", + "metadata": {}, + "outputs": [], + "source": [ + "# Runs a kNN search 
in Elasticsearch to find the most relevant books.\n", + "\n", + "\n", + "def vector_search(query, top_k=3):\n", + " query_vector = embed_query(query)\n", + "\n", + " body = {\n", + " \"knn\": {\n", + " \"field\": \"embedding\",\n", + " \"k\": top_k,\n", + " \"num_candidates\": 100,\n", + " \"query_vector\": query_vector,\n", + " },\n", + " \"_source\": [\"book_title\", \"author_name\", \"book_description\", \"rating_score\"],\n", + " }\n", + "\n", + " res = es.search(index=index_name, body=body)\n", + " hits = res[\"hits\"][\"hits\"]\n", + " contexts, books_info = [], []\n", + "\n", + " for hit in hits:\n", + " book = hit[\"_source\"]\n", + " context = (\n", + " f\"{book['book_title']} by {book['author_name']}: {book['book_description']}\"\n", + " )\n", + " contexts.append(context)\n", + " books_info.append(book)\n", + "\n", + " return contexts, books_info" + ] + }, + { + "cell_type": "markdown", + "id": "d3caa03c", + "metadata": {}, + "source": [ + "## Implementing the RAG generation component\n", + "\n", + "This section sets up the RAG generation pipeline by configuring the OpenAI API key, creating a chat model, defining functions to generate answers from retrieved context, analyze question intent, score books against user preferences, and produce ground truth responses for evaluation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7368d27e-4347-4ba5-8a1d-08500edb37a9", + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure the OpenAI API key is available in the environment\n", + "\n", + "\n", + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY: \")\n", + "\n", + "\n", + "# Retrieve the API key from the environment\n", + "\n", + "API_KEY = os.environ[\"OPENAI_API_KEY\"]\n", + "\n", + "# Create a ChatOpenAI client for generating answers\n", + "\n", + "chat_llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.1, api_key=API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6119e008-b231-4f10-aff9-d5e5acc1480a", + "metadata": {}, + "outputs": [], + "source": [ + "# Uses the retrieved contexts to generate a book recommendation via the LLM\n", + "\n", + "\n", + "def generate_answer(question, contexts):\n", + " context_text = \"\\n\\n\".join(contexts)\n", + "\n", + " prompt = f\"\"\"You are a helpful assistant that recommends books.\n", + "Use only the information from the context below to answer the question.\n", + "Do not include any books, authors, or details that are not explicitly present in the context.\n", + "\n", + "Repeat the exact book title and author from the context in your answer.\n", + "\n", + "Context:\n", + "{context_text}\n", + "\n", + "Question:\n", + "{question}\n", + "\n", + "Answer:\"\"\"\n", + " response = chat_llm.invoke(prompt)\n", + "\n", + " return response.content.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9312c85-dafb-476b-809c-82f5042a1ed6", + "metadata": {}, + "outputs": [], + "source": [ + "# Detects genres, quality preferences, and author focus from the user’s question\n", + "\n", + "\n", + "def analyze_question_intent(question):\n", + " question_lower = question.lower()\n", + "\n", + " intent_patterns = {\n", + " \"genre_specific\": {\n", + " \"science fiction\": [\n", 
+ " \"science fiction\",\n", + " \"sci-fi\",\n", + " \"space\",\n", + " \"future\",\n", + " \"alien\",\n", + " \"technology\",\n", + " ],\n", + " \"fantasy\": [\"fantasy\", \"magic\", \"dragon\", \"wizard\", \"medieval\", \"kingdom\"],\n", + " \"mystery\": [\n", + " \"mystery\",\n", + " \"detective\",\n", + " \"crime\",\n", + " \"murder\",\n", + " \"investigation\",\n", + " \"thriller\",\n", + " ],\n", + " \"romance\": [\"romance\", \"love\", \"relationship\", \"romantic\"],\n", + " \"horror\": [\"horror\", \"scary\", \"ghost\", \"supernatural\", \"fear\"],\n", + " \"historical\": [\"historical\", \"history\", \"war\", \"period\", \"ancient\"],\n", + " \"biography\": [\"biography\", \"memoir\", \"life story\", \"autobiography\"],\n", + " \"non-fiction\": [\n", + " \"non-fiction\",\n", + " \"nonfiction\",\n", + " \"factual\",\n", + " \"real\",\n", + " \"educational\",\n", + " ],\n", + " },\n", + " \"quality_indicators\": {\n", + " \"high_rating\": [\n", + " \"high rating\",\n", + " \"highly rated\",\n", + " \"best rated\",\n", + " \"top rated\",\n", + " \"excellent\",\n", + " ],\n", + " \"popular\": [\"popular\", \"bestseller\", \"well-known\", \"famous\", \"acclaimed\"],\n", + " \"award_winning\": [\"award\", \"prize\", \"winner\", \"acclaimed\", \"celebrated\"],\n", + " \"classic\": [\"classic\", \"timeless\", \"masterpiece\", \"legendary\"],\n", + " \"recent\": [\"recent\", \"new\", \"latest\", \"modern\", \"contemporary\"],\n", + " },\n", + " \"author_focus\": [\"author\", \"writer\", \"by\", \"written by\"],\n", + " }\n", + "\n", + " detected_genres = []\n", + " for genre, keywords in intent_patterns[\"genre_specific\"].items():\n", + " if any(keyword in question_lower for keyword in keywords):\n", + " detected_genres.append(genre)\n", + "\n", + " quality_preferences = []\n", + " for quality_type, keywords in intent_patterns[\"quality_indicators\"].items():\n", + " if any(keyword in question_lower for keyword in keywords):\n", + " 
quality_preferences.append(quality_type)\n", + "\n", + " author_focused = any(\n", + " keyword in question_lower for keyword in intent_patterns[\"author_focus\"]\n", + " )\n", + "\n", + " return {\n", + " \"genres\": detected_genres,\n", + " \"quality_preferences\": quality_preferences,\n", + " \"author_focused\": author_focused,\n", + " \"question_lower\": question_lower,\n", + " \"genre_keywords\": intent_patterns[\"genre_specific\"],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08baf9af-1b73-4897-8113-e7ff3b7e7594", + "metadata": {}, + "outputs": [], + "source": [ + "# Scores books based on intent matching, rating, and content relevance.\n", + "\n", + "\n", + "def calculate_book_score(book, intent_data):\n", + " score = 0\n", + " reasons = []\n", + "\n", + " rating = float(book.get(\"rating_score\", 0))\n", + " score += rating * 10\n", + "\n", + " book_title = book.get(\"book_title\", \"\").lower()\n", + " book_desc = book.get(\"book_description\", \"\").lower()\n", + " author_name = book.get(\"author_name\", \"\")\n", + "\n", + " for genre in intent_data[\"genres\"]:\n", + " genre_keywords = intent_data[\"genre_keywords\"][genre]\n", + " if any(\n", + " keyword in book_desc or keyword in book_title for keyword in genre_keywords\n", + " ):\n", + " score += 30\n", + " reasons.append(f\"matches {genre} genre\")\n", + " break\n", + "\n", + " if \"high_rating\" in intent_data[\"quality_preferences\"] and rating >= 4.0:\n", + " score += 20\n", + " reasons.append(\"high rating\")\n", + "\n", + " if \"popular\" in intent_data[\"quality_preferences\"] and len(book_desc) > 200:\n", + " score += 15\n", + " reasons.append(\"comprehensive description suggests popularity\")\n", + "\n", + " if intent_data[\"author_focused\"] and author_name:\n", + " score += 10\n", + " reasons.append(\"has clear author attribution\")\n", + "\n", + " stop_words = [\n", + " \"a\",\n", + " \"an\",\n", + " \"the\",\n", + " \"is\",\n", + " \"are\",\n", + " 
\"what\",\n", + " \"can\",\n", + " \"you\",\n", + " \"me\",\n", + " \"i\",\n", + " \"book\",\n", + " \"books\",\n", + " ]\n", + " question_words = [\n", + " word for word in intent_data[\"question_lower\"].split() if word not in stop_words\n", + " ]\n", + "\n", + " desc_matches = sum(1 for word in question_words if word in book_desc)\n", + " if desc_matches > 0:\n", + " score += desc_matches * 5\n", + " reasons.append(f\"description matches {desc_matches} key terms\")\n", + "\n", + " return {\"book\": book, \"score\": score, \"reasons\": reasons, \"rating\": rating}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "438e24fb-5f54-4b82-905b-833186283392", + "metadata": {}, + "outputs": [], + "source": [ + "# Creates a textual recommendation from the top-scoring books.\n", + "\n", + "\n", + "def generate_ground_truth_response(top_books, intent_data):\n", + " if not top_books:\n", + " return \"No relevant books found.\"\n", + "\n", + " top_book = top_books[0]\n", + " book = top_book[\"book\"]\n", + " title = book[\"book_title\"]\n", + " author = book[\"author_name\"]\n", + " rating = top_book[\"rating\"]\n", + "\n", + " detected_genres = intent_data[\"genres\"]\n", + " quality_preferences = intent_data[\"quality_preferences\"]\n", + " author_focused = intent_data[\"author_focused\"]\n", + "\n", + " if detected_genres:\n", + " genre = detected_genres[0]\n", + " if \"high_rating\" in quality_preferences or rating >= 4.0:\n", + " response = f\"For a highly-rated {genre} book, I recommend '{title}' by {author} (rating: {rating:.1f}).\"\n", + " else:\n", + " response = f\"A good {genre} book from the available options is '{title}' by {author}.\"\n", + "\n", + " elif \"high_rating\" in quality_preferences:\n", + " response = f\"Among the highest-rated books available, '{title}' by {author} stands out with a {rating:.1f} rating.\"\n", + "\n", + " elif \"popular\" in quality_preferences:\n", + " response = f\"'{title}' by {author} appears to be a popular 
choice based on the comprehensive information available.\"\n", + "\n", + " elif author_focused:\n", + " response = f\"I recommend '{title}' by the author {author}.\"\n", + "\n", + " else:\n", + " if rating >= 4.0:\n", + " response = f\"I recommend '{title}' by {author}, which has a strong rating of {rating:.1f}.\"\n", + " else:\n", + " response = f\"Based on the available books, '{title}' by {author} would be a good choice.\"\n", + "\n", + " if len(top_books) > 1 and top_books[1][\"score\"] > top_books[0][\"score\"] * 0.8:\n", + " second_book = top_books[1][\"book\"]\n", + " response += f\" You might also consider '{second_book['book_title']}' by {second_book['author_name']}.\"\n", + "\n", + " return response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a5f736d-3896-4ec9-86fe-a6afcd7a47b4", + "metadata": {}, + "outputs": [], + "source": [ + "# Orchestrates scoring and response generation for ground truth creation.\n", + "\n", + "\n", + "def create_ground_truth(question, books_info):\n", + " if not books_info:\n", + " return \"No relevant books found.\"\n", + "\n", + " intent_data = analyze_question_intent(question)\n", + "\n", + " book_scores = []\n", + " for book in books_info:\n", + " scored_book = calculate_book_score(book, intent_data)\n", + " book_scores.append(scored_book)\n", + "\n", + " book_scores.sort(key=lambda x: x[\"score\"], reverse=True)\n", + "\n", + " return generate_ground_truth_response(book_scores, intent_data)" + ] + }, + { + "cell_type": "markdown", + "id": "beeafb3f", + "metadata": {}, + "source": [ + "## Running the demo\n", + "\n", + "This section runs the RAG demo by asking sample questions, retrieving context, generating answers, creating ground truths, building an evaluation dataset, running Ragas metrics, and printing and saving the evaluation results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "114a1635-6f8d-4597-a0e5-827b5e21e0f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Executes the full demo, collects results, and runs a Ragas evaluation.\n", + "\n", + "\n", + "def run_ragas_demo():\n", + " print(\"🚀 Demo:\\n\")\n", + "\n", + " demo_questions = [\n", + " \"What's a good science fiction book with high ratings?\",\n", + " \"Can you suggest a fantasy book by a popular author?\",\n", + " \"What's a highly rated mystery novel?\",\n", + " \"Recommend a book with good reviews\",\n", + " ]\n", + "\n", + " questions, contexts_list, answers, ground_truths = [], [], [], []\n", + "\n", + " for i, question in enumerate(demo_questions, 1):\n", + " print(f\"\\n📚 Question {i}: {question}\")\n", + "\n", + " try:\n", + " contexts, books_info = vector_search(question, top_k=3)\n", + " if not contexts:\n", + " print(f\"No contexts found for question {i}\")\n", + " continue\n", + "\n", + " answer = generate_answer(question, contexts)\n", + " print(f\"Answer: {answer[:100]}...\")\n", + " ground_truth = create_ground_truth(question, books_info)\n", + " print(f\"Ground Truth: {ground_truth}\")\n", + " questions.append(question)\n", + " contexts_list.append(contexts)\n", + " answers.append(answer)\n", + " ground_truths.append(ground_truth)\n", + "\n", + " except Exception as e:\n", + " print(f\"Error processing question {i}: {e}\")\n", + " continue\n", + "\n", + " if not questions:\n", + " print(\"\\nNo valid Q&A pairs generated.\")\n", + " return None\n", + "\n", + " eval_dataset = Dataset.from_dict(\n", + " {\n", + " \"question\": questions,\n", + " \"contexts\": contexts_list,\n", + " \"answer\": answers,\n", + " \"ground_truth\": ground_truths,\n", + " }\n", + " )\n", + "\n", + " print(\"\\n✨ Running Ragas evaluation...\")\n", + " try:\n", + " result = evaluate(\n", + " dataset=eval_dataset,\n", + " metrics=[context_precision, faithfulness, context_recall],\n", + " llm=chat_llm,\n", + " 
embeddings=None,\n", + " )\n", + "\n", + " df = result.to_pandas()\n", + "\n", + " print(\"\\n✨ Ragas Evaluation Results:\")\n", + " print(df)\n", + "\n", + " print(\"\\n✨ Averages:\")\n", + "\n", + " for metric, value in df.mean(numeric_only=True).items():\n", + " print(f\"{metric}: {value:.3f}\")\n", + "\n", + " df.to_csv(\"ragas_evaluation.csv\", index=False)\n", + " print(\"\\nResults saved to 'ragas_evaluation.csv'\")\n", + " return result\n", + "\n", + " except Exception as e:\n", + " print(f\"Ragas evaluation failed: {e}\")\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db4685c5-7cc0-4e2a-875f-bc233d6d4591", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🚀 Demo:\n", + "\n", + "\n", + "📚 Question 1: What's a good science fiction book with high ratings?\n", + "Answer: \"Light of the Jedi\" by Charles Soule...\n", + "Ground Truth: For a highly-rated science fiction book, I recommend 'Light of the Jedi' by Charles Soule (rating: 4.2). You might also consider 'Legion' by Dan Abnett.\n", + "\n", + "📚 Question 2: Can you suggest a fantasy book by a popular author?\n", + "Answer: Based on the context provided, I recommend \"The Book of Time\" by Guillaume Prévost....\n", + "Ground Truth: A good fantasy book from the available options is 'Our Pet' by S.M. Matthews. You might also consider 'The Book of Time' by Guillaume Prévost.\n", + "\n", + "📚 Question 3: What's a highly rated mystery novel?\n", + "Answer: Human Nature by Jonathan Green...\n", + "Ground Truth: For a highly-rated mystery book, I recommend 'Human Nature' by Jonathan Green (rating: 3.6). You might also consider 'Lamb: The Gospel According to Biff, Christ's Childhood Pal' by Christopher Moore.\n", + "\n", + "📚 Question 4: Recommend a book with good reviews\n", + "Answer: The Island of Doctor Moreau by H.G. Wells...\n", + "Ground Truth: Based on the available books, 'The Island of Doctor Moreau' by H.G. 
Wells would be a good choice. You might also consider 'Savage Drift' by Emmy Laybourne.\n", + "\n", + "✨ Running Ragas evaluation...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "80d98ec6b77248a3b1762429b3b2a0c8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Evaluating: 0%| | 0/12 [00:00