In [None]:
import pandas as pd

In [None]:
data_df = pd.read_csv("../data/original_data.csv")

In [None]:
data_df.duplicated().sum()

0

In [None]:
# Keep only rows that have a description, then renumber the index from 0.
data_df = data_df.dropna(subset=['Description']).reset_index(drop=True)

In [None]:
# Split Category strings into lists (split on comma, strip whitespace)
def parse_categories(val):
    """Normalise one raw ``Category`` cell into a list of category names.

    Parameters
    ----------
    val : Any
        A comma-separated string, an already-parsed list, a missing value
        (``None`` / ``NaN``), or any other scalar.

    Returns
    -------
    list
        * list input    -> returned unchanged (re-running the cell is safe);
        * missing value -> ``[]``;
        * string        -> comma-split pieces, whitespace-stripped, empty
          pieces dropped;
        * anything else -> ``[str(val)]``.
    """
    # Lists must be handled before pd.isna(): for a list pd.isna() returns an
    # element-wise array, and using that array in an `if` raises ValueError
    # as soon as the list has more than one element.
    if isinstance(val, list):
        return val
    if pd.isna(val):
        return []
    if isinstance(val, str):
        return [part.strip() for part in val.split(',') if part.strip()]
    return [str(val)]

data_df['Category'] = data_df['Category'].apply(parse_categories)

In [None]:
data_df

Unnamed: 0,Title,Authors,Description,Category,Publisher,Price Starting With ($),Publish Date (Month),Publish Date (Year)
0,Journey Through Heartsongs,"By Stepanek, Mattie J. T.",Collects poems written by the eleven-year-old ...,"[Poetry, General]",VSP Books,19.96,September,2001
1,In Search of Melancholy Baby,"By Aksyonov, Vassily, Heim, Michael Henry, and...",The Russian author offers an affectionate chro...,"[Biography & Autobiography, General]",Random House,4.99,June,1987
2,The Dieter's Guide to Weight Loss During Sex,"By Smith, Richard","A humor classic, this tongue-in-cheek diet pla...","[Health & Fitness, Diet & Nutrition, Diets]",Workman Publishing Company,4.99,January,1978
3,Germs : Biological Weapons and America's Secre...,"By Miller, Judith, Engelberg, Stephen, and Bro...","Deadly germs sprayed in shopping malls, bomb-l...","[Technology & Engineering, Military Science]",Simon & Schuster,4.99,October,2001
4,The Good Book: Reading the Bible with Mind and...,"By Gomes, Peter J.","""The Bible and the social and moral consequenc...","[Religion, Biblical Biography, General]",Harper Perennial,5.29,May,1998
...,...,...,...,...,...,...,...,...
70199,Like A Sister: A Novel,"By Daugharty, Janice",Sister cannot say exactly when or where she wa...,"[Fiction, Literary]",Harper,5.37,November,1999
70200,Creating Web Pages Simplified (3-D Visual Series),"By Maran, Ruth, Whitehead, Paul, and Marangrap...","An ""owner's manual"" for first-time Web page cr...","[Computers, Internet, General]",Hungry Minds Inc,5.95,January,1997
70201,EVA: The Real Key to Creating Wealth,"By Ehrbar, Al","Called ""today's hottest financial idea and get...","[Business & Economics, Corporate Finance, Gene...",Wiley,29.96,October,1998
70202,The Essentials of Spanish (REA's Language Seri...,"By Mouat, Ricardo Gutierrez",REA’s Essentials provide quick and easy access...,"[Foreign Language Study, Spanish]",Research & Education Association,5.29,January,1998


In [None]:
# Remove only the leading "By " credit prefix. str.replace("By ", "") would
# also delete any later "By " inside the author string; non-string cells
# (e.g. NaN) are passed through unchanged.
data_df['Authors'] = data_df['Authors'].apply(
    lambda x: x[len("By "):] if isinstance(x, str) and x.startswith("By ") else x
)

In [None]:
# Filter out rows with invalid author formats (all uppercase, no commas)
def is_valid_author_format(val):
    """Return True when an ``Authors`` cell should be kept.

    Missing values are kept. Any other value is converted to ``str`` and
    stripped; it is rejected when ``str.isupper()`` is true for it, or when
    it contains no comma (the "Lastname, Firstname" layout needs one).
    """
    if pd.isna(val):
        # NaN rows are retained at this stage
        return True

    candidate = str(val).strip()
    has_comma = ',' in candidate
    return has_comma and not candidate.isupper()

# Drop rows whose author string fails the format check and renumber the index.
valid_author_mask = data_df['Authors'].apply(is_valid_author_format)
data_df = data_df.loc[valid_author_mask].reset_index(drop=True)

In [None]:
import re

def parse_authors(val):
    """Turn a raw author credit string into a list of "Lastname, Firstname" names.

    The input is expected to look like
    ``"Last1, First1, Last2, First2, and Last3, First3"``.

    Processing steps:
      1. split on the word "and", then on commas, dropping empty pieces;
      2. drop pieces that are only a role/title code (EDT, ILT, PHD, ...) or
         that contain an institution keyword (MUSEUM, UNIVERSITY, ...);
      3. remove parenthesised text from the remaining pieces;
      4. glue a generational suffix (Jr., Sr., II, III, IV) onto the piece
         before it, so it does not shift the Lastname/Firstname pairing;
      5. join consecutive pieces two by two as "Lastname, Firstname"; a
         trailing unpaired piece is kept as is.

    Parameters
    ----------
    val : Any
        Raw author string, a missing value, or an already-parsed list.

    Returns
    -------
    list
        ``[]`` for missing values, the input itself when it is already a
        list, otherwise the list of reconstructed names.

    Notes
    -----
    Pairing is purely positional: credits written as "First Last, First Last"
    are still joined into a single entry.
    """
    # Already parsed (e.g. the cell was re-run): return the list untouched.
    # This has to precede pd.isna(), which is element-wise on lists.
    if isinstance(val, list):
        return val
    if pd.isna(val):
        return []

    # Common academic/editorial role codes that are not part of a name
    known_titles = {'EDT', 'COR', 'ILT', 'TRN', 'FRW', 'PHD', 'DR', 'MD', 'PROF', 'ESQ'}
    # Generational suffixes that belong to the preceding name piece
    name_suffixes = {'JR', 'SR', 'II', 'III', 'IV'}
    institution_keywords = ('MUSEUM', 'LIBRARY', 'SOCIETY', 'INSTITUTE', 'FOUNDATION', 'ASSOCIATION', 'UNIVERSITY')

    # Split by " and " first, then by comma, keeping non-empty stripped pieces
    name_parts = []
    for segment in re.split(r'\s+and\s+', str(val)):
        name_parts.extend(piece.strip() for piece in segment.split(',') if piece.strip())

    filtered_authors = []
    for part in name_parts:
        # Parentheses and dots are ignored when comparing against the code lists
        cleaned = re.sub(r'[\(\)\.]', '', part).upper().strip()

        # Skip only if the whole piece is a known title
        if cleaned in known_titles:
            continue

        # Skip pieces naming an institution
        if any(keyword in part.upper() for keyword in institution_keywords):
            continue

        # Remove parentheses and their contents from the name piece
        cleaned_author = re.sub(r'\s*\([^)]*\)', '', part).strip()
        if not cleaned_author:
            continue

        # "King, Martin Luther, Jr." -> the suffix extends "Martin Luther"
        # instead of becoming its own piece and misaligning later pairs.
        if cleaned in name_suffixes and filtered_authors:
            filtered_authors[-1] = f"{filtered_authors[-1]} {cleaned_author}"
            continue

        filtered_authors.append(cleaned_author)

    # Join every two consecutive pieces (Lastname + Firstname); an odd
    # trailing piece forms a one-element slice and is kept unchanged.
    return [
        ", ".join(filtered_authors[i:i + 2])
        for i in range(0, len(filtered_authors), 2)
    ]

In [None]:
data_df['Authors'] = data_df['Authors'].apply(parse_authors)

In [None]:
data_df.to_csv("../data/processed_data.csv", index=False)

Unnamed: 0,Title,Authors,Description,Category,Publisher,Price Starting With ($),Publish Date (Month),Publish Date (Year)
0,Journey Through Heartsongs,"[Stepanek, Mattie J. T.]",Collects poems written by the eleven-year-old ...,"[Poetry, General]",VSP Books,19.96,September,2001
1,In Search of Melancholy Baby,"[Aksyonov, Vassily, Heim, Michael Henry, Bouis...",The Russian author offers an affectionate chro...,"[Biography & Autobiography, General]",Random House,4.99,June,1987
2,The Dieter's Guide to Weight Loss During Sex,"[Smith, Richard]","A humor classic, this tongue-in-cheek diet pla...","[Health & Fitness, Diet & Nutrition, Diets]",Workman Publishing Company,4.99,January,1978
3,Germs : Biological Weapons and America's Secre...,"[Miller, Judith, Engelberg, Stephen, Broad, Wi...","Deadly germs sprayed in shopping malls, bomb-l...","[Technology & Engineering, Military Science]",Simon & Schuster,4.99,October,2001
4,The Good Book: Reading the Bible with Mind and...,"[Gomes, Peter J.]","""The Bible and the social and moral consequenc...","[Religion, Biblical Biography, General]",Harper Perennial,5.29,May,1998
...,...,...,...,...,...,...,...,...
67650,Like A Sister: A Novel,"[Daugharty, Janice]",Sister cannot say exactly when or where she wa...,"[Fiction, Literary]",Harper,5.37,November,1999
67651,Creating Web Pages Simplified (3-D Visual Series),"[Maran, Ruth, Whitehead, Paul, Marangraphics I...","An ""owner's manual"" for first-time Web page cr...","[Computers, Internet, General]",Hungry Minds Inc,5.95,January,1997
67652,EVA: The Real Key to Creating Wealth,"[Ehrbar, Al]","Called ""today's hottest financial idea and get...","[Business & Economics, Corporate Finance, Gene...",Wiley,29.96,October,1998
67653,The Essentials of Spanish (REA's Language Seri...,"[Mouat, Ricardo Gutierrez]",REA’s Essentials provide quick and easy access...,"[Foreign Language Study, Spanish]",Research & Education Association,5.29,January,1998


In [None]:
# Reload the file written by the earlier to_csv cell, using the same relative
# path, so the notebook runs top to bottom without a Colab-only absolute path.
data_df = pd.read_csv("../data/processed_data.csv")

Checking for incomplete or too short descriptions.

In [None]:
import re
import pandas as pd

def is_bad_description(text):
    """Return True when a description is unusable.

    A description is considered bad when any of the following holds:
      * it is not a string, or is empty / whitespace only;
      * after stripping, its upper-cased form is one of the listed
        spreadsheet error markers (e.g. "#NAME?");
      * it contains two consecutive hash characters ("##");
      * it is one of the listed punctuation-only placeholders;
      * it has fewer than 5 whitespace-separated words.
    """
    if not isinstance(text, str):
        return True

    body = text.strip()
    if not body:
        return True

    excel_errors = ("#NAME?", "#REF!", "#N/A", "#NA", "#NULL!", "#DIV/0!")
    punctuation_only = (".", "..", "...", "-", "--", "_")

    reasons = (
        body.upper() in excel_errors,   # spreadsheet error marker
        "##" in body,                   # run of hash characters
        body in punctuation_only,       # placeholder punctuation
        len(body.split()) < 5,          # too short to be informative
    )
    return any(reasons)


In [None]:
# Flag every row whose description fails the quality check, keep an
# independent copy of those rows, and show how many there are.
bad_desc_mask = data_df['Description'].map(is_bad_description)
bad_descriptions_df = data_df.loc[bad_desc_mask].copy()
bad_descriptions_df.shape[0]

28

In [None]:
bad_descriptions_df[['Title', 'Authors', 'Description']].head(28)

Unnamed: 0,Title,Authors,Description
7780,Triumph: Getting Back,"['Morra, Marion', 'Potts, Eve']",sychological aspects.
8417,This Planet Is Mine: Teaching Environmental Aw...,"['Metzger, Mary', 'Whittaker, Cinthya P.']",Teaches children environmental awareness
9341,North with Lee and Jackson,"['Kegel, James A.']",Discusses the South's strategy
12142,Annuals: New Color Ideas for Home and Garden (...,"['Freeman, Patricia', 'Rickard, John M.']",#NAME?
18040,"Fences, Walls & Gates (Black & Decker Outdoor ...","['Jerri Ferris, Tim Himsel']",#NAME?
19447,Recipes to Lower Your Fat Thermostat: The Offi...,"['Gaunt, Larene']",Includes healthy recipes
21680,When Love Is Forever (Mini Square Books),"['Exley, Helen', 'Clarke, Juliette']",Inspirational quotations
22046,The Revival Slim and Beautiful Diet: How An In...,"['Tabor, Aaron', 'Tabor, Suzanne']",2
26318,Oh! Christmas Trees,"['Weiland, Barbara']",Oh! Christmas Trees
28827,Sex in History,"['Tannahill, Reay']",Thoroughly fascinating.—New York Post


In [None]:
# Manual fix of missing book descriptions
# 1) Dictionary mapping the specific index to the new description
manual_desc_updates = {

    7780: "A practical guide to coping with illness and medical challenges, this book explores the emotional and psychological obstacles people face as they work toward recovery and resilience.",
    8417: "An educational resource designed to help children understand their role in caring for the planet, offering accessible activities and lessons that build environmental awareness.",
    9341: "A historical study of Confederate military strategy, examining the campaigns led by Robert E. Lee and Stonewall Jackson during the American Civil War.",
    12142: "A visual reference for gardeners featuring new and creative ideas for using annuals. Includes colorful plant suggestions, design tips, and techniques for adding seasonal impact to the home landscape.",
    18040: "A hands-on guide to planning and building fences, walls, and gates for outdoor spaces. Provides step-by-step instructions, construction advice, and design inspiration for homeowners.",
    19447: "A collection of simple and nutritious recipes aimed at reducing dietary fat. The book emphasizes healthy ingredients, practical cooking methods, and everyday meals for improving overall wellness.",
    21680: "A small gift book celebrating the permanence of love, featuring inspirational quotations and reflections intended to uplift and encourage long-term relationships.",
    22046: "An overview of a diet program centered on a specific nutritional bean extract. The authors explain how the plan supports energy, weight management, and improved metabolic health.",
    26318: "A craft and decorative guide filled with ideas for creating handmade Christmas tree ornaments and festive holiday decorations.",
    28827: "A historical survey exploring the role of sexuality in human societies, drawing on research, cultural analysis, and case studies from ancient to modern times.",
    28915: "A humorous collection of quotations and observations about fatherhood, compiled to celebrate and gently tease dads of all ages.",
    33211: "A bright and simple early-learning board book introducing young children to colors through clear images and easy-to-recognize objects.",
    35391: "A curated collection of memorable and motivational quotations about golf, drawing inspiration from players, writers, and enthusiasts of the sport.",
    35943: "A beginner-friendly mystery story for young readers in which a child detective solves a spooky case at a haunted hotel, uncovering clues and learning problem-solving skills along the way.",
    38871: "A small gift book celebrating the bond between grandmothers and grandchildren, filled with warm, loving quotations suitable for sharing and gifting.",
    39126: "A themed collection of quotations reflecting the beauty, spirit, and companionship associated with horses, ideal for equestrians and gift-giving occasions.",
    41060: "A craft book based on the American Girl Samantha series, featuring historical craft projects inspired by turn-of-the-century household traditions and activities.",
    44181: "A suspense novel in the Lucas series involving a private investigator drawn into a dangerous case. The story blends crime, tension, and personal stakes.",
    44610: "A gift-book collection of heartfelt quotations about the unique and lasting bond between mothers and sons.",
    46144: "A professional reference guide summarizing the most commonly used psychotropic medications. Includes dosage information, clinical applications, side effects, and treatment guidance.",
    49614: "A large activity book for children filled with creative projects, puzzles, coloring pages, and hands-on exercises designed to encourage imaginative play.",
    51463: "A quilting guide from a small-town quilt shop, offering patterns, practical techniques, and inspiration for both new and experienced quilters.",
    57814: "An early-learning concept book that introduces young children to the idea of time using simple explanations, everyday examples, and engaging photographs.",
    62154: "A lighthearted collection of computer-themed jokes and cartoons aimed at readers who enjoy technology-related humor.",
    62520: "A concise introduction to the ancient Maya civilization, highlighting its history, cultural achievements, and archaeological discoveries.",
    62582: "A collection of inspirational quotations celebrating the special relationship shared between sisters, intended for reflection or gift-giving.",
    62762: "A tactile board book for babies featuring animals found in the zoo. Designed to encourage sensory exploration through simple text and touch-and-feel elements.",
    64489: "A simple and reassuring picture book for young children about what firefighters do, introducing their equipment, duties, and role in helping the community."

}
# 2) Original dataframe update
# The dictionary keys are row labels of data_df. Labels that are absent are
# still skipped, but they are reported so that a shift in the row index
# (e.g. after changing an upstream filter) does not go unnoticed.
missing_desc_indices = [index for index in manual_desc_updates if index not in data_df.index]
for index, new_desc in manual_desc_updates.items():
    if index in data_df.index:
        data_df.loc[index, 'Description'] = new_desc
if missing_desc_indices:
    print(f"Skipped {len(missing_desc_indices)} manual description fix(es); index not found: {missing_desc_indices}")

Language detection

In [None]:
!pip install langdetect pandas tqdm



In [None]:
from langdetect import detect, detect_langs, LangDetectException
from tqdm.auto import tqdm

tqdm.pandas()

In [None]:
def detect_language(text):
    """
    Returns (lang, prob) for a given text or (None, None) if detection fails.

    Parameters
    ----------
    text : Any
        The text to classify. Non-string values, blank strings and texts
        shorter than 20 characters after stripping are not analysed.

    Returns
    -------
    tuple
        ``(lang, prob)`` taken from the first candidate returned by
        ``detect_langs``, or ``(None, None)`` when the input is rejected,
        ``detect_langs`` returns no candidate, or it raises
        ``LangDetectException``.
    """
    if not isinstance(text, str):
        return None, None

    # Very short texts tend to give random results. The length is measured
    # after stripping so whitespace padding cannot lift a short snippet over
    # the threshold (this also covers blank strings).
    if len(text.strip()) < 20:
        return None, None

    # langdetect is non-deterministic unless the factory seed is fixed before
    # detection; pin it so repeated runs of the notebook give the same output.
    from langdetect import DetectorFactory
    DetectorFactory.seed = 0

    try:
        # detect_langs returns a list of candidates
        langs = detect_langs(text)
    except LangDetectException:
        return None, None

    # Guard against an empty candidate list instead of failing on langs[0]
    if not langs:
        return None, None

    top = langs[0]
    return top.lang, top.prob


In [None]:
data_lang = data_df.copy()

In [None]:
# Detection of language: run detect_language on every description (with a
# tqdm progress bar via progress_apply) and unpack the returned (lang, prob)
# pair into the two new columns 'lang' and 'lang_conf'.
data_lang[['lang', 'lang_conf']] = data_lang['Description'].progress_apply(
    lambda x: pd.Series(detect_language(x))
)

  0%|          | 0/67655 [00:00<?, ?it/s]

In [None]:
# Counts per language
print(data_lang['lang'].value_counts(dropna=False))

lang
en    67621
es       17
fr        6
de        3
it        3
pt        2
ca        1
af        1
et        1
Name: count, dtype: int64


In [None]:
# Non-English texts: detected language differs from 'en' and a positive
# confidence value is present.
is_foreign = data_lang['lang'].ne('en')
has_confidence = data_lang['lang_conf'].gt(0.0)
non_en = data_lang[is_foreign & has_confidence]
non_en[['Title', 'Authors', 'Description', 'lang', 'lang_conf']].head(5)

Unnamed: 0,Title,Authors,Description,lang,lang_conf
152,The Biblical Road to Blessing,"['Hinn, Benny']",El inalterable amor de Dios y su deseo de bend...,es,0.999994
4462,Eminence,"['Kienzle, William X.']",Father Koesler and Lieutenant Alonzo,de,0.999994
4654,Dropping Your Guard: The Value of Open Relatio...,"['Swindoll, Charles R.']",Una alternativa refrescante a las heridas y te...,es,0.999996
5624,Complete Spy,"['McGarvey, Robert', 'McGarvey, Caitlin E.']","Describes listening devices, surveillance came...",fr,0.571427
8769,Curas de la cocina latina,"['Prevention Magazine Health Books, Delgado', ...",Curas de lo Cocina Latina: Desde el Aguacate ...,es,0.999998


In [None]:
manual_desc_updates_lang = {
    152: ( "A devotional book that explains the unchanging love of God and His desire "
        "to bless His children. Hinn explores how obedience, faith, and biblical "
        "giving open the way to experiencing God’s promises in everyday life."
    ),
    4462: ( "In this eleventh Father Koesler mystery, a small new monastery in Detroit "
        "suddenly attracts crowds and large donations after a supposed miracle gives "
        "sight to a blind woman. Asked to look into the situation, Father Koesler "
        "must determine whether the community’s charismatic leader is a genuine "
        "man of God or at the center of a dangerous deception."
    ),
    4654: ( "Swindoll challenges readers to drop their emotional defenses and remove the "
        "masks they wear in relationships. Through biblical insight and practical "
        "examples, he shows how authenticity, honesty, and vulnerability can heal "
        "old hurts and lead to deeper, more meaningful connections with others."
    ),
    5624: ( "An insider’s guide to modern espionage gear, explaining how listening "
        "devices, surveillance cameras, alarm systems, explosives detectors, safes, "
        "protective clothing, weapons, disguises, and other equipment are used. "
        "The authors also point readers to sources for specialized spy-tech tools."
    ),
    8769: ("A health guide built around traditional Latin American foods and remedies. "
        "Drawing on nutrition research and medical experts, it highlights everyday "
        "ingredients that may help prevent or ease common conditions such as diabetes, "
        "high cholesterol, high blood pressure, and excess weight, while offering "
        "practical advice and recipes for healthier eating."
    ),
    9147: ("A men’s devotional about the strength that comes from shared faith. Wagner "
        "explores five foundational truths that help men build deep friendships and "
        "supportive communities rooted in a common commitment to Jesus Christ."
    ),
    11068: ( "Using the Old Testament story of Nehemiah, Swindoll draws out principles of "
        "effective leadership at work, in ministry, and at home. He shows how wise "
        "leaders handle motivation, discouragement, opposition, and adversity with "
        "integrity while keeping their vision focused on God’s purposes."
    ),
    11183: (
        "A short, practical guide that offers simple ideas for talking with children "
        "about God. Temple suggests conversational, everyday ways to introduce faith, "
        "answer questions, and make discussion about spiritual things feel natural "
        "and open in the home."
    ),

    11435: ( "Journalist María Antonieta Collins reflects on her own turbulent encounters "
        "with ex-husbands, ex-wives, and ex-partners, especially when her husband’s "
        "former spouse suddenly reappears and disrupts their marriage. Blending "
        "memoir, humor, and advice, she offers guidance on how to set boundaries, "
        "protect one’s heart, and make wise decisions when past relationships refuse "
        "to stay in the past."
    ),
    13384: ( "A diver’s guide to Hawaii’s underwater world, describing colorful coral "
        "reefs, dramatic lava tunnels and pinnacles, and the many unique species "
        "found at popular dive sites throughout the islands."
    ),
    13937: ( "In this memoir, news anchor María Elena Salinas recounts her rise from a "
        "working-class Los Angeles neighborhood to becoming one of the most recognized "
        "Spanish-language journalists in the United States. Along the way she confronts "
        "family secrets—especially the revelation that her beloved father had once been "
        "a Catholic priest—and reflects on identity, faith, career, and the cost of truth."
    ),
    16562: ("A near-future science-fiction novel told through e-mail exchanges between a "
        "grad student and an experimental artificial intelligence called EDGAR. As the "
        "AI grows more self-aware, their relationship draws the attention of government "
        "agencies and raises unsettling questions about consciousness, control, and the "
        "moral limits of machine intelligence."
    ),
    25816: ("A compact Precious Moments gift Bible featuring angel-themed artwork by Sam "
        "Butcher. It includes the full Reina-Valera 1960 text, full-color illustrations, "
        "presentation pages, and space to record personal reflections, making it a "
        "keepsake for those who love these classic characters."
    ),
    26115: ("A Spanish-language edition of the classic picture book Goodnight Moon. In a "
        "quiet green room, a little bunny says goodnight to everything around him— "
        "from the moon and the stars to mittens and kittens—creating a soothing, "
        "rhythmic bedtime story for young children and adults to share."
    ),
    39813: ( "A detailed narrative history of the Battle of Gettysburg, guiding readers "
        "step by step through the three days of fighting. Stackpole combines maps, "
        "photographs, and eyewitness accounts to explain the movements of the armies, "
        "the decisions of their commanders, and the experiences of soldiers on both sides."
    ),
    40850: ("The second level of a standards-based English course for adult beginners. "
        "Stand Out 2 integrates language development with real-life projects, listening "
        "and pronunciation practice, grammar in context, and supplemental exercises, "
        "helping learners communicate at work, at home, and in the community."
    ),
    42424: ( "Drawing on his experience with men’s ministry, Patrick Morley explains how "
        "men think, what they struggle with, and what they often wish they could say "
        "to their wives. The book helps women better understand the spiritual, "
        "emotional, and practical needs of their husbands so they can strengthen "
        "their marriages."
    ),

    42581: ("A devotional based on the brief Old Testament prayer of Jabez in 1 Chronicles 4:10. "
        "Wilkinson explores how asking God to bless, enlarge one’s territory, and keep "
        "harm away can reshape a believer’s expectations and open doors to a life of "
        "greater fruitfulness and service."
    ),

    45093: ("A novelist’s wife disappears without explanation, leaving him obsessed with "
        "finding her and with the idea that she has become his ‘Zahir’—a consuming "
        "fixation. His search leads from the comfortable world of Paris to distant "
        "Kazakhstan, where he begins to question success, freedom, marriage, and the "
        "true nature of love."
    ),
    46809: "Alexander wakes up to a piece of gum stuck in his hair and soon discovers that everything about this day will be terrible. From losing his best friend to unpleasant surprises at school and dinner, Alexander’s string of misfortunes makes for a humorous and relatable story about handling bad days.",
    50761: "A standards-based English course designed for adult beginners. The book integrates language development with real-life projects, listening activities, pronunciation practice, and supplemental exercises, offering a practical approach to communication skills at home, work, and in daily routines.",
    50909: "A behind-the-scenes look at Roxanne Pulitzer’s highly publicized marriage and divorce, exploring the glamorous and turbulent world of Palm Beach society and the personal struggles that shaped her story.",
    54301: "An instructional guide to bowling that covers essential skills, techniques, and strategies. Includes explanations of rules, equipment, approach mechanics, and tips for improving performance.",
    57769: "A faith-centered guide encouraging readers to discover God’s purpose in their lives and participate more fully in spiritual revival. The book emphasizes personal vision, ministry involvement, and renewed commitment.",
    60608: "A creative reference for artists and hobbyists featuring decorative painting methods, color combinations, and project ideas. Includes inspiration from various artists and practical exercises.",
    61617: "A retelling of a Dominican legend about the ciguapas—mysterious sea-dwelling beings who visit land only at night. The story follows Guapa, a curious ciguapa whose love for sweet foods nearly exposes her tribe’s secret: their backward-facing feet that protect them from humans. A magical tale blending folklore, adventure, and cultural heritage.",
    65352: "A moving novel about Don Júbilo, a man gifted with the ability to understand the hidden emotions behind people’s words. Drawing from the life of Laura Esquivel’s father, the story explores communication, love, marriage, and the healing power of listening as past wounds threaten to break a family apart.",
    65897: "A devotional for women offering weekly reflections on faith, encouragement, and spiritual confidence. Through fifty-two short chapters, readers are reminded of God’s love, protection, and guidance through life’s challenges. Includes a full Spanish-language text in this edition."

}


In [None]:
# Manual translation/correction to English.
# 2) Original dataframe update
# The dictionary keys are row labels of data_df. Labels that are absent are
# still skipped, but they are reported so that a shift in the row index does
# not silently drop corrections.
missing_lang_indices = [index for index in manual_desc_updates_lang if index not in data_df.index]
for index, correction in manual_desc_updates_lang.items():
    if index in data_df.index:
        data_df.loc[index, 'Description'] = correction
if missing_lang_indices:
    print(f"Skipped {len(missing_lang_indices)} manual language correction(s); index not found: {missing_lang_indices}")

In [None]:
# English texts detected with confidence below 0.8
is_english = data_lang['lang'].eq('en')
low_confidence = data_lang['lang_conf'].lt(0.8)
suspicious_en = data_lang[is_english & low_confidence]
suspicious_en[['Title', 'Description', 'lang_conf']].head(10)
# no change needed

Unnamed: 0,Title,Description,lang_conf
1683,The Classic American Quilt Collection: Baskets...,Provides detailed instructions for making a va...,0.714282
7299,Interior Lighting (Ortho Books),"Offers design suggestions, describes a variety...",0.714285
13307,The Insider's Guide to Buying a New or Used Car,"Covers car loans, trade-ins, dealer costs, tes...",0.714285
13558,The Modern Dance: Seven Statements of Belief,"Seven Statements of Belief: Jose Limon, Anna S...",0.571428
19764,Savory to Sweet: Pies & Tarts (The Creative Cook),"Shares recipes for barquettes, tartlets, quich...",0.714284
19924,The Clerihews of Paul Horgan,"Gathers brief, humorous poems about Marie Anto...",0.571427
27697,The Baby's Lap Book,An anthology of familiar nursery rhymes,0.571427
29778,The Metropolitan Opera Book of Mozart Operas,"Seven librettos include ""Don Giovanni,"" ""Cosi ...",0.571427
29790,Chasing Rickshaws,Looks at rickshaws and the people who operate ...,0.714284
32553,Pooh Goes Visiting (A Winnie-The-Pooh Story Book),"When Winnie-the-Pooh visits his friend Rabbit,...",0.714283


In [None]:
# Write next to the other data files (same relative directory used when
# loading the original CSV) instead of a Colab-only absolute path.
data_df.to_csv("../data/processed_data_2.csv", index=False)