In [7]:

from dotenv import load_dotenv
from utils.db_handler import DatabaseHandler
import pandas as pd
import spacy

# creating database handler instance
my_db_handler = DatabaseHandler()
table_name = "optigame_products"

In [8]:
# returning data from the database
df = my_db_handler.retrieve_all_from_table(table_name)
df.head(10)

Unnamed: 0,reviews_count,price,rating,id,description,asin,title,sales_volume
0,3c943db5-ad7e-45da-85a5-2363584335b1,B07MV4NN5Z,"Big Potato The Chameleon, Award-Winning Board ...",15.99,4.7,2K+ bought in past month,,7063
1,0ce92bc9-af3e-4e03-a362-75dfdac41f84,B09NYD15ZH,Skillmatics Rapid Rumble - Fast-Paced Board Ga...,19.97,4.6,1K+ bought in past month,,3147
2,44ae72ff-b3cb-45b2-b38f-a814e6a26d3e,B076HK9H7Z,"Sorry! Kids Board Game, Family Board Games for...",7.49,4.8,10K+ bought in past month,"Slide, collide and score to win the Sorry! gam...",31332
3,99f3cd2b-7e42-4abd-a76f-32ae6855834a,B00D7OAOYI,"Hasbro Gaming Trouble Kids Board Game, Pop-o-M...",8.99,4.7,10K+ bought in past month,Product Description The Trouble game is racing...,37102
4,ceb3ba0f-3bdf-45eb-8d93-637369b6d069,B00000DMF5,Hasbro Gaming Candy Land Kingdom of Sweet Adve...,10.49,4.8,10K+ bought in past month,Product Description If you loved playing the C...,35049
5,4ce2ba59-d635-4021-b5ae-037f4c2fc4fa,B00D8STBHY,"Hasbro Gaming Connect 4 Classic Grid,4 in a Ro...",8.99,4.8,9K+ bought in past month,Product Description Go 4 the win! The Connect ...,78198
6,4030f2ef-db79-41c2-8bd0-5cb202da0c38,B0B5HLZ8T4,"Monopoly Game, Family Board Games for 2 to 6 P...",19.92,4.8,6K+ bought in past month,"Buy, sell, dream, and scheme the way to riches...",3579
7,3edbf10f-2cb6-4f54-ac45-46398d8096fb,B00ABA0ZOA,Jenga Game | The Original Wood Block Game with...,10.4,4.8,#1 Top Rated,Product Description Want a game experience tha...,100759
8,57234401-010f-42a9-b773-e98ef129cd4d,B077Z1R28P,Taco Cat Goat Cheese Pizza,9.84,4.8,10K+ bought in past month,This is the social card game that is going to ...,51819
9,25e4faba-3811-4e1e-87b6-90fdae232236,B07P6MZPK3,Mattel Games UNO Card Game in a Collectible St...,10.99,4.8,10K+ bought in past month,UNO is the classic family card game that's eas...,71007


### Defining Helper Functions

In [9]:
def extract_common_nouns(df: pd.DataFrame, title_column: str = "title") -> pd.DataFrame:
    """
    Extracts common nouns from the title column of a DataFrame.

    Args:
        df (pd.DataFrame): The input DataFrame containing a 'title' column.
        title_column (str): The name of the column containing text data.

    Returns:
        pd.DataFrame: A DataFrame with an additional 'common_nouns' column containing lists of common nouns.
    """
    # Ensure the title column exists
    if title_column not in df.columns:
        raise ValueError(f"Column '{title_column}' not found in DataFrame.")

    # Function to extract common nouns from a single text
    def get_common_nouns(text):
        doc = nlp(text)
        return [token.text for token in doc if token.pos_ == "NOUN"]

    # Apply the function to the title column
    df["common_nouns"] = df[title_column].apply(lambda x: get_common_nouns(str(x)))
    return df


### Running Spacy Model

In [12]:
column_of_interest = "title"

# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")

df_with_nouns = extract_common_nouns(df, title_column=column_of_interest)
df_with_nouns.head()

Unnamed: 0,reviews_count,price,rating,id,description,asin,title,sales_volume,common_nouns
0,3c943db5-ad7e-45da-85a5-2363584335b1,B07MV4NN5Z,"Big Potato The Chameleon, Award-Winning Board ...",15.99,4.7,2K+ bought in past month,,7063,[]
1,0ce92bc9-af3e-4e03-a362-75dfdac41f84,B09NYD15ZH,Skillmatics Rapid Rumble - Fast-Paced Board Ga...,19.97,4.6,1K+ bought in past month,,3147,[]
2,44ae72ff-b3cb-45b2-b38f-a814e6a26d3e,B076HK9H7Z,"Sorry! Kids Board Game, Family Board Games for...",7.49,4.8,10K+ bought in past month,"Slide, collide and score to win the Sorry! gam...",31332,"[Slide, collide, score, game, players, opponen..."
3,99f3cd2b-7e42-4abd-a76f-32ae6855834a,B00D7OAOYI,"Hasbro Gaming Trouble Kids Board Game, Pop-o-M...",8.99,4.7,10K+ bought in past month,Product Description The Trouble game is racing...,37102,"[Product, game, peg, fun, excitement, player, ..."
4,ceb3ba0f-3bdf-45eb-8d93-637369b6d069,B00000DMF5,Hasbro Gaming Candy Land Kingdom of Sweet Adve...,10.49,4.8,10K+ bought in past month,Product Description If you loved playing the C...,35049,"[Product, Description, board, game, child, exp..."
