In [159]:
import json
import pandas as pd
import os
import sqlite3
from itertools import islice
from sklearn.metrics.pairwise import cosine_similarity

from enum import Enum
from typing import List
from pydantic import BaseModel, Field

#!pip install openai -U
from openai import OpenAI
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
model = "gpt-4o-2024-08-06"

In [15]:
# Get human emotions    
 
# Define the Pydantic model for the API response
class EmotionsResponse(BaseModel):
    Characteristics: List[str] = Field(None, description="List of non-redundant human emotions.")

def get_emotions(model: str) -> List[str]:
    """Gets a list of 50 unique and non-redundant human emotions using the specified gpt model."""
    
    # Define system and user prompts
    system_prompt = "Find 50 different, exclusive and unique human emotions. "\
    "For example, pick joy or happiness, pick Shame or Embarrassment, pick Envy or Jealousy, "\
    "pick Hate or disgust or hatered or Resentment. "\

    user_prompt = "Select 50 different and unique human emotions."

    try:
        #Call the API to get the completion
        completion = client.beta.chat.completions.parse(
            model= model,
            messages=[
                {"role": "system", "content": "Be a helpful assistant."},
                {"role": "system", "content": system_prompt},
                {"role": "system", "content": "make sure to include either joy or happiness, not both."},
                {"role": "system", "content": "make sure to include either Shame or Embarrassment, not both"},
                {"role": "system", "content": "make sure to include either Envy or Jealousy, not both"},
                {"role": "system", "content": "make sure to include either Hate or disgust or hatered or Resentment"},
                {"role": "system", "content": "Check again to remove redundant emotions. I only want unique emotions."},
                {"role": "user", "content": user_prompt}
            ],
            response_format=EmotionsResponse
        )

        #output returns in the defined pydantic style
        output = completion.choices[0].message.parsed
        return output.json()
    
    except Exception as e:
        # Handle exceptions such as API errors, etc
        print(f"An error occurred: {e}")
        return json.dumps({})

# Example usage
emotions = get_emotions(model= model)

In [16]:
#Get 100 best selling American clothing brands 

# Define the Pydantic model for the API response
class BrandsResponse(BaseModel):
    Brands: List[str] = Field(None, description="Brands as a list of strings.")

def get_brands(model: str) -> List[str]:
    """Get 100 best selling American clothing brands using the specified gpt model."""

    try:
        #Call the API to get the completion
        completion = client.beta.chat.completions.parse(
            model= model,
            messages=[
                {"role": "system", "content": "Be a helpful assistant."},
                {"role": "system", "content": "Find 100 non-redundant best selling American clothing brands."},
                {"role": "system", "content": "DONT MAKE ANY MISTAKES, check if you did any."},
                {"role": "user", "content": "Give me 100 best selling American clothing brands."}
            ],
            response_format=BrandsResponse
        )

        #output returns in the defined pydantic style
        output = completion.choices[0].message.parsed
        return output.json()
    
    except Exception as e:
        # Handle exceptions such as API errors, etc
        print(f"An error occurred: {e}")
        return json.dumps({})

# Example usage
brands = get_brands(model= model)

In [17]:
emotions_ls = list(json.loads(emotions).values())[0]
brands_ls = list(json.loads(brands).values())[0]

In [38]:
# Embedding brand in emotions space: Get association scores between an input and list of emotions

Characteristic = Enum('Characteristic', dict([(emotion, emotion) for emotion in emotions_ls]))

class EmotionalAssociationScore(BaseModel):
    emotion: Characteristic
    score: float

class EmotionalAssociationScores(BaseModel):
    associations: List[EmotionalAssociationScore] = Field(description="A list of emotions and associated scores")

def emotional_association_scores(
        thing, 
        model,
        emotions
    ):
    
    prompt = f"Assign emotional association scores between {0} and {len(emotions)} for the provided thing. "\
    "Assign a score for each of the following emotions. Briefly, explain the reason behind the association score."\
    "Ensure the scores reflect the association strength for the specified thing. "\
    "Thing: "\
    f"{thing}"
            
    completion = client.beta.chat.completions.parse(
        model = model,
        messages=[
            {"role": "system", "content": "Be a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        response_format=EmotionalAssociationScores,
    )
    #output returns in the defined pydantic style
    output = completion.choices[0].message.parsed
    return thing, output.json()

In [None]:
#not using this for the moment
# #Embedding brands in emotions space: 
# # tried nested prompt but decided to go with one prompt and a list comprehension
# emotions= emotions_ls
# associations_brands = [emotional_association_scores(thing, model, emotions) for thing in brands_ls[:3]]


In [None]:
def get_df(thing, model, emotions):
    gpt = emotional_association_scores(thing, model, emotions)
    data = list(json.loads(gpt[1]).values())[0]
    df = pd.DataFrame(data)
    df.rename(columns = {'score': gpt[0]}, inplace=True)
    df.set_index('emotion', inplace=True)
    return df

def get_dfs(things_ls, model, emotions):
    merged_df = pd.DataFrame()
    for thing in things_ls:
        new_df = get_df(thing, model, emotions)
        if merged_df.empty:
            merged_df = new_df
        else:
            merged_df = pd.merge(merged_df, new_df, left_index=True, right_index=True, how='outer')
    return merged_df


things_ls = brands_ls
dfs = get_dfs(things_ls, model, emotions)
# Drop columns with NaN values
dfs_cleaned = dfs.dropna(axis=1)

dfs_cleaned 

In [None]:
# # Set pandas to display all rows and columns
# pd.set_option('display.max_rows', None)  # Show all rows
# pd.set_option('display.max_columns', None)  # Show all columns
# pd.set_option('display.width', None)  # Adjust display width to prevent column cutting
# pd.set_option('display.max_colwidth', None)  # Show full content in columns
# dfs.isna().sum()


In [None]:
# #Confirmed no need to l2 norm vectors for sklearn's cosine similarity:
# # Define your original vectors
# A = np.array([[2, 3]])
# B = np.array([[5, 4]])

# # Calculate cosine similarity without normalization
# cosine_sim_without_norm = cosine_similarity(A, B)

# # L2 normalize the vectors
# A_normalized = A / np.linalg.norm(A)
# B_normalized = B / np.linalg.norm(B)

# # Calculate cosine similarity with normalization
# cosine_sim_with_norm = cosine_similarity(A_normalized, B_normalized)

# # Print the outputs
# print("Cosine Similarity without normalization:")
# print(cosine_sim_without_norm[0][0])  # Output from unnormalized vectors

# print("\nCosine Similarity with normalization:")
# print(cosine_sim_with_norm[0][0])      # Output from normalized vectors
# cosine_sim_without_norm[0][0]==cosine_sim_with_norm[0][0]

In [None]:

def get_similarity(df, dfs):
    similarities = dict()

    # Reshape Series to 2D array (required by cosine_similarity)
    s1 = df.values.reshape(1, -1)

    for col in list(dfs.columns):
        # Reshape
        s2= dfs[col].values.reshape(1, -1)

        cosine_sim = cosine_similarity(s1, s2)
        similarities[col]= cosine_sim[0][0]

    sorted_dict = dict(sorted(similarities.items(), key=lambda item: item[1], reverse = True))

    # Get the top 3 (highest similarity)
    top_3 = list(dict(islice(sorted_dict.items(), 3)).keys())

    return top_3

get_similarity(df, dfs_cleaned)


In [24]:
#The cosine similarity ranges from -1 to 1, where:
#1 indicates identical vectors (i.e., vectors point in the same direction).
#0 indicates orthogonality (i.e., vectors are at a 90-degree angle to each other, no similarity).
#-1 indicates opposite directions (i.e., vectors point in exactly opposite directions).
#represents similarity between feature vectors, quantifying similarity between two vectors based on their direction, 
# irrespective of their magnitude.

#embeddings happen in a much smaller space of emotions as oppossed to ordinary, more common embeddings in a large space as more commonly done with openai api (read)

In [None]:
# a method
#embedding dimension is emotions
#talk about options
#get the brands, go through 50 emotins at a time
#cosine: normalize first: l2 norm = 1
#give instructions on readme on where key goes 
#first have everything in pandas df, then think about database
# one module or package w 1 .py 
#adaptors that take in pydantic datatypes and will make into sql
#argparse


In [None]:
# #to check emotion redundancy by looking at example groups 
#[i for i in list(json.loads(emotions).values())[0] if i in ['Joy', 'Happiness', 'Shame', 'Embarrassment', 'Envy', 'Jealousy' , 'Hate', 'disgust', 'hatered', 'Resentment']]

In [None]:
#test0

#Retrieve emotions from datbase or through openAI API
# if os.path.exists('emotions.json'):
#     with open('emotions.json', 'r') as f:
#         emotions_json = json.load(f)
# else:
#     emotions_json = get_emotions(model, api_key)

#test sqlite
# with sqlite3.connect(os.path.abspath('database.db')) as conn:
#     # Write the DataFrame to the database
#     df.to_sql('mytable', conn, if_exists='replace', index=False)
#     #cursor = conn.cursor()
#     #cursor.execute('SELECT SQLITE_VERSION()')
#     #data = cursor.fetchone()
#     #print('SQLite version:', data)

# query = "SELECT * FROM mytable"
# with sqlite3.connect(os.path.abspath('database.db')) as conn:
#     df_test= pd.read_sql_query(query, conn)


In [224]:
#test0
#test

class EmotionsResponse(BaseModel):
    #None as default if value not provided
    Emotions: List[str] = Field(None, description="List of non-redundant human emotions.") 

def get_emotions(model: str, api_key: str) -> List[str]:
    """Gets a list of 50 unique and non-redundant human emotions using the specified gpt model."""
    client = OpenAI(api_key=api_key)

    system_prompt = "Find 50 different, exclusive and unique human emotions. "\
    "For example, pick joy or happiness, pick Shame or Embarrassment, pick Envy or Jealousy, "\
    "pick Hate or disgust or hatered or Resentment. "\

    user_prompt = "Select 50 different and unique human emotions."

    try:
        completion = client.beta.chat.completions.parse(
            model= model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format=EmotionsResponse
        )

        #output in the defined pydantic style
        output = completion.choices[0].message.parsed
        return output.json()
    
    except Exception as e:
        print(f"An error occurred: {e}")
        return json.dumps({})

def get_emotions_df(model, api_key):
    emotions_json = get_emotions(model, api_key)
    emotions = list(json.loads(emotions_json).values())[0]
    emotions_df = pd.DataFrame(emotions, columns = ['emotion'])
    emotions_df['emotion_id'] = emotions_df.index
    emotions_df = emotions_df[['emotion_id','emotion']]
    return emotions_df

In [264]:

#test1
#Get 100 best selling American clothing brands using the Pydantic model for the API response
class BrandResponse(BaseModel):
    brand: str = Field(description="name as a string.")
    information: str = Field(description="Brand information as a string.")
class BrandsResponse(BaseModel):
    brands: List[BrandResponse] = Field(description="A list of names and information.")

def get_brands(model: str, api_key: str) -> List[str]:
    """Get 5 best selling American clothing brands using the specified gpt model. Provide a brief information about each brand."""
    client = OpenAI(api_key=api_key)
    try:
        #Call the API to get the completion
        completion = client.beta.chat.completions.parse(
            model= model,
            messages=[
                {"role": "system", "content": "Find 5 non-redundant best selling American clothing brands."},
                {"role": "user", "content": "Give me 5 best selling American clothing brands and a brief information about each brand."}
            ],
            response_format=BrandsResponse
        )
        #output in the defined pydantic style
        output = completion.choices[0].message.parsed

        return output.json()
    
    except Exception as e:
        print(f"An error occurred: {e}")
        return json.dumps({})

In [296]:
#test2
# Embedding and getting association scores between an input and list of emotions
def emotional_association_scores(
        thing, 
        model,
        emotions, api_key
    ):

    client = OpenAI(api_key=api_key)

    Characteristic = Enum('Characteristic', dict([(emotion, emotion) for emotion in emotions]))

    class EmotionalAssociationScore(BaseModel):
        emotion: Characteristic
        score: float

    class EmotionalAssociationScores(BaseModel):
        associations: List[EmotionalAssociationScore] = Field(description="A list of emotions and associated scores")
        explanation: str = Field(description="Briefly explaining the reason behind the association scores.")

    prompt = f"Assign emotional association scores between {0} and {len(emotions)} for the provided thing. "\
    "Assign a score for each of the following emotions. Briefly, explain the reason behind the association score."\
    "Ensure the scores reflect the association strength for the specified thing. "\
    "Thing: "\
    f"{thing}"
            
    completion = client.beta.chat.completions.parse(
        model = model,
        messages=[
            {"role": "system", "content": "Be a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        response_format=EmotionalAssociationScores,
    )
    #output in the defined pydantic style
    output = completion.choices[0].message.parsed
    return thing, output.json()

In [397]:
emotional_association_scores(thing, model, emotions, api_key)

('Nike',
 '{"associations": [{"emotion": "Joy", "score": 45.0}, {"emotion": "Sorrow", "score": 5.0}, {"emotion": "Fear", "score": 10.0}, {"emotion": "Disgust", "score": 2.0}, {"emotion": "Surprise", "score": 30.0}, {"emotion": "Trust", "score": 35.0}, {"emotion": "Anticipation", "score": 40.0}, {"emotion": "Anger", "score": 5.0}, {"emotion": "Contentment", "score": 25.0}, {"emotion": "Excitement", "score": 40.0}, {"emotion": "Yearning", "score": 20.0}, {"emotion": "Nostalgia", "score": 15.0}, {"emotion": "Awe", "score": 30.0}, {"emotion": "Hope", "score": 32.0}, {"emotion": "Relief", "score": 20.0}, {"emotion": "Gratitude", "score": 25.0}, {"emotion": "Love", "score": 38.0}, {"emotion": "Hatred", "score": 1.0}, {"emotion": "Resentment", "score": 3.0}, {"emotion": "Guilt", "score": 4.0}, {"emotion": "Shame", "score": 3.0}, {"emotion": "Embarrassment", "score": 2.0}, {"emotion": "Jealousy", "score": 5.0}, {"emotion": "Envy", "score": 10.0}, {"emotion": "Pride", "score": 42.0}, {"emotion"

In [445]:
#test get_df
def get_one(thing, model, emotions, api_key):
    gpt = emotional_association_scores(thing, model, emotions, api_key)
    gpt_j = json.loads(gpt[1])
    df = pd.DataFrame(list(gpt_j.values())[0])
    df.rename(columns = {'score': gpt[0]}, inplace=True)
    df.set_index('emotion', inplace=True)
    name = gpt[0]
    scoreinfo = gpt_j['explanation']
    return {name: scoreinfo}, df

def get_all(things, model, emotions, api_key):
    scoreinfo = []
    merged_df = pd.DataFrame()
    for thing in things:
        out = get_one(thing, model, emotions, api_key)
        scoreinfo.append(out[0])
        new_df = out[1]
        if merged_df.empty:
            merged_df = new_df
        else:
            merged_df = pd.merge(merged_df, new_df, left_index=True, right_index=True, how='outer')
    return scoreinfo, merged_df

#merged_df.reset_index(inplace= True)

def get_brands_scores(model, api_key, emotions):
    brands = pd.DataFrame(list(json.loads(get_brands(model, api_key)).values())[0])
    brands.reset_index(inplace= True)
    brands.rename({'information':'info', 'index':'id'}, axis = 1, inplace = True)
    brands['gpt'] = model

    all = get_all(brands['brand'], model, emotions, api_key)
    scoreinfo= all[0]
    scores_df = all[1]
    scoreinfo_df= pd.DataFrame([(k,v) for data in scoreinfo for k,v in data.items()], columns = ['brand', 'scores_info'])
    brands = pd.merge(brands, scoreinfo_df, how = 'left', on ='brand' )
    brands = brands[['id','brand','info', 'scores_info','gpt']]
    return (brands, scores_df)

In [474]:
out, o = get_brands_scores(model, api_key, emotions[:3])

In [475]:
out

Unnamed: 0,id,brand,info,scores_info,gpt
0,0,Nike,"Founded in 1964, Nike is a global leader in at...",Nike is a popular sportswear brand associated ...,gpt-4o-2024-08-06
1,1,Ralph Lauren,"Established in 1967, Ralph Lauren is renowned ...",Ralph Lauren is primarily associated with luxu...,gpt-4o-2024-08-06
2,2,Levi's,"Founded in 1853, Levi's is recognized for crea...",Levi's is commonly associated with Joy because...,gpt-4o-2024-08-06
3,3,Calvin Klein,"Created in 1968, Calvin Klein is synonymous wi...",Calvin Klein is primarily associated with fash...,gpt-4o-2024-08-06
4,4,Under Armour,"Founded in 1996, Under Armour is a top player ...","Under Armour, being a popular sportswear brand...",gpt-4o-2024-08-06


In [476]:
o

Unnamed: 0_level_0,Nike,Ralph Lauren,Levi's,Calvin Klein,Under Armour
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Joy,3.0,2.5,2.0,3.0,2.5
Sorrow,1.0,0.5,1.0,1.0,1.0
Fear,1.0,1.0,0.5,0.0,0.5


In [479]:
o.index.name

'emotion'

In [477]:
o1 = o.reset_index()

In [484]:
o1

Unnamed: 0,emotion,Nike,Ralph Lauren,Levi's,Calvin Klein,Under Armour
0,Joy,3.0,2.5,2.0,3.0,2.5
1,Sorrow,1.0,0.5,1.0,1.0,1.0
2,Fear,1.0,1.0,0.5,0.0,0.5


In [485]:
pd.melt(o.reset_index(), id_vars='emotion', value_vars =list(o.columns))


Unnamed: 0,emotion,variable,value
0,Joy,Nike,3.0
1,Sorrow,Nike,1.0
2,Fear,Nike,1.0
3,Joy,Ralph Lauren,2.5
4,Sorrow,Ralph Lauren,0.5
5,Fear,Ralph Lauren,1.0
6,Joy,Levi's,2.0
7,Sorrow,Levi's,1.0
8,Fear,Levi's,0.5
9,Joy,Calvin Klein,3.0


In [450]:
# def check_emotions_exists(model, api_key, db_name, mytable):
#     with sqlite3.connect(os.path.abspath(db_name)) as conn:
#         cursor = conn.cursor()
#         cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (mytable,))
#         if cursor.fetchone() is not None:
#             print(f'Reading {mytable} from database...')
#             query = f'SELECT * FROM {mytable}' 
#             df = pd.read_sql_query(query, conn)
#         else:
#             print(f"{mytable} doesn't exist so generating one...")
            
#     return df

def check_data_exists(model, api_key, db_name, update_brand_list):
    with sqlite3.connect(os.path.abspath(db_name)) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name in ('emotions', 'brands', 'association_scores')")
        tables = cursor.fetchall()
        if len(tables) == 3:
            query = "SELECT * FROM 'emotions'"
            emotions_df = pd.read_sql_query(query, conn) 

            if update_brand_list == 'no':
                print('Reading from database...')
                query = "SELECT * FROM 'association_scores'" 
                scores_df = pd.read_sql_query(query, conn)
            else:
                print('Generating brands data...')
                brands, scores_df = get_brands_scores(model, api_key, emotions_df['emotion'][:3])
                brands.to_sql('brands', conn, if_exists = 'replace', index=False)
                scores_df.to_sql('association_scores', conn, if_exists = 'replace', index=False)

        else:
            print("Brands data doesn't exist so generating...")
            emotions_df = get_emotions_df(model, api_key)
            emotions_df.to_sql('emotions', conn, if_exists = 'replace', index=False)

            brands, scores_df = get_brands_scores(model, api_key, emotions_df['emotion'][:3])
            brands.to_sql('brands', conn, if_exists = 'replace', index=False)
            scores_df.to_sql('association_scores', conn, if_exists = 'replace', index=False)

    return (emotions_df, scores_df)




# Example usage
model = "gpt-4o-2024-08-06"
api_key=os.environ.get('OPENAI_API_KEY')
db_name = 'database.db'
update_brand_list = 'no'
number = 3
thing ='summer'



SyntaxError: invalid syntax (3010771359.py, line 29)

In [448]:
update_brand_list = 'no'
number = 3
emotions_df, scores_df = check_data_exists(model, api_key, db_name, update_brand_list)
scores_df.head()

Unnamed: 0_level_0,Nike,Levi's,Ralph Lauren,Under Armour,Calvin Klein
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Joy,45.0,40.0,40.0,35.0,40.0
Sadness,10.0,10.0,10.0,10.0,5.0
Anger,12.0,5.0,5.0,5.0,10.0
Fear,15.0,8.0,8.0,15.0,8.0
Surprise,35.0,25.0,15.0,20.0,20.0
Disgust,8.0,5.0,3.0,5.0,5.0
Trust,40.0,42.0,35.0,38.0,30.0
Anticipation,30.0,30.0,30.0,30.0,35.0
Shame,10.0,6.0,5.0,5.0,5.0
Envy,28.0,20.0,25.0,18.0,25.0


In [447]:
emotions_df.head()

Unnamed: 0,emotion_id,emotion
0,0,Joy
1,1,Sadness
2,2,Anger
3,3,Fear
4,4,Surprise
5,5,Disgust
6,6,Trust
7,7,Anticipation
8,8,Shame
9,9,Envy


In [452]:
update_brand_list = 'no'
number = 3
emotions_df, scores_df = check_data_exists(model, api_key, db_name, update_brand_list)
scores_df.head()

Reading from database...


Unnamed: 0,Nike,Levi's,Ralph Lauren,Under Armour,Calvin Klein
0,45.0,40.0,40.0,35.0,40.0
1,10.0,10.0,10.0,10.0,5.0
2,12.0,5.0,5.0,5.0,10.0
3,15.0,8.0,8.0,15.0,8.0
4,35.0,25.0,15.0,20.0,20.0


In [453]:
emotions_df.head()

Unnamed: 0,emotion_id,emotion
0,0,Joy
1,1,Sadness
2,2,Anger
3,3,Fear
4,4,Surprise


In [451]:
with sqlite3.connect(os.path.abspath(db_name)) as conn:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    table_names = [table[0] for table in tables]
    print("Tables in the database:", table_names)

Tables in the database: ['emotions', 'brands', 'association_scores']


In [None]:
df = get_df(thing, model, emotions, api_key)
df_cleaned = df.dropna(axis=1)

dfs = get_dfs(things, model, emotions, api_key)
dfs_cleaned = dfs.dropna(axis=1)


result = get_similarity(df_cleaned, dfs_cleaned, number)
result

In [454]:
with sqlite3.connect(os.path.abspath(db_name)) as conn:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name ='brands';")
    query = "SELECT * FROM 'brands'" 
    df = pd.read_sql_query(query, conn)
df

Unnamed: 0,id,brand,info,scores_info,gpt
0,0,Nike,"Founded in 1964, Nike is a multinational corpo...","Nike, as a popular and successful brand, evoke...",gpt-4o-2024-08-06
1,1,Levi's,"Founded in 1853, Levi's is renowned for its de...",Levi's is a well-established brand known for i...,gpt-4o-2024-08-06
2,2,Ralph Lauren,"Established in 1967, Ralph Lauren is a luxury ...","Ralph Lauren, as a renowned fashion brand, is ...",gpt-4o-2024-08-06
3,3,Under Armour,"Founded in 1996, Under Armour is a leading bra...",Under Armour is a well-regarded athletic wear ...,gpt-4o-2024-08-06
4,4,Calvin Klein,"Launched in 1968, Calvin Klein is an iconic fa...","Calvin Klein, being a renowned fashion brand, ...",gpt-4o-2024-08-06


In [None]:
#check the databse tables, drop tables and check again: testing check_emotions_exists and check_brands_exists
# with sqlite3.connect(os.path.abspath(db_name)) as conn:
#     cursor = conn.cursor()
#     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
#     tables = cursor.fetchall()
#     table_names = [table[0] for table in tables]
#     print("Tables in the database:", table_names)
#     for table in table_names:
#         query = f'SELECT * FROM {table}' 
#         table = pd.read_sql_query(query, conn)
#         print(table.head())
        
# # Drop all tables and check again the above works
# with sqlite3.connect(os.path.abspath(db_name)) as conn:
#     cursor = conn.cursor()
#     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
#     tables = cursor.fetchall()
#     table_names = [table[0] for table in tables]
#     print("Tables in the database:", table_names)
#     for table in table_names:
#         cursor.execute(f"DROP TABLE IF EXISTS {table};")
#         print(f"Table {table} dropped")
#     conn.commit()

Tables in the database: []


In [None]:

def get_similarity(df, dfs, number):
    dfs.set_index('emotion', inplace=True)

    similarities = dict()
    df_cosine = df[[col for col in df.columns if 'explanation' not in col]]
    # Reshape Series to 2D array (required by cosine_similarity)
    s1 = df_cosine.values.reshape(1, -1)

    for col in list(dfs.columns):
        dfs_cosine = dfs[[col for col in dfs.columns if 'explanation' not in col and 'gpt' not in col]]
        s2= dfs_cosine[col].values.reshape(1, -1)
        cosine_sim = cosine_similarity(s1, s2)
        similarities[col]= cosine_sim[0][0]

    sorted_dict = dict(sorted(similarities.items(), key=lambda item: item[1], reverse = True))

    # Get the top number of recommendations based on similarity
    recommendations = list(dict(islice(sorted_dict.items(), number)).keys())
    return recommendations

In [382]:
emotions_df

Unnamed: 0,emotion_id,emotion
0,0,Joy
1,1,Sorrow
2,2,Fear
3,3,Disgust
4,4,Surprise
5,5,Trust
6,6,Anticipation
7,7,Anger
8,8,Contentment
9,9,Excitement


In [383]:
brands_df

Unnamed: 0,brand_id,brand,brand_info,scores_info,gpt_version
0,0,Nike,Founded in 1964 and headquartered in Beaverton...,"Nike is often associated with joy, pride, and ...",gpt-4o-2024-08-06
1,1,Ralph Lauren,"Established in 1967, Ralph Lauren is synonymou...",Ralph Lauren is primarily associated with luxu...,gpt-4o-2024-08-06
2,2,Levi's,"Founded in 1853 in San Francisco, California, ...","Levi’s, as a brand, evokes varying emotional a...",gpt-4o-2024-08-06
3,3,Tommy Hilfiger,"Started in 1985, Tommy Hilfiger is known for i...","Tommy Hilfiger, as a global brand, is often as...",gpt-4o-2024-08-06
4,4,Under Armour,"Founded in 1996 and based in Baltimore, Maryla...","Under Armour is a popular athletic brand, and ...",gpt-4o-2024-08-06
