In [538]:
import json
import os
import sqlite3
from contextlib import closing
from enum import Enum
from itertools import islice
from typing import List

import pandas as pd
#!pip install openai -U
from openai import OpenAI
from pydantic import BaseModel, Field
from sklearn.metrics.pairwise import cosine_similarity
# Read the API key from the environment so it is never hard-coded in the notebook.
key=os.environ.get('OPENAI_API_KEY')
# Module-level client used by the helper functions below that do not take an api_key argument.
client = OpenAI(api_key=key)
# Model name passed as the `model` argument to the helper functions.
model = "gpt-4o-2024-08-06"

In [524]:
# Get human emotions    
 
# Define the Pydantic model for the API response
class EmotionsResponse(BaseModel):
    # Response schema for the emotions request: a single key holding the list of emotion names.
    Characteristics: List[str] = Field(
        default=None,
        description="List of non-redundant human emotions.",
    )

def get_emotions(model: str) -> str:
    """Ask the chat model for 50 unique, non-redundant human emotions.

    Uses the module-level ``client`` and the ``EmotionsResponse`` schema.

    Args:
        model: Name of the OpenAI chat model to query.

    Returns:
        The parsed ``EmotionsResponse`` serialised as a JSON string, or the
        JSON string ``"{}"`` if the request raised an exception.
    """
    # Adjacent literals inside parentheses are concatenated; this avoids a
    # trailing backslash continuation that would silently swallow the next line.
    system_prompt = (
        "Find 50 different, exclusive and unique human emotions. "
        "For example, pick joy or happiness, pick Shame or Embarrassment, pick Envy or Jealousy, "
        "pick Hate or disgust or hatered or Resentment. "
    )

    user_prompt = "Select 50 different and unique human emotions."

    try:
        # Structured-output call: the SDK parses the reply into EmotionsResponse.
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": "Be a helpful assistant."},
                {"role": "system", "content": system_prompt},
                {"role": "system", "content": "make sure to include either joy or happiness, not both."},
                {"role": "system", "content": "make sure to include either Shame or Embarrassment, not both"},
                {"role": "system", "content": "make sure to include either Envy or Jealousy, not both"},
                {"role": "system", "content": "make sure to include either Hate or disgust or hatered or Resentment"},
                {"role": "system", "content": "Check again to remove redundant emotions. I only want unique emotions."},
                {"role": "user", "content": user_prompt}
            ],
            response_format=EmotionsResponse
        )

        # `parsed` is an EmotionsResponse instance; callers expect its JSON text.
        output = completion.choices[0].message.parsed
        return output.json()

    except Exception as e:
        # Best-effort: report the failure and hand back an empty JSON object.
        print(f"An error occurred: {e}")
        return json.dumps({})

# Example usage
# `emotions` holds the JSON string returned by get_emotions, not a Python list.
emotions = get_emotions(model= model)

In [526]:
#Get 100 best selling American clothing brands 

# Define the Pydantic model for the API response
class BrandsResponse(BaseModel):
    # Response schema for the brands request: a single key holding the list of brand names.
    Brands: List[str] = Field(
        default=None,
        description="Brands as a list of strings.",
    )

def get_brands(model: str) -> str:
    """Ask the chat model for 100 best-selling American clothing brands.

    Uses the module-level ``client`` and the ``BrandsResponse`` schema.

    Args:
        model: Name of the OpenAI chat model to query.

    Returns:
        The parsed ``BrandsResponse`` serialised as a JSON string, or the
        JSON string ``"{}"`` if the request raised an exception.
    """
    try:
        # Structured-output call: the SDK parses the reply into BrandsResponse.
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": "Be a helpful assistant."},
                {"role": "system", "content": "Find 100 non-redundant best selling American clothing brands."},
                {"role": "system", "content": "DONT MAKE ANY MISTAKES, check if you did any."},
                {"role": "user", "content": "Give me 100 best selling American clothing brands."}
            ],
            response_format=BrandsResponse
        )

        # `parsed` is a BrandsResponse instance; callers expect its JSON text.
        output = completion.choices[0].message.parsed
        return output.json()

    except Exception as e:
        # Best-effort: report the failure and hand back an empty JSON object.
        print(f"An error occurred: {e}")
        return json.dumps({})

# Example usage
# `brands` holds the JSON string returned by get_brands, not a Python list.
brands = get_brands(model= model)

In [527]:
brands

'{"Brands": ["Levi\'s", "Ralph Lauren", "Nike", "Tommy Hilfiger", "Calvin Klein", "Under Armour", "Hanes", "Columbia", "Carhartt", "Polo Ralph Lauren", "GAP", "Old Navy", "Michael Kors", "Coach", "Vans", "Converse", "North Face", "Patagonia", "Abercrombie & Fitch", "American Eagle Outfitters", "Brooks Brothers", "Kate Spade", "Vera Bradley", "Wrangler", "Dockers", "Hollister", "A\\u00e9ropostale", "L.L. Bean", "J.Crew", "Lacoste", "Express", "Banana Republic", "Forever 21", "Victoria\'s Secret", "Guess", "Lululemon", "New Balance", "Champion", "Fila", "Eddie Bauer", "Urban Outfitters", "Lucky Brand", "Anthropologie", "Free People", "Tory Burch", "Skechers", "Steve Madden", "Cole Haan", "Marc Jacobs", "Stuart Weitzman", "Joe\'s Jeans", "True Religion", "Carter\'s", "Gap Kids", "Justice", "Athleta", "ALDO", "PacSun", "G-Star RAW", "Keds", "Reebok", "Crocs", "Aldo", "Rockport", "American Apparel", "Ann Taylor", "Lane Bryant", "Leggs", "Lee", "Izod", "Pendleton", "Orvis", "Ted Baker", "Jen

In [528]:
# Each response is a JSON object; take the value stored under its first key.
emotions_ls = [*json.loads(emotions).values()][0]
brands_ls = [*json.loads(brands).values()][0]

In [564]:
# Embedding brand in emotions space: Get association scores between an input and list of emotions

# Enum whose member names and values are both the emotion strings from `emotions_ls`.
Characteristic = Enum('Characteristic', {emotion: emotion for emotion in emotions_ls})


class EmotionalAssociationScore(BaseModel):
    # One (emotion, score) pair; `emotion` must be a member of Characteristic.
    emotion: Characteristic
    score: float


class EmotionalAssociationScores(BaseModel):
    # Full response: the list of scored emotions.
    associations: List[EmotionalAssociationScore] = Field(
        ...,
        description="A list of emotions and associated scores",
    )

def emotional_association_scores(
        thing,
        model,
        emotions
    ):
    """Ask the chat model to score how strongly `thing` is associated with each emotion.

    Uses the module-level ``client`` and the module-level
    ``EmotionalAssociationScores`` schema. Note that ``emotions`` is only used
    for its length (the upper bound quoted in the prompt); the set of emotions
    the model may return is fixed by the module-level schema.

    Args:
        thing: The item to score; interpolated into the prompt.
        model: Name of the OpenAI chat model to query.
        emotions: Sized collection whose length sets the upper score bound.

    Returns:
        A tuple ``(thing, json_text)`` where ``json_text`` is the parsed
        response serialised as a JSON string.
    """
    # Every fragment ends with a space so sentences do not run together
    # (previously "...association score.Ensure the scores...").
    prompt = (
        f"Assign emotional association scores between 0 and {len(emotions)} for the provided thing. "
        "Assign a score for each of the following emotions. Briefly, explain the reason behind the association score. "
        "Ensure the scores reflect the association strength for the specified thing. "
        "Thing: "
        f"{thing}"
    )

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": "Be a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        response_format=EmotionalAssociationScores,
    )
    # `parsed` is an EmotionalAssociationScores instance.
    output = completion.choices[0].message.parsed
    return thing, output.json()

In [565]:
# Smoke test on a single input.
# `thing` is defined here so the cell runs on a fresh kernel, and the parsed
# list `emotions_ls` is sliced instead of `emotions`, which is the raw JSON
# string (slicing it would yield its first two characters).
thing = 'summer'
emotional_association_scores(
        thing,
        model,
        emotions_ls[:2]
    )

('summer',
 '{"associations": [{"emotion": "Joy", "score": 2.0}, {"emotion": "Shame", "score": 0.0}, {"emotion": "Envy", "score": 0.5}, {"emotion": "Resentment", "score": 0.2}, {"emotion": "Surprise", "score": 0.5}, {"emotion": "Fear", "score": 0.2}, {"emotion": "Sadness", "score": 0.3}, {"emotion": "Anger", "score": 0.1}, {"emotion": "Love", "score": 2.0}, {"emotion": "Hope", "score": 1.5}, {"emotion": "Pride", "score": 0.5}, {"emotion": "Gratitude", "score": 1.0}, {"emotion": "Curiosity", "score": 0.8}, {"emotion": "Anxiety", "score": 0.4}, {"emotion": "Contentment", "score": 1.5}, {"emotion": "Confusion", "score": 0.2}, {"emotion": "Boredom", "score": 0.3}, {"emotion": "Disappointment", "score": 0.2}, {"emotion": "Ecstasy", "score": 1.2}, {"emotion": "Empathy", "score": 0.3}, {"emotion": "Nostalgia", "score": 1.5}, {"emotion": "Loneliness", "score": 0.5}, {"emotion": "Regret", "score": 0.2}, {"emotion": "Guilt", "score": 0.1}, {"emotion": "Admiration", "score": 0.7}, {"emotion": "An

In [None]:
#not using this for the moment
# #Embedding brands in emotions space: 
# # tried nested prompt but decided to go with one prompt and a list comprehension
# emotions= emotions_ls
# associations_brands = [emotional_association_scores(thing, model, emotions) for thing in brands_ls[:3]]


In [None]:
def get_df(thing, model, emotions):
    """Score one thing and return the scores as a frame indexed by emotion.

    The 'score' column of the response is renamed to the thing's label
    (first element of the tuple returned by emotional_association_scores).
    """
    response = emotional_association_scores(thing, model, emotions)
    # The JSON payload has a single key; its value is the list of score records.
    records = list(json.loads(response[1]).values())[0]
    return (
        pd.DataFrame(records)
        .rename(columns={'score': response[0]})
        .set_index('emotion')
    )

def get_dfs(things_ls, model, emotions):
    """Score every thing in `things_ls` and outer-join the per-thing frames on their index."""
    combined = pd.DataFrame()
    for item in things_ls:
        item_scores = get_df(item, model, emotions)
        # The first non-empty frame seeds the result; later ones are joined on the emotion index.
        combined = (
            item_scores
            if combined.empty
            else combined.merge(item_scores, left_index=True, right_index=True, how='outer')
        )
    return combined


things_ls = brands_ls
# Pass the parsed emotion list: `emotions` is the raw JSON string from
# get_emotions, and its length would otherwise become the score upper bound.
dfs = get_dfs(things_ls, model, emotions_ls)
# Drop columns with NaN values
dfs_cleaned = dfs.dropna(axis=1)

dfs_cleaned

In [None]:
# # Set pandas to display all rows and columns
# pd.set_option('display.max_rows', None)  # Show all rows
# pd.set_option('display.max_columns', None)  # Show all columns
# pd.set_option('display.width', None)  # Adjust display width to prevent column cutting
# pd.set_option('display.max_colwidth', None)  # Show full content in columns
# dfs.isna().sum()


In [None]:
# #Confirmed no need to l2 norm vectors for sklearn's cosine similarity:
# # Define your original vectors
# A = np.array([[2, 3]])
# B = np.array([[5, 4]])

# # Calculate cosine similarity without normalization
# cosine_sim_without_norm = cosine_similarity(A, B)

# # L2 normalize the vectors
# A_normalized = A / np.linalg.norm(A)
# B_normalized = B / np.linalg.norm(B)

# # Calculate cosine similarity with normalization
# cosine_sim_with_norm = cosine_similarity(A_normalized, B_normalized)

# # Print the outputs
# print("Cosine Similarity without normalization:")
# print(cosine_sim_without_norm[0][0])  # Output from unnormalized vectors

# print("\nCosine Similarity with normalization:")
# print(cosine_sim_with_norm[0][0])      # Output from normalized vectors
# cosine_sim_without_norm[0][0]==cosine_sim_with_norm[0][0]

In [None]:

def get_similarity(df, dfs, number=3):
    """Rank the columns of `dfs` by cosine similarity to the vector in `df`.

    Args:
        df: Frame (or Series) holding the query scores; its values are
            flattened into a single row vector.
        dfs: Frame with one column of scores per candidate.
        number: How many top-ranked column names to return (default 3).

    Returns:
        List of column names of `dfs`, most similar first, at most `number` long.
    """
    # cosine_similarity expects 2D inputs, hence the (1, n) reshape.
    query = df.values.reshape(1, -1)

    similarities = {
        col: cosine_similarity(query, dfs[col].values.reshape(1, -1))[0][0]
        for col in dfs.columns
    }

    # sorted() is stable, so ties keep the column order of `dfs`.
    ranked = sorted(similarities.items(), key=lambda item: item[1], reverse=True)
    return [name for name, _ in ranked[:number]]

# NOTE(review): `df` is not assigned at top level by any earlier cell shown here — presumably the
# single-thing frame from get_df(...); confirm before re-running on a fresh kernel.
get_similarity(df, dfs_cleaned)


In [24]:
#The cosine similarity ranges from -1 to 1, where:
#1 indicates identical vectors (i.e., vectors point in the same direction).
#0 indicates orthogonality (i.e., vectors are at a 90-degree angle to each other, no similarity).
#-1 indicates opposite directions (i.e., vectors point in exactly opposite directions).
#represents similarity between feature vectors, quantifying similarity between two vectors based on their direction, 
# irrespective of their magnitude.

#Here the embedding space is small (one dimension per emotion), as opposed to the large, general-purpose embedding spaces more commonly produced with the OpenAI API (read)

In [None]:
# a method
#embedding dimension is emotions
#talk about options
#get the brands, go through 50 emotions at a time
#cosine: normalize first: l2 norm = 1
#give instructions on readme on where key goes 
#first have everything in pandas df, then think about database
# one module or package w 1 .py 
#adaptors that take in pydantic datatypes and will make into sql
#argparse


In [None]:
# #to check emotion redundancy by looking at example groups 
#[i for i in list(json.loads(emotions).values())[0] if i in ['Joy', 'Happiness', 'Shame', 'Embarrassment', 'Envy', 'Jealousy' , 'Hate', 'disgust', 'hatered', 'Resentment']]

In [None]:
#test0

#Retrieve emotions from the database or through the OpenAI API
# if os.path.exists('emotions.json'):
#     with open('emotions.json', 'r') as f:
#         emotions_json = json.load(f)
# else:
#     emotions_json = get_emotions(model, api_key)

#test sqlite
# with sqlite3.connect(os.path.abspath('database.db')) as conn:
#     # Write the DataFrame to the database
#     df.to_sql('mytable', conn, if_exists='replace', index=False)
#     #cursor = conn.cursor()
#     #cursor.execute('SELECT SQLITE_VERSION()')
#     #data = cursor.fetchone()
#     #print('SQLite version:', data)

# query = "SELECT * FROM mytable"
# with sqlite3.connect(os.path.abspath('database.db')) as conn:
#     df_test= pd.read_sql_query(query, conn)


In [612]:
#test0
#test

class EmotionsResponse(BaseModel):
    # Response schema for the emotions request: a single key holding the list of emotion names.
    # The field defaults to None when no value is provided.
    Emotions: List[str] = Field(
        default=None,
        description="List of non-redundant human emotions.",
    )

def get_emotions(model: str, api_key: str) -> str:
    """Ask the chat model for 50 unique, non-redundant human emotions.

    Creates its own OpenAI client from `api_key` and parses the reply with
    the ``EmotionsResponse`` schema.

    Args:
        model: Name of the OpenAI chat model to query.
        api_key: OpenAI API key used to build the client.

    Returns:
        The parsed ``EmotionsResponse`` serialised as a JSON string, or the
        JSON string ``"{}"`` if the request raised an exception.
    """
    client = OpenAI(api_key=api_key)

    # Adjacent literals inside parentheses are concatenated; this avoids a
    # trailing backslash continuation that would silently swallow the next line.
    system_prompt = (
        "Find 50 different, exclusive and unique human emotions. "
        "For example, pick joy or happiness, pick Shame or Embarrassment, pick Envy or Jealousy, "
        "pick Hate or disgust or hatered or Resentment. "
    )

    user_prompt = "Select 50 different and unique human emotions."

    try:
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format=EmotionsResponse
        )

        # `parsed` is an EmotionsResponse instance; callers expect its JSON text.
        output = completion.choices[0].message.parsed
        return output.json()

    except Exception as e:
        # Best-effort: report the failure and hand back an empty JSON object.
        print(f"An error occurred: {e}")
        return json.dumps({})

def get_emotions_df(model, api_key):
    """Fetch the emotion list and return it as a frame with columns emotion_id and emotion."""
    payload = json.loads(get_emotions(model, api_key))
    # The JSON payload has a single key; its value is the list of emotion names.
    names = list(payload.values())[0]
    frame = pd.DataFrame(names, columns=['emotion'])
    # The positional index serves as the integer id of each emotion.
    frame.insert(0, 'emotion_id', frame.index)
    return frame

In [613]:
# NOTE(review): `api_key` is only assigned in later cells (`api_key=key`), so this cell depends on
# execution order — confirm it runs after that assignment on a fresh kernel.
emotions_df = get_emotions_df(model, api_key)

In [663]:

#test1
#Get 100 best selling American clothing brands using the Pydantic model for the API response
class BrandResponse(BaseModel):
    # One brand entry: its name plus a short free-text description. Both fields are required.
    name: str = Field(..., description="brand name as a string.")
    brand_info: str = Field(..., description="Brand information as a string.")
class BrandsResponse(BaseModel):
    # Full response: the required list of BrandResponse entries.
    brands: List[BrandResponse] = Field(
        ...,
        description="A list of names and information.",
    )

def get_brands(model: str, api_key: str) -> str:
    """Ask the chat model for 5 best-selling American clothing brands with a short description each.

    Creates its own OpenAI client from `api_key` and parses the reply with
    the ``BrandsResponse`` schema.

    Args:
        model: Name of the OpenAI chat model to query.
        api_key: OpenAI API key used to build the client.

    Returns:
        The parsed ``BrandsResponse`` serialised as a JSON string, or the
        JSON string ``"{}"`` if the request raised an exception.
    """
    client = OpenAI(api_key=api_key)
    try:
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": "Find 5 non-redundant best selling American clothing brands."},
                {"role": "user", "content": "Give me 5 best selling American clothing brands and a brief information about each brand."}
            ],
            response_format=BrandsResponse
        )
        # `parsed` is a BrandsResponse instance; callers expect its JSON text.
        output = completion.choices[0].message.parsed

        return output.json()

    except Exception as e:
        # Best-effort: report the failure and hand back an empty JSON object.
        print(f"An error occurred: {e}")
        return json.dumps({})

In [621]:
# `out_brands` is the JSON string returned by get_brands (brand names with descriptions).
out_brands = get_brands(model, api_key)

In [622]:
out_brands

'{"brands": [{"name": "Nike", "brand_info": "Founded in 1964, Nike is a global leader in sportswear and athletic shoes. The brand is widely recognized for its innovative designs and performance-enhancing technologies."}, {"name": "Ralph Lauren", "brand_info": "Established in 1967 by Ralph Lauren, this brand is synonymous with timeless American style, offering a wide range of products from apparel to home furnishings, known for its iconic Polo shirts."}, {"name": "Levi\'s", "brand_info": "Dating back to 1853, Levi\'s is renowned for its durable and stylish denim products. The brand is most famous for creating the quintessential American blue jeans."}, {"name": "Tommy Hilfiger", "brand_info": "Founded in 1985, Tommy Hilfiger is celebrated for its classic American preppy style with a modern twist, offering high-quality garments often characterized by the brand\'s signature red, white, and blue logo."}, {"name": "Calvin Klein", "brand_info": "Since its inception in 1968, Calvin Klein has b

In [628]:
#test2
# Embedding and getting association scores between an input and list of emotions

def emotional_association_scores(
        thing,
        model,
        emotions, api_key
    ) -> str:
    """Ask the chat model to score how strongly `thing` is associated with each given emotion.

    The response schema is built per call: an Enum restricted to `emotions`
    constrains the `emotion` field, and the model must also return a free-text
    explanation.

    Args:
        thing: The item to score; interpolated into the prompt.
        model: Name of the OpenAI chat model to query.
        emotions: Emotion names; they become the Enum members, and their count
            is quoted in the prompt as the upper score bound.
        api_key: OpenAI API key used to build the client.

    Returns:
        The parsed response serialised as a JSON string with the keys
        ``associations`` and ``explanation``.
    """
    # Member names and values are both the emotion strings.
    Characteristic = Enum('Characteristic', dict([(emotion, emotion) for emotion in emotions]))

    class EmotionalAssociationScore(BaseModel):
        emotion: Characteristic
        score: float

    class EmotionalAssociationScores(BaseModel):
        associations: List[EmotionalAssociationScore] = Field(description="List of dictionaries e.g. [{'emotion':'sadness', 'score': 4.0}]")
        explanation: str = Field(description="String explaining the association scores.")

    client = OpenAI(api_key=api_key)

    # Every fragment ends with a space so sentences do not run together
    # (previously "...association scores.Ensure the scores...").
    prompt = (
        f"Assign emotional association scores between 0 and {len(emotions)} for the provided thing. "
        "Assign a score for each of the given emotions. Briefly explain the reason behind the association scores. "
        "Ensure the scores reflect the association strength for the specified thing. "
        "Thing: "
        f"{thing}"
    )

    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": "Be a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        response_format=EmotionalAssociationScores,
    )
    # `parsed` is an EmotionalAssociationScores instance.
    output = completion.choices[0].message.parsed
    return output.json()

In [603]:
# Item to score in the examples below.
thing ='summer'
# Reuse the key read from the environment in the setup cell.
api_key=key
emotions_df = get_emotions_df(model, api_key)
# Rebinds `emotions` to a plain list of emotion names taken from the frame.
emotions = list(emotions_df['emotion'].values)
# Score against only the first two emotions to keep the test call small.
out = emotional_association_scores(thing, model, emotions[:2], api_key)
out

('summer',
 '{"associations": [{"emotion": "Joy", "score": 1.8}, {"emotion": "Sadness", "score": 0.2}], "explanation": "Summer is often associated with joy due to the warm weather, holidays, and outdoor activities such as going to the beach, barbecues, and vacations, which are typically enjoyable experiences, hence a high score for Joy. It receives a lower score for Sadness as people may associate summer with the end of the academic year where friends part ways, or the discomfort of very high temperatures."}')

In [634]:
emotions_df.head()

Unnamed: 0,emotion_id,emotion
0,0,Joy
1,1,Shame
2,2,Envy
3,3,Hate
4,4,Anxiety


In [629]:
# Same call as above, kept in `gpt`; the result is the JSON string returned by the function.
gpt = emotional_association_scores(thing, model, emotions[:2], api_key)
gpt

'{"associations": [{"emotion": "Joy", "score": 1.5}, {"emotion": "Sadness", "score": 0.5}], "explanation": "Summer often brings joy to many people due to the warm weather, the possibility of beach vacations, outdoor activities, holidays, and other cheerful events usually associated with this season, so the score for joy is relatively high at 1.5. However, it can also evoke sadness for some because it marks the end of school for certain grades or universities, separation from friends or classmates during the summer break, or because the heat can be unpleasant, which gives it a lower sadness score of 0.5."}'

In [667]:
#test get_df
def get_one(thing, model, emotions_df, api_key):
    """Score one thing against the emotions in `emotions_df`.

    Returns a tuple ``({thing: explanation}, scores)`` where ``scores`` is a
    frame with the columns ``emotion_id`` and ``<thing>``.
    """
    emotion_names = list(emotions_df['emotion'].values)
    payload = json.loads(emotional_association_scores(thing, model, emotion_names, api_key))
    explanation = payload['explanation']

    scores = (
        pd.DataFrame(payload['associations'])
        .rename(columns={'score': thing})
        # Swap the emotion name for its id; the inner join keeps only emotions present in emotions_df.
        .merge(emotions_df, on='emotion', how='inner')
        .drop('emotion', axis=1)
    )
    return ({thing: explanation}, scores[['emotion_id', f'{thing}']])

In [668]:
# Test get_one on the first two emotions only.
out_one = get_one(thing, model, emotions_df.iloc[:2], api_key)

In [669]:
out_one

({'summer': 'Summer is a season typically associated with positive experiences such as vacations, warm weather, and outdoor activities, which is why it scores high in joy, with a score close to 2. It is usually a time when people feel happy and relaxed. In contrast, summer is not usually associated with shame, as it does not inherently involve aspects that would cause embarrassment or disgrace. Therefore, it receives a low score of 0.2 for shame, given that such associations might only occur in specific and less common contexts.'},
    emotion_id  summer
 0           0     1.8
 1           1     0.2)

In [695]:
#test
def get_all(brands_df, model, emotions_df, api_key):
    """Score every brand in `brands_df['name']`.

    Returns a tuple ``(explanations, scores)``: the list of per-brand
    ``{brand: explanation}`` dicts and a frame with ``emotion_id`` plus one
    score column per brand (inner-joined on ``emotion_id``).
    """
    explanations = []
    combined = pd.DataFrame()
    for brand in brands_df['name']:
        explanation, brand_scores = get_one(brand, model, emotions_df, api_key)
        explanations.append(explanation)
        # The first non-empty frame seeds the result; later ones are joined on emotion_id.
        combined = (
            brand_scores
            if combined.empty
            else combined.merge(brand_scores, on='emotion_id', how='inner')
        )
    return (explanations, combined)

In [696]:
# Build the brands table: the positional index becomes brand_id and the model name is recorded in `gpt`.
brands_df = (
    pd.DataFrame(list(json.loads(get_brands(model, api_key)).values())[0])
    .reset_index()
    .rename({'index': 'brand_id'}, axis=1)
    .assign(gpt=model)
)
# Test get_all on the first three brands and first two emotions.
get_all(brands_df.iloc[:3], model, emotions_df.iloc[:2], api_key)

([{'Nike': "Nike, being a major sportswear and athletic company, is often associated with joy due to the excitement and positive feelings associated with sports, fitness, and achievement. This accounts for a higher score of 1.5 in joy. On the other hand, Nike has faced several controversies related to labor practices and allegations of sweatshop conditions, which contribute to a sense of shame for some consumers. However, these controversies might not be as prominent in the daily consumer's perception compared to the positive aspects, resulting in a lower score of 0.5 in shame."},
  {'Ralph Lauren': 'Ralph Lauren often evokes a sense of joy because of its association with high-end fashion, elegance, and a feeling of luxury that can bring pleasure and satisfaction to people. Hence, it scores a 1.5 for joy, reflecting this positive emotional connection without necessarily being extremely strong for everyone. On the other hand, Ralph Lauren generally does not evoke feelings of shame. Whil

In [753]:
#test
def get_brands_scores(model, api_key, emotions_df):
    """Fetch brands, score each against the emotions, and return two tidy tables.

    Args:
        model: Name of the OpenAI chat model to query.
        api_key: OpenAI API key passed to the helper functions.
        emotions_df: Frame with the columns ``emotion_id`` and ``emotion``.

    Returns:
        A tuple ``(brands_df, scores_df)``:
        * ``brands_df`` has the columns brand_id, name, brand_info, scores_info, gpt;
        * ``scores_df`` is long-format with the columns emotion_id, brand_id, score.
    """
    payload = json.loads(get_brands(model, api_key))
    # The JSON payload has a single key; its value is the list of brand records.
    brands_df = (
        pd.DataFrame(list(payload.values())[0])
        .reset_index()
        .rename({'index': 'brand_id'}, axis=1)
        .assign(gpt=model)
    )

    # Named result variables instead of `all`, which shadowed the builtin.
    scoresinfo, wide_scores = get_all(brands_df, model, emotions_df, api_key)

    # Wide (one column per brand) -> long (one row per emotion/brand pair).
    # Only the brand columns are melted; emotion_id stays as the id column.
    long_scores = wide_scores.melt(id_vars='emotion_id', var_name='name', value_name='score')
    # Replace the brand name with its brand_id.
    long_scores = long_scores.merge(brands_df[['brand_id', 'name']], on='name', how='inner')
    scores_df = long_scores[['emotion_id', 'brand_id', 'score']]

    # Flatten the list of {brand: explanation} dicts into a two-column frame.
    scoreinfo_df = pd.DataFrame(
        [(name, info) for entry in scoresinfo for name, info in entry.items()],
        columns=['name', 'scores_info'],
    )
    brands_df = brands_df.merge(scoreinfo_df, how='left', on='name')
    brands_df = brands_df[['brand_id', 'name', 'brand_info', 'scores_info', 'gpt']]
    return (brands_df, scores_df)

In [754]:
# bs[0] is the brands table and bs[1] the long-format scores table; only the first two emotions are used.
bs = get_brands_scores(model, api_key, emotions_df[:2])

In [796]:
bs[0]

Unnamed: 0,brand_id,name,brand_info,scores_info,gpt
0,0,Nike,"Founded in 1964 and based in Oregon, Nike is a...",Nike is a popular athletic brand associated wi...,gpt-4o-2024-08-06
1,1,Ralph Lauren,"Ralph Lauren, established in 1967, is a symbol...","For 'Joy', the score is 2.5 because Ralph Laur...",gpt-4o-2024-08-06
2,2,Under Armour,"Under Armour, headquartered in Baltimore since...","For the brand Under Armour, the emotion of 'Jo...",gpt-4o-2024-08-06
3,3,Levi's,"Levi's, originating in San Francisco and found...","Levi's is a well-known and respected brand, pa...",gpt-4o-2024-08-06
4,4,Tommy Hilfiger,"Tommy Hilfiger, an influential brand since 198...",Tommy Hilfiger is a well-known fashion brand t...,gpt-4o-2024-08-06


{'Nike': 0,
 'Ralph Lauren': 1,
 "Levi's": 2,
 'Under Armour': 3,
 'Calvin Klein': 4}

In [756]:
bs[1]

Unnamed: 0,emotion_id,brand_id,score
0,0,0,1.8
1,1,0,0.2
2,0,1,1.7
3,1,1,0.3
4,0,2,1.7
5,1,2,0.3
6,0,3,1.8
7,1,3,0.5
8,0,4,1.5
9,1,4,0.5


In [783]:
# Distinct brand ids present in the long-format scores table (bs[1]).
brand_id_ls = list(bs[1]['brand_id'].unique())
# Score column for brand_id 0, selected with a boolean mask.
bs[1].loc[bs[1]['brand_id']==0]['score']


0    1.8
1    0.2
Name: score, dtype: float64

In [821]:
#test
def get_similarity(df, dfs, brands_df, number):
    """Recommend the brands whose emotion scores are most similar to the query's.

    Args:
        df: Query frame with an ``emotion_id`` column plus the query's score
            column (as returned by get_one). It is not modified.
        dfs: Long-format scores with the columns emotion_id, brand_id, score.
        brands_df: Brands table with the columns ``name`` and ``brand_id``.
        number: How many brand names to return.

    Returns:
        List of brand names ordered by descending cosine similarity, at most
        `number` long.
    """
    # Sort and index a copy: the in-place version moved `emotion_id` into the
    # caller's index, so a second call on the same frame raised KeyError.
    query = df.sort_values(by='emotion_id').set_index('emotion_id')
    # cosine_similarity expects 2D inputs, hence the (1, n) reshape.
    s1 = query.values.reshape(1, -1)

    similarities = dict()
    for brand_id in dfs['brand_id'].unique():
        # Order each brand's scores by emotion_id so positions line up with the query vector.
        brand_scores = dfs.loc[dfs['brand_id'] == brand_id].sort_values(by='emotion_id')
        s2 = brand_scores['score'].values.reshape(1, -1)
        similarities[brand_id] = cosine_similarity(s1, s2)[0][0]

    # Re-key the similarities by brand name.
    name_id = dict(zip(brands_df['name'], brands_df['brand_id']))
    by_name = {name: similarities[bid] for name, bid in name_id.items() if bid in similarities}

    # sorted() is stable, so ties keep the order of brands_df.
    ranked = sorted(by_name.items(), key=lambda item: item[1], reverse=True)
    return [name for name, _ in ranked[:number]]

In [822]:
# Number of brand recommendations to return.
number = 2
# Brand tables and the query scores are both built from the first three emotions.
bs = get_brands_scores(model, api_key, emotions_df[:3])
out_one = get_one(thing, model, emotions_df.iloc[:3], api_key)
# out_one[1]: query scores; bs[1]: long-format brand scores; bs[0]: brands table.
get_similarity(out_one[1], bs[1], bs[0], number)

{'Nike': 0, "Levi's": 1, 'Ralph Lauren': 2, 'Under Armour': 3, 'Calvin Klein': 4}
{'Nike': 0.9782838736655737, "Levi's": 0.9968895725584536, 'Ralph Lauren': 0.9782838736655737, 'Under Armour': 0.9670913609804875, 'Calvin Klein': 0.9011533136409089}
[("Levi's", 0.9968895725584536), ('Nike', 0.9782838736655737), ('Ralph Lauren', 0.9782838736655737), ('Under Armour', 0.9670913609804875), ('Calvin Klein', 0.9011533136409089)]


["Levi's", 'Nike']

In [None]:
# def check_emotions_exists(model, api_key, db_name, mytable):
#     with sqlite3.connect(os.path.abspath(db_name)) as conn:
#         cursor = conn.cursor()
#         cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (mytable,))
#         if cursor.fetchone() is not None:
#             print(f'Reading {mytable} from database...')
#             query = f'SELECT * FROM {mytable}' 
#             df = pd.read_sql_query(query, conn)
#         else:
#             print(f"{mytable} doesn't exist so generating one...")
            
#     return df

def _generate_and_store_brands(conn, model, api_key, emotions_df):
    """Generate brands and scores via the API, overwrite both tables, and return the scores frame."""
    brands_df, scores_df = get_brands_scores(model, api_key, emotions_df)
    brands_df.to_sql('brands', conn, if_exists='replace', index=False)
    scores_df.to_sql('association_scores', conn, if_exists='replace', index=False)
    return scores_df


def check_data_exists(model, api_key, db_name, update_brand_list, max_emotions=3):
    """Load emotions and association scores from SQLite, generating whatever is missing.

    If the tables ``emotions``, ``brands`` and ``association_scores`` all exist,
    emotions are read from the database; scores are read too when
    ``update_brand_list == 'no'``, otherwise brands and scores are regenerated
    and the two tables replaced. If any table is missing, all three are
    generated and written.

    Args:
        model: Name of the OpenAI chat model to query.
        api_key: OpenAI API key passed to the helper functions.
        db_name: Path of the SQLite database file.
        update_brand_list: ``'no'`` to reuse stored scores; any other value
            regenerates brands and scores.
        max_emotions: Number of leading emotions used when generating scores
            (default 3, as before); ``None`` uses all of them.

    Returns:
        A tuple ``(emotions_df, scores_df)``.
    """
    # closing() guarantees the connection is closed; the inner `conn` context
    # only commits or rolls back the transaction, it does not close.
    with closing(sqlite3.connect(os.path.abspath(db_name))) as conn, conn:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' "
            "AND name in ('emotions', 'brands', 'association_scores')"
        )
        tables = cursor.fetchall()
        if len(tables) == 3:
            emotions_df = pd.read_sql_query("SELECT * FROM emotions", conn)

            if update_brand_list == 'no':
                print('Reading from database...')
                scores_df = pd.read_sql_query("SELECT * FROM association_scores", conn)
            else:
                print('Generating brands data...')
                # Pass a frame slice (emotion_id + emotion); get_brands_scores
                # needs both columns, so a bare 'emotion' Series would fail.
                scores_df = _generate_and_store_brands(
                    conn, model, api_key, emotions_df.iloc[:max_emotions]
                )

        else:
            print("Brands data doesn't exist so generating...")
            emotions_df = get_emotions_df(model, api_key)
            emotions_df.to_sql('emotions', conn, if_exists='replace', index=False)

            scores_df = _generate_and_store_brands(
                conn, model, api_key, emotions_df.iloc[:max_emotions]
            )

    return (emotions_df, scores_df)




# Example usage
# Chat model used for every request.
model = "gpt-4o-2024-08-06"
# API key is read from the environment, never hard-coded.
api_key=os.environ.get('OPENAI_API_KEY')
# SQLite file that stores the emotions, brands and association_scores tables.
db_name = 'database.db'
# 'no' reuses stored scores; any other value regenerates brands and scores.
update_brand_list = 'no'
# Number of recommendations to return.
number = 3
# Item to find similar brands for.
thing ='summer'



In [448]:
# 'no' reuses stored scores when all three tables exist.
update_brand_list = 'no'
number = 3
# Loads (or generates) the emotions and association-score tables.
emotions_df, scores_df = check_data_exists(model, api_key, db_name, update_brand_list)
scores_df.head()

Unnamed: 0_level_0,Nike,Levi's,Ralph Lauren,Under Armour,Calvin Klein
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Joy,45.0,40.0,40.0,35.0,40.0
Sadness,10.0,10.0,10.0,10.0,5.0
Anger,12.0,5.0,5.0,5.0,10.0
Fear,15.0,8.0,8.0,15.0,8.0
Surprise,35.0,25.0,15.0,20.0,20.0
Disgust,8.0,5.0,3.0,5.0,5.0
Trust,40.0,42.0,35.0,38.0,30.0
Anticipation,30.0,30.0,30.0,30.0,35.0
Shame,10.0,6.0,5.0,5.0,5.0
Envy,28.0,20.0,25.0,18.0,25.0


In [452]:
# Same call as the previous cell, re-run to read the tables back from the database.
update_brand_list = 'no'
number = 3
emotions_df, scores_df = check_data_exists(model, api_key, db_name, update_brand_list)
scores_df.head()

Reading from database...


Unnamed: 0,Nike,Levi's,Ralph Lauren,Under Armour,Calvin Klein
0,45.0,40.0,40.0,35.0,40.0
1,10.0,10.0,10.0,10.0,5.0
2,12.0,5.0,5.0,5.0,10.0
3,15.0,8.0,8.0,15.0,8.0
4,35.0,25.0,15.0,20.0,20.0


In [453]:
emotions_df.head()

Unnamed: 0,emotion_id,emotion
0,0,Joy
1,1,Sadness
2,2,Anger
3,3,Fear
4,4,Surprise


In [486]:
# List every table currently present in the database.
# closing() closes the connection on exit; a bare `with sqlite3.connect(...)`
# only manages the transaction and leaves the connection open.
with closing(sqlite3.connect(os.path.abspath(db_name))) as conn:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    # Each row is a 1-tuple holding the table name.
    table_names = [table[0] for table in tables]
    print("Tables in the database:", table_names)

Tables in the database: ['emotions', 'brands', 'association_scores']


In [None]:
# NOTE(review): this cell looks stale relative to the definitions visible in this notebook:
#   - get_df / get_dfs are defined with three parameters (thing, model, emotions) but are called
#     here with a fourth `api_key` argument;
#   - `things` is not assigned in any cell shown;
#   - the most recent get_similarity definition takes (df, dfs, brands_df, number).
# Confirm the intended helpers before re-running.
df = get_df(thing, model, emotions, api_key)
df_cleaned = df.dropna(axis=1)

dfs = get_dfs(things, model, emotions, api_key)
dfs_cleaned = dfs.dropna(axis=1)


result = get_similarity(df_cleaned, dfs_cleaned, number)
result

In [504]:
df

Unnamed: 0,id,brand,info,scores_info,gpt
0,0,Nike,"Founded in 1964, Nike is a multinational corpo...","Nike, as a popular and successful brand, evoke...",gpt-4o-2024-08-06
1,1,Levi's,"Founded in 1853, Levi's is renowned for its de...",Levi's is a well-established brand known for i...,gpt-4o-2024-08-06


In [454]:
# Read the stored brands table back into a frame.
# closing() closes the connection on exit; the earlier sqlite_master lookup was
# dropped because its result was never used.
query = "SELECT * FROM 'brands'"
with closing(sqlite3.connect(os.path.abspath(db_name))) as conn:
    df = pd.read_sql_query(query, conn)
df

Unnamed: 0,id,brand,info,scores_info,gpt
0,0,Nike,"Founded in 1964, Nike is a multinational corpo...","Nike, as a popular and successful brand, evoke...",gpt-4o-2024-08-06
1,1,Levi's,"Founded in 1853, Levi's is renowned for its de...",Levi's is a well-established brand known for i...,gpt-4o-2024-08-06
2,2,Ralph Lauren,"Established in 1967, Ralph Lauren is a luxury ...","Ralph Lauren, as a renowned fashion brand, is ...",gpt-4o-2024-08-06
3,3,Under Armour,"Founded in 1996, Under Armour is a leading bra...",Under Armour is a well-regarded athletic wear ...,gpt-4o-2024-08-06
4,4,Calvin Klein,"Launched in 1968, Calvin Klein is an iconic fa...","Calvin Klein, being a renowned fashion brand, ...",gpt-4o-2024-08-06


In [None]:
#check the database tables, drop tables and check again: testing check_emotions_exists and check_brands_exists
# with sqlite3.connect(os.path.abspath(db_name)) as conn:
#     cursor = conn.cursor()
#     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
#     tables = cursor.fetchall()
#     table_names = [table[0] for table in tables]
#     print("Tables in the database:", table_names)
#     for table in table_names:
#         query = f'SELECT * FROM {table}' 
#         table = pd.read_sql_query(query, conn)
#         print(table.head())
        
# # Drop all tables and check again the above works
# with sqlite3.connect(os.path.abspath(db_name)) as conn:
#     cursor = conn.cursor()
#     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
#     tables = cursor.fetchall()
#     table_names = [table[0] for table in tables]
#     print("Tables in the database:", table_names)
#     for table in table_names:
#         cursor.execute(f"DROP TABLE IF EXISTS {table};")
#         print(f"Table {table} dropped")
#     conn.commit()

Tables in the database: []
