In [3]:
from bertopic import BERTopic
import json
import pandas as pd
pd.set_option('display.max_colwidth', None)
from umap import UMAP
import sys
from hdbscan import HDBSCAN
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from openpyxl import Workbook

import nltk
from nltk.corpus import stopwords

from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, PartOfSpeech
from bertopic.vectorizers import ClassTfidfTransformer
from nltk.tokenize import sent_tokenize, word_tokenize

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the labelled survey sheet and extract the free-text narratives that
# the topic model is trained on.
link = 'documents.xlsx'
# pd.read_excel opens and closes the workbook itself; the previous
# pd.ExcelFile(link).parse(...) call left the file handle open.
df = pd.read_excel(link, sheet_name='general_labeled')
# The first parsed row carries the real column names: promote it to the
# header, then drop it from the data.
df.columns = df.iloc[0]
df = df[1:].reset_index(drop=True)
df_ns = df['narratives_self']

# Drop missing responses and force every remaining entry to a string.
# NOTE(review): dropna() keeps the original row labels, so df_ns can have gaps
# in its index; convert with .tolist() before handing it to code that indexes
# documents by position.
# NOTE(review): the saved topic table contains tokens such as "iâ" and
# "sellerâ", which suggests mis-decoded apostrophes in the source text —
# consider repairing the encoding before modelling.
df_ns = df_ns.dropna()
df_ns = df_ns.astype(str)

In [5]:
# Number of distinct values in the unmerged label column
# (a missing label is counted as a value of its own).
df['ns_label_unmerged'].nunique(dropna=False)

20

In [6]:
# Extra words excluded from the topic vocabulary in addition to NLTK's list.
free_s_w = ["shirt", "shirts", "would", "tshirt", "could", "decision", "decide", "decisions", "decided"]

# NLTK English stop words followed by the custom additions above.
stop_words = [*stopwords.words('english'), *free_s_w]

In [7]:
!pip install -U spacy
!python3 -m spacy download en_core_web_sm
!pip install hf_xet

Defaulting to user installation because normal site-packages is not writeable
Collecting numpy>=1.19.0 (from spacy)
  Using cached numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl.metadata (60 kB)
Using cached numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl (13.7 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.0.2 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-2.0.2
Defaulting to user installation because normal site-packages is not writeable
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none

In [None]:
import spacy

# The loaded pipeline is discarded here; presumably this only checks that
# en_core_web_sm is installed before PartOfSpeech needs it — TODO confirm.
spacy.load('en_core_web_sm')

# Three alternative topic representations, computed side by side.
keybert_model, pos_model, mmr_model = (
    KeyBERTInspired(),
    PartOfSpeech("en_core_web_sm"),
    MaximalMarginalRelevance(diversity=0.4),
)

# The keys become the extra column names in the topic-info table.
representation_model = dict(KeyBERT=keybert_model, MMR=mmr_model, POS=pos_model)

In [None]:
from sklearn.decomposition import PCA

# Sentence embeddings for every narrative, computed once and reused later.
embedding_model = SentenceTransformer("all-mpnet-base-v2")
# Pass a plain list: df_ns keeps the row labels left over after dropna(), and
# a Series with gaps in its index cannot be looked up by position.
embeddings = embedding_model.encode(df_ns.tolist(), show_progress_bar=True)

# UMAP is left at its defaults apart from the fixed seed. The configuration
# UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=40)
# was also tested.
umap_model = UMAP(random_state=40)

# Density-based clustering of the reduced embeddings; prediction_data=True
# is requested so the fitted clusterer keeps the data needed for predictions.
hdbscan_model = HDBSCAN(min_cluster_size=20, min_samples=1, metric='euclidean', cluster_selection_method='leaf', prediction_data=True)

# Unigram bag-of-words over the documents, with the extended stop-word list removed.
vectorizer_model = CountVectorizer(stop_words=stop_words, min_df=1, ngram_range=(1, 1))

# Single assignment (the original repeated the target name twice by mistake).
ctfidf_model = ClassTfidfTransformer(reduce_frequent_words=True)

Batches: 100%|██████████| 25/25 [00:10<00:00,  2.31it/s]


In [None]:
# Collect the separately configured pipeline stages, then build the topic
# model from them in one call.
_bertopic_components = dict(
    embedding_model=embedding_model,
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    vectorizer_model=vectorizer_model,
    ctfidf_model=ctfidf_model,
    representation_model=representation_model,
)
BERT_model = BERTopic(**_bertopic_components)

In [11]:
# Fit the topic model. BERTopic documents its input as a list of strings, so
# the Series is converted first; the embeddings computed earlier are reused so
# the corpus is not encoded a second time.
narratives = df_ns.tolist()
topics, _ = BERT_model.fit_transform(narratives, embeddings=embeddings)

# Topic assignments with outlier documents (-1) reassigned to a topic.
# NOTE(review): this result is not fed back into the model; the cells below
# keep using `topics` and the fitted model as they are.
topics_no_outliers = BERT_model.reduce_outliers(narratives, topics)

In [12]:
# Summary table of the fitted topics; exported and displayed in the cells below.
top_inf = BERT_model.get_topic_info()

## Can you please explain your reasoning/thought process when deciding how much you were willing to pay sellers in different scenarios?

In [13]:
# Write the topic summary to an Excel workbook (path is relative to the working directory).
top_inf.to_excel('final_ns_topics.xlsx')

In [14]:
# Display the topic summary table.
top_inf

Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,MMR,POS,Representative_Docs
0,-1,120,-1_privacy_donate_cause_study,"[privacy, donate, cause, study, right, seller, iâ, genuine, really, information]","[donation, sellers, sales, money, charitable, seller, selling, purchase, value, donate]","[privacy, seller, genuine, information, pay, donation, mind, incentive, money, wanted]","[privacy, cause, study, right, seller, genuine, information, donation, mind, incentive]","[I had a set quota in mind once I saw the Tshirts, and my willingness to pay sellers was more dependent on the quality and value of the tshirt rather than the sellerâ€™s political values. While I may be less inclined to purchase from a seller because I disagree with their political values, the amount of money Iâ€™m willing to spend on something in general is less likely to be impacted by the fact that a seller supports something I also support (and believe everyone should already be supporting). Hence there was little difference in how much I was willing to pay in the different scenarios., Honestly, I simply thought to myself how much would I be willing to pay for a plain old t shirt. I wouldn't pay more than $10 because I'm not into them that much anyways. They look too boxy on me. I started to make my answers exactly the same, because at the end of the day, when we're purchasing items, we don't usually know anything about the seller. So, it should only matter whether or not I wanted to buy the tshirt at that price. However, I couldn't help but have a picture come to my mind about a sleazy salesman trying to push some snake oil on me. That type of dishonesty is something I can't support. Therefore, I made my price lower the second time. , I was relatively similar on my election of what I was willing to pay the sellers in each scenario. I think I may have paid slightly more to the seller that supported the right to privacy without intentionally knowing it was to sell shirts. 
When I make a purchase, Iâ€™m am providing the seller my personal information to use in the course of business. I had slightly higher faith that they have effective privacy/data control practices. But overall, at the end of the day it is a plain white t shirt. I donâ€™t see myself paying substantially more for a product based on the sellerâ€™s beliefs without some other alteration to the product itself. ]"
1,0,89,0_climate_change_justice_racial,"[climate, change, justice, racial, faith, traditional, fighting, people, think, feel]","[climate, capitalism, beliefs, endorse, sincerity, belief, values, profits, society, initiatives]","[climate, change, racial, people, think, feel, supporting, concerned, issues, beliefs]","[climate, change, justice, racial, faith, traditional, people, country, children, half]","[For the first seller, when I didn't know their views, my willingness to pay was based on the product more than anything, tending to the higher side because I knew that the seller would also benefit form the higher price. With the seller who supported Racial Justice without knowing the selling benefits, I feel like they truly believe in the cause, so I was willing to pay them a higher price. However, for the seller who supported Racial Justice after knowing the benefits, I feel like their support isn't authentic, so I'm much less willing to give them money. I feel like this individual is exploiting the cause of Racial Justice for their own financial gains, and I feel that this is morally wrong., People who are trying to ""uphold traditional values"" tend to be along the conservative side (in America) and that side of politics has recently shown a higher tendency toward using capitalism to further their ideologies. Conversely helping climate changes tends to put an emphasis on less waste which would mean creating less goods that could end up in a landfill, if they are ACTUALLY trying to help climate change. The companies that are only looking for a sale opportunity probably care much less about where their t-shirt ends up. , The first was the hope that both the individual who actually supported the fight against climate change might invest some of that extra money into the funds to fit it. It was the same hope for the opportunist as well, but he'd receive less from me on the basis of not fully trusting his sincerity or the odds he'd invest it selflessly. 
As for the Faith in Public life, I feel that area is already rife with the grifting sort, out to bang the drum of protecting faith in public life, but in actuality are simply looking to line their pockets with the money of zealous fools]"
2,1,79,1_company_support_business_causes,"[company, support, business, causes, product, cause, someone, political, views, something]","[endorses, charities, donate, values, ideology, sellers, money, supporting, political, purchases]","[company, business, product, political, beliefs, supporting, people, aligns, generally, purchases]","[company, support, business, causes, product, cause, political, views, beliefs, people]","[I believe it's worth it to pay a couple dollars extra to support a company that is trying to do good things in the world. It's respectable to advocate for groups that can't always advocate for themselves and need extra support, so I feel it's worth it to spend extra. As far as the company that didn't donate to the cause until they knew that it was going to benefit them, that's not cool. So that's why I chose that I would spend less with that particular company, because it's really only about their bottom dollar instead of about the people. I like to sort companies that are about the people, I solely focus on the product I am purchasing and not who is selling it to me. A sellers stance on social issues has no bearing to my decision making. In other words, I just want a product at the best possible price won't pay more just because someone's beliefs align with mine. When evaluating the scenarios, I viewed each transaction as a business transaction, and not as a way to support someone who shares my beliefs., When it comes to making purchases, my base-line expectation is political neutrality. I may be slightly more likely to purchase something from someone who shares my values, but it really don't tip the scales that much for me. I wouldn't pay much more than I would otherwise. On the other hand, I would not support a retailer who loudly endorses values I do not hold. There are too many other options available. And I would rather pay more to a neutral seller than less to a company who loudly endorses opinions I disagree with.]"
3,2,75,2_bonus_payment_rather_prefer,"[bonus, payment, rather, prefer, cash, personal, desire, currently, need, compared]","[monetary, value, price, worth, money, considerations, amount, cost, payment, vendors]","[bonus, payment, amount, chose, preferred, free, value, 50, financial, considering]","[bonus, payment, cash, personal, desire, need, amount, free, use, value]","[My reasoning/thought process when deciding how much I am willing to pay the sellers in the different scenarios is totally dependent upon the fact that the economy has crushed my finances and I am literally hungry in America trying to pay my bills in this inflation and now looming recession according to the news. Prices are up but my paycheck is not. I hold a PhD and manage a university library. You would think I make enough to buy staples like cream cheese but the last time I was at the store it was $7.99. So when presented with an option to get a tee shirt or possibly get a bonus of $50, I selected the $50 because I am taking surveys to make extra money to feed my family. That was my reasoning/thought process when deciding how much I was willing to pay sellers in the different scenarios. Thank you., When deciding how much to pay sellers in different scenarios, my thought process would involve several key considerations:\n\n1. Personal Value and Utility\nPersonal Use: I would assess how much I value the T-shirt for personal use. This includes considering factors like design, quality, and how often I would wear it.\nAlternatives: I would compare the T-shirt to other similar items I could purchase, evaluating if I can get a better deal elsewhere or if this T-shirt is particularly unique or desirable.\n2. Budget Constraints\nFinancial Situation: I would consider my current financial situation and budget. 
This involves deciding how much discretionary spending I can afford without compromising other needs.\nPriority: I would determine how high purchasing the T-shirt ranks among my current spending priorities.\n3. Perceived Fairness and Sellerâ€™s Earnings\nFair Price: I would reflect on what I perceive to be a fair price for the T-shirt, considering production costs, brand reputation, and market prices.\nSupporting the Seller: If the seller is a small business or an individual artisan, I might be willing to pay more to support their craft and livelihood.\n4. Experimental Context\nBonus vs. T-shirt: Given that there's an option between receiving a bonus payment and the T-shirt, I would consider how much the monetary bonus is worth to me compared to the T-shirt. If I value the T-shirt significantly, I might indicate a higher willingness to pay.\nRandom Selection: Understanding that a random selection could result in either receiving the T-shirt or a bonus payment, I would balance my maximum willingness to pay with the likelihood of being satisfied with either outcome.\nScenario-Based Decision Making\nIn different scenarios, these factors interplay as follows:\n\nHigh Personal Value Scenario:\n\nIf I highly value the T-shirt (e.g., itâ€™s a limited edition or has significant personal meaning), I would be willing to pay a higher amount.\nI would still consider my budget and ensure Iâ€™m not overcommitting financially.\nLow Personal Value Scenario:\n\nIf the T-shirt is of low personal value (e.g., itâ€™s a generic design or something Iâ€™m not particularly fond of), I would indicate a lower willingness to pay.\nThe bonus payment might be more appealing in this case, influencing me to choose the monetary option over the T-shirt.\nBalanced Value Scenario:\n\nIf I see equal value in both the T-shirt and the bonus payment, I would try to find a middle ground in terms of how much Iâ€™m willing to pay.\nThis would involve setting a price that I believe is fair and reasonable, 
ensuring Iâ€™m content with either outcome.\nFinal Decision\nAfter weighing these factors, I would decide on an amount that reflects the T-shirt's perceived value, my financial situation, and the experimental conditions. My goal would be to make a decision that I feel comfortable with, regardless of whether I end up with the T-shirt or the bonus payment., When determining the amount I was ready to offer the vendors in various situations, I took into account various considerations. Initially, I assessed the T-shirt's intrinsic worth by considering its quality, design, and any personal liking I may have towards it. I next thought about my financial limits and how much money I could comfortably allocate for a T-shirt without impacting my finances.\nThen, I considered the satisfaction or utility I would get from owning the T-shirt compared to getting the same amount of money as a bonus. This involved taking into account the extent to which I require or desire a new T-shirt in relation to how much the bonus payment could assist me in meeting other financial obligations or savings objectives.\nMoreover, I considered how my choice would affect the seller's profits. I aimed to provide fair support to the vendors, particularly those who were putting in effort to promote and sell the T-shirts. So, I attempted to find a fair price that would sufficiently compensate the seller for their efforts, while also considering my willingness to pay.\nFinally, I took into account the anonymity of the process, realizing that my decisions wouldn't be personally critiqued but would impact the seller significantly. This enabled me to make a decision based on the aforementioned factors instead of external pressure or bias, making it more objective.]"
4,3,70,3_service_product_sellers_quality,"[service, product, sellers, quality, offers, items, reputation, prices, deciding, personal]","[value, sellers, consideration, prices, evaluating, considerations, seller, negotiation, items, deciding]","[service, product, sellers, quality, personal, features, negotiation, value, factors, budget]","[service, product, sellers, quality, items, reputation, prices, personal, feelings, brand]","[My decision on how much to pay sellers is a balanced approach that considers the intrinsic value of the product or service, market conditions, fairness and ethical considerations, and strategic negotiation. By carefully evaluating these factors, I aim to make informed, fair, and mutually beneficial purchasing decisions., . Assessment of Value and Quality:\n Product Quality: I evaluate the quality of the item or service. For example, if it's a T-shirt, I consider factors like material, craftsmanship, and design. Higher quality may justify a higher price.\n Value for Money: I assess whether the product or service offers good value relative to its price. This includes looking at features, benefits, and whether it meets my needs or preferences effectively., Recognizing the Product or Service's Value:\n\nIntrinsic Value: The product or service's inherent value is something I take into account. This entails evaluating the item's overall usefulness, functionality, and quality. For example, I'm willing to spend more if the product delivers unique characteristics or fulfills a pressing need.\nMarket Value: Another consideration I make is the market value, which is impacted by a number of variables including consumer feedback, brand reputation, and comparisons with other items in the market. This aids in determining the fair market value.\n\nFinancial Restraints:\nPersonal Budget: I take into account my financial circumstances and budgetary restrictions. 
If a product is necessary but costs more than I can afford, I might explore for alternatives or determine how far I can push my spending limit to make that specific buy.\nOpportunity Cost: I calculate the potential loss of spending more money on one item, or what I might have to give up. This aids in setting spending priorities for goods and services that provide the most value or advantages.\n\nPerceived Worth and Contentment:\n\nPerceived benefit: I estimate the level of benefit or satisfaction that the good or service will provide. I might be willing to pay more for a product, for instance, if it significantly improves my quality of life or offers large long-term benefits.\nExperience and Trust: My propensity to pay may be influenced by my prior interactions with the vendor or brand. Good relationships from the past foster trust, which increases my willingness to confidently pay greater costs.]"
5,4,57,4_based_thought_considered_much,"[based, thought, considered, much, quality, well, mainly, retail, guided, first]","[value, thrift, tshirts, worth, costs, price, sellers, discount, cost, demand]","[based, quality, retail, guided, materials, deciding, constraints, designs, brand, cost]","[much, quality, retail, first, value, worth, materials, due, costs, designs]","[I made these decisions based on the fact these T-short sellers are dedicating time to creating a business. Regardless of their values I find it motivating they are trying their hardest to earn an income and sell consumer goods. I believe that shirts clothing are essential and they are doing a frat job by making these items, packaging, selling, and shipping. , Quality of the T-shirt: I assess the quality of the T-shirt, including factors like fabric, design, durability, and craftsmanship. Higher-quality materials and unique designs might justify a higher price. Brand and Reputation: If the T-shirt is from a well-known or reputable brand, I might be willing to pay more due to perceived value and brand prestige., Mostly I was thinking about how much I am willing to pay for a T-shirt. I like to support good causes; I like to support sellers and businesses that have similar values to my own. However, I'm not willing to spend crazy amounts of money on a product that I could find more much cheaper elsewhere. There came a point, usually around the $20 mark, that I would prefer the money instead. It simply became a matter of value vs. perceived value. ]"
6,5,53,5_lgbtq_rights_equality_gender,"[lgbtq, rights, equality, gender, gun, gay, two, guns, advocates, support]","[equality, advocates, values, sellers, lgbtq, endorse, lgbt, preference, rights, sales]","[lgbtq, rights, gender, guns, advocates, religious, issue, society, party, republican]","[lgbtq, rights, equality, gender, gun, gay, guns, advocates, support, campaign]","[For the control / neutral seller, I figure that a custom t-shirt was worth about $10. That's how much my niece charged me for a custom t-shirt. For the seller who supported LGBTQ+ without knowing, I paid $20 since their support was genuine without financial incentive. For the seller who supported LGBTQ+ knowing that it could lead to greater sales, I split the difference between the two sellers at $16. Yes, they had a financial incentive but at least they weren't one of those weirdos who were totally against LGBTQ+ like the bakery shop who refused to bake a wedding cake for a gay couple due to their ""religious"" beliefs., I'm a graphic designer, so I'd be much more intererested in buying tees that have a message or some kind of design on them. Just this year I've bought campaign merch that doubles as gifts, because I know some of the money goes back to a campaign I strongly support. But their merch also looks cool! So I just wouldn't pay a lot for a white tee, even if they donate $2 to a cause I care about. I'd probably rather straight up donate to the Trevor Project or Equality Florida. I did differentiate, though, between the seller who genuinely wanted to support LGBTQ+ rights and the seller who just wanted to make a sale and didn't care. That seller could turn around and donate to causes I consider hateful; I wouldn't trust that they'd handle my donation in the way they promise., If a seller indicates support for gender equality, I am more likely to pay a premium. Supporting businesses that align with my values feels like an investment in the kind of world I want to live in. 
I look for clear, transparent indications that the seller genuinely supports the values they claim. This might include certifications, detailed information on their practices, or third-party endorsements. While gender equality is particularly important to me, I also consider other values such as environmental sustainability, ethical labor practices, and social justice. Sellers who demonstrate a commitment to these values also positively influence my willingness to pay more.]"
7,6,42,6_plain_high_15_clothes,"[plain, high, 15, clothes, 10, spend, usually, material, amount, necessity]","[thrift, clothes, inexpensive, cheap, goodwill, spend, price, cost, buy, fashionable]","[plain, clothes, spend, amount, 20, store, fake, goodwill, generally, design]","[plain, high, clothes, material, amount, necessity, store, fake, hard, certain]","[Plain t-shirts are something that is very easy to come by, and I am not willing to pay a very large amount of money unless the quality is very good, the design is very special, or the designer is someone who supports causes that I value and that help the earth and society. The maximum I am willing to pay for a plain shirt is usually about 10-15 dollars, and the higher end of the amount would be for a seller that I wanted to show support for due to shared values., In most cases, I buy plain T-shirts on sale from Goodwill during Black Friday events. In years past, Goodwill has had $2 sales for T-shirts. I donâ€™t like to spend a whole lot on T-shirts, unless the T-shirt has a specific message for a specific event that I would be attending. I personally have sometimes bought â€œmessageâ€ T-shirts for as high as $15 or $20, depending on the message, but I would generally go for $10 or under for a plain T-shirt. This is regardless of the sellerâ€™s personal opinion on certain political or public policy issues. , I purchase out of necessity and not to be fashion conscious. I wear clothes until they become faded, wear out, threads start to fray about. So whether clothes are on sale or not, whether the clothes are fashionable or not, whether the clothes are made of high quality material does not matter to me.\n\nI buy stuff based on necessity, that is when I need to buy them. So the amount I was willing to pay sellers really is immaterial to me in all cases. If I know that the product is high quality then I will purchase it at that time.]"
8,7,39,7_charity_donation_minimum_typically,"[charity, donation, minimum, typically, blank, donate, hold, tee, plain, 20]","[donation, donate, charity, donating, tee, value, worth, dollars, price, clothing]","[charity, donation, minimum, blank, tee, overall, clothing, cheap, color, buy]","[charity, donation, minimum, blank, tee, plain, cotton, one, clothing, cause]","[Overall the shirt looked like decent quality, but nothing I would pay more for than a shirt that I feel like I could purchase at Target or Walmart. When it comes to the thought process when deciding how much to pay the sellers in different scenarios, I feel like I was somewhat more willing to pay more for the shirt when the seller wasn't influenced by monetary gain, but the baseline at what I would pay stayed the same whether or not the seller was supporting something or not. Overall, my prices were pretty firm on what I'd pay for the shirts. , Well, I have owned a t shirt shop before and the ""information"" you provide was completely useless. I don't know if it is a light or heavy fabric, if it is 100% cotton or blend or any of the other myriad considerations one ponders when buying a t shirt. That being said... $14 is the MOST I would pay for a high quality blank shirt. That is just economics. All that donating stuff means nothing as most of the time it is just a scam. You wanted my honest opinion... you got it., When deciding the price I was willing to pay for a plain t-shirt, I first considered what constitutes a good price for such an item. Gildan sells plain white t-shirts for around $15, while brands like Carhartt offer thicker cotton t-shirts for about $20 each. I set my benchmark based on these prices.\n\nConsidering the numerous color options available, I assumed that these shirts must be relatively cheap to produce. If the brand was unknown, I anticipated some unusual marketing claims about why they were a ""better"" option, likely pricing their t-shirts around $30-$35. 
For me personally, that was the maximum I was willing to pay.\n\nWhen it comes to companies doing charity work, I expect to pay a premium. For plain t-shirts with no additional backing or cause, I would pay at most $25. For charity-based ones, I can see the price going up to $45-$50 per shirt. However, I would not pay more than $34 for just a basic plain t-shirt.]"
9,8,38,8_bonus_sellerâ_confidence_opportunity,"[bonus, sellerâ, confidence, opportunity, accept, reward, seller, choice, online, instead]","[purchase, money, seller, valuation, value, reward, prizes, values, payment, preferences]","[confidence, accept, reward, seller, option, payment, receive, supported, input, wanted]","[bonus, confidence, opportunity, seller, choice, option, payment, helpful, attempt, justice]","[I initially preferred the bonus payment to the T shirts no matter how small the amount except 0$ but was willing to pick the T shirt instead of the $2 bonus on hearing the seller also supports Traditional values as most important like myself in order for the seller to benefit a little. The seller who supported after finding out about the opportunity to sell didn't matter too much nor made much of a difference to me even though the one who supported the same Traditional value as me without knowing about the opportunity would be a stronger supporter. For me, support is still support even if it was realized later. Hence I was willing to pay them both the same., I generally wanted a bonus instead of a t-shirt because receiving a t-shirt is too much trouble. I was willing to buy a t-shirt from someone that was willing to donate to Fight Climate change without knowing about the chance to sell beforehand for a higher price because I know that he is going to be down $2 no matter what. I will pay a higher amount from a tshirt for that person because he is already incurring a cost to help something that I believe in. Even for the seller that knows he might be using Fight Climate change to sell, he is still donating money in the end. It might feel a bit scummy to use a cause to sell tshirts, but money for a cause is money for a cause. 
It doesn't really matter for me if the intentions aren't pure, $2 is still going to Fight Climate change on a successful sale., Sellerâ€™s Support for Values:\nNeutral Seller: For the neutral seller, my decision was based purely on the product itself (the T-shirt) and the price. Since there were no additional factors influencing my decision, I focused on the value I placed on the T-shirt alone.\nSeller Supporting Racial Justice: Knowing that this seller supports a value that is important to me (Racial Justice) added a positive bias. I was more inclined to support this seller because their values aligned with mine, which made me willing to pay a bit more for the T-shirt.\nSeller Supporting Racial Justice for Market Opportunity: Although this seller also supports Racial Justice, their support might be driven by the opportunity to sell more T-shirts. This made me slightly cautious, but I still valued their support for an important cause, which influenced my willingness to pay.\nProduct and Price Consideration:\nI considered the quality and utility of the T-shirt itself. Since the T-shirt is a basic item, I evaluated how much I would typically be willing to pay for such a product in a regular market setting.\nI compared the T-shirt to the bonus payment options. For each price point, I weighed the value of receiving the T-shirt against the monetary bonus. This helped me determine the maximum amount I was willing to pay before preferring the bonus payment instead.\nImpact on Sellerâ€™s Earnings:\nI understood that my willingness to pay directly affected the sellerâ€™s earnings. This consideration made me more thoughtful about my choices, especially for sellers who supported values I cared about. 
I wanted to ensure that my decisions reflected my support for those values.\nAnonymity and Fairness:\nKnowing that my choices were anonymous and that sellers would not know my identity or specific decision scenarios, I felt comfortable making honest decisions based on my true preferences.\nI aimed to be fair in my evaluations, ensuring that my willingness to pay was consistent with my genuine valuation of the T-shirt and the sellerâ€™s support for important values.\nConfidence in Decisions:\nI considered my confidence in the sellersâ€™ motivations and the quality of the T-shirt. For sellers who genuinely supported important values, I felt more confident in my willingness to pay a higher amount.\nFor the neutral seller, my confidence was based solely on the product itself, leading to a more straightforward valuation.\nOverall, my decisions were influenced by a combination of the productâ€™s value, the sellerâ€™s support for important values, and the impact on the sellerâ€™s earnings. I aimed to make thoughtful and fair choices that reflected my true preferences and support for values that matter to me.]"


In [None]:
# 2-D scatter of the documents, built from the precomputed embeddings.
# A plain list is passed because the plot helper looks documents up by
# position, which a Series with gaps in its index (after dropna) cannot do.
fig = BERT_model.visualize_documents(df_ns.tolist(), embeddings=embeddings)

# Hide the marker colour scale and the legend before showing the figure.
fig.update_traces(marker_showscale=False)
fig.update_layout(showlegend=False)

fig.show()

In [16]:
# One bar chart panel per row of the topic-info table.
number_of_topics = BERT_model.get_topic_info().shape[0]

fig = BERT_model.visualize_barchart(
    top_n_topics=number_of_topics,
)
fig

In [17]:
# Hierarchy plot of the fitted topics; the bare name on the last line renders the figure.
fig2 = BERT_model.visualize_hierarchy()
fig2

In [18]:
# Install gensim for the coherence evaluation below.
# NOTE(review): per the recorded output this downgrades numpy from 2.0.2 to
# 1.26.4 and leaves thinc 8.3.6 (requires numpy>=2.0.0) incompatible; pip also
# notes that a kernel restart may be needed to use the updated packages.
%pip install gensim

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting numpy<2.0,>=1.18.5 (from gensim)
  Using cached numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl (14.0 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


In [19]:
# Plain Python list of the narratives, used to build the evaluation frame below.
docs = df_ns.tolist()

In [20]:
import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel

In [None]:
# One row per narrative: its text, a running ID and its assigned topic.
documents = pd.DataFrame(
    dict(
        Document=docs,
        ID=range(len(docs)),
        Topic=topics,
    )
)

In [22]:
# Concatenate all narratives of a topic into a single space-separated string.
documents_per_topic = (
    documents
    .groupby(['Topic'], as_index=False)
    .agg({'Document': ' '.join})
)

# Run the joined texts through the model's own text preprocessing.
cleaned_docs = BERT_model._preprocess_text(documents_per_topic['Document'].values)

In [None]:
# Take the vectorizer from the fitted topic model and build the analyzer from
# that same object, rather than from the loose global `vectorizer_model`, so
# tokenisation always matches what the model actually used.
vectorizer = BERT_model.vectorizer_model
analyzer = vectorizer.build_analyzer()

In [None]:
# Inputs for the gensim coherence model.
words = vectorizer.get_feature_names_out()

# Tokenise each per-topic document with the model's analyzer, then build the
# gensim dictionary and bag-of-words corpus from those tokens.
tokens = [analyzer(doc) for doc in cleaned_docs]
dictionary = corpora.Dictionary(tokens)
corpus = [dictionary.doc2bow(token) for token in tokens]

# Evaluate every real topic and skip only the outlier topic (-1).
# The previous range(len(set(topics)) - 1) assumed that -1 is always present
# and silently dropped the last topic whenever there were no outliers.
topic_ids = sorted(topic for topic in set(topics) if topic != -1)
topic_words = [[word for word, _ in BERT_model.get_topic(topic)]
               for topic in topic_ids]

In [25]:
len(topic_words)

14

Evaluation

In [None]:
# Settings for the c_v coherence evaluation of the extracted topic words.
_coherence_settings = dict(
    topics=topic_words,
    texts=tokens,
    corpus=corpus,
    dictionary=dictionary,
    coherence='c_v',
)
coherence_model = CoherenceModel(**_coherence_settings)

# Overall coherence score across the evaluated topics.
coherence = coherence_model.get_coherence()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [None]:
# Coherence score of each evaluated topic, in the order of topic_words.
coherence_model.get_coherence_per_topic()

[0.5996331547331826,
 0.45680726184513515,
 0.5154689574578023,
 0.568718907312437,
 0.42553306700634386,
 0.6557107214238853,
 0.5307283391912401,
 0.5004640745302434,
 0.2928205149089979,
 0.5181215569662116,
 0.4739330951255261,
 0.5594901423865223,
 0.72470022324749,
 0.5670830526072543]

In [28]:
# Overall coherence score of the model (the same quantity stored in `coherence` above).
coherence_model.get_coherence()

0.5278009334815909