In [1]:
from itertools import chain

import altair as alt
import pandas as pd
import tensorflow_hub as hub
from hdbscan import HDBSCAN
from IPython.display import display
from umap import UMAP

In [2]:
# Read the xz-compressed CSV and keep only the moment text and its label.
# Rows missing either value are dropped, then the index is renumbered from 0
# so row positions line up with the arrays built from this frame later.
df = pd.read_csv("cleaned_hm.csv.xz")[["cleaned_hm", "ground_truth_category"]]
df = df.dropna().reset_index(drop=True)



In [3]:
# Load the Universal Sentence Encoder (version 4) from TensorFlow Hub. The
# returned object is called directly on the list of documents in the next cell.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

In [4]:
# Pull the moment texts into a plain Python list (same order as the rows of
# `df`) and encode all of them with the sentence encoder in a single call.
docs = df.loc[:, "cleaned_hm"].to_list()
embeddings = embed(docs)

In [5]:
# Project the sentence embeddings down to the coordinates used for clustering
# and plotting. Distances in the embedding space are cosine, and random_state
# is pinned so repeated runs use the same seed.
umap = UMAP(
    n_neighbors=30,
    min_dist=0.0,
    metric="cosine",
    random_state=1234,
)
embeddings_umap = umap.fit_transform(embeddings)

In [6]:
# One row per document: its text, its first two UMAP coordinates and its
# ground-truth label. The label Series is aligned on index, which matches the
# positional data because `df` was re-indexed from 0 when it was loaded.
embeddings_df = pd.DataFrame(
    {
        "Document": docs,
        "Component 1": embeddings_umap[:, 0],
        "Component 2": embeddings_umap[:, 1],
        "Ground Truth": df["ground_truth_category"],
    }
)

In [7]:
# Require every cluster to hold at least 1% of the documents (integer
# division of the row count by 100) and pick clusters with the "leaf" method.
hdbscan = HDBSCAN(
    min_cluster_size=len(embeddings_df) // 100,
    cluster_selection_method="leaf",
)

In [8]:
# Cluster the UMAP coordinates (not the raw sentence embeddings) and store one
# label per document. The reporting loop further down treats label -1 as
# "No Cluster (outlier)".
clusters = hdbscan.fit_predict(embeddings_umap)
embeddings_df["Cluster"] = clusters

In [9]:
# Lift Altair's row limit so the full embeddings frame can be handed to
# alt.Chart in the next cell.
# NOTE(review): this presumably embeds every row in the saved chart, so the
# output file may be large — confirm.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [10]:
# Scatter plot of the UMAP coordinates: colour encodes the HDBSCAN cluster
# (as a nominal field), marker shape encodes the ground-truth label, and the
# tooltip shows the document text with both labels. Axes, grid lines and the
# view border are hidden; the chart is made interactive.
chart = (
    alt.Chart(
        embeddings_df,
        title="Happy Moments - USE → UMAP → HDBSCAN",
        width=1000,
        height=1000,
    )
    .mark_point()
    .encode(
        x=alt.X("Component 1", axis=None),
        y=alt.Y("Component 2", axis=None),
        color="Cluster:N",
        shape="Ground Truth",
        tooltip=["Document", "Cluster", "Ground Truth"],
    )
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .interactive()
)

In [11]:
# Write the chart to an HTML file under docs/.
# NOTE(review): assumes the "docs/" directory already exists — confirm.
chart.save("docs/nlp-use-umap-hdbscan-chart.html")

In [12]:
# Flatten the per-cluster exemplar arrays into one list of
# (Component 1, Component 2) coordinate tuples.
exemplars = list(map(tuple, chain.from_iterable(hdbscan.exemplars_)))

# Hashed copy used for lookups: testing membership against the list scans
# every exemplar for every row, which is needlessly quadratic.
exemplar_coords = frozenset(exemplars)


def exemplar(row):
    """Return True when a row's UMAP coordinates match a cluster exemplar.

    Args:
        row: A row of ``embeddings_df`` providing the "Component 1" and
            "Component 2" values.

    Returns:
        bool: Whether the (Component 1, Component 2) pair is one of the
        exemplar coordinates. Matching is by coordinate value, so any
        document sharing an exemplar's exact coordinates is flagged too.
    """
    return (row["Component 1"], row["Component 2"]) in exemplar_coords


embeddings_df["Exemplar"] = embeddings_df.apply(exemplar, axis=1)


In [13]:
# Sanity check: how many documents were flagged as cluster exemplars.
embeddings_df["Exemplar"].value_counts()

False    10829
True      3296
Name: Exemplar, dtype: int64

In [14]:
# Group the documents by cluster label; the reporting loop below iterates
# over these groups.
grouped_df = embeddings_df.groupby("Cluster")

In [15]:
# Raise the column display width to 200 characters so long documents are
# mostly readable when frames are rendered.
pd.set_option("display.max_colwidth", 200)

In [18]:
# Report every cluster: its size and share of all documents, its exemplar
# documents, and the distribution of ground-truth labels inside it. The
# group with label -1 (no cluster) only gets a one-line summary.
n_documents = len(embeddings_df)
for key, group in grouped_df:
    # `group` is the sub-frame for this label, so no extra get_group() lookup
    # is needed.
    n = len(group)
    share = (n / n_documents) * 100
    if key == -1:
        print(f"No Cluster (outlier) \tN Total: {n} ({share:.1f}%)")
        print("-----" * 5)
        continue
    print(f"Cluster: {key} \tN Total: {n} ({share:.1f}%)")
    print("Cluster Exemplars: ")
    display(group.query("Exemplar")["Document"].to_frame())
    display(group["Ground Truth"].value_counts().to_frame())
    print("-----" * 5)

No Cluster (outlier) 	N Total: 6831 (48.4%)
-------------------------
Cluster: 0 	N Total: 144 (1.0%)
Cluster Exemplars: 


Unnamed: 0,Document
606,I WENT TO MOVIE
749,I WEND TO MOVIE
750,I WENT TO MOVIE
875,I WENT TO MOVIE
877,I WENT TO MOVIE
...,...
13909,I WENT TO MOVIE
13992,I WENT MOVIE
13998,I WENT TO MOVIE
14069,I WENT TO MOVIE


Unnamed: 0,Ground Truth
leisure,144


-------------------------
Cluster: 1 	N Total: 153 (1.1%)
Cluster Exemplars: 


Unnamed: 0,Document
94,My cat snuggled with me while we slept.
241,My cat cuddled with me when I went to sleep.
256,I was able to take my cat out for a walk while the sky was clear.
257,"I played with my new kitten, we have only had him for 3 weeks!"
272,My friend sent me pictures of his cat.
...,...
13655,We found a stray cat and couldnt catch it but we fed it . that made me happy .
13666,My wife found a kitten on her morning walk. She is really cute and my kids love her.
13754,"My cats came and cuddled with me, including the male the cat that does not like the cuddle much."
13791,My son learned to meow like a cat.


Unnamed: 0,Ground Truth
affection,124
enjoy_the_moment,11
achievement,8
bonding,5
leisure,5


-------------------------
Cluster: 2 	N Total: 159 (1.1%)
Cluster Exemplars: 


Unnamed: 0,Document
56,I bought a new phone
97,I bought a new laptop.
171,I picked up a new phone this afternoon with an Android operating system instead of the terrible Windows phone I have been using.
337,I bought a new cell phone with latest features and specifications.
401,I've got a new laptop and it works really well.
...,...
13545,I bought a new laptop.
13683,I managed to install a new screen on a broken phone.
13903,I bought a new laptop.
13959,I was shopping for a new cell phone at Target and found out that I will receive a $200 gift card when I buy a new phone and activate service.


Unnamed: 0,Ground Truth
achievement,134
affection,9
enjoy_the_moment,8
leisure,5
bonding,3


-------------------------
Cluster: 3 	N Total: 299 (2.1%)
Cluster Exemplars: 


Unnamed: 0,Document
114,"My friend, Sheryl came over to visit me and my puppy."
235,Spending time with my dogs outside made me happy. I love watching them play and having fun
285,Both of my dogs and my toddler played together nicely outside while I enjoyed the nice sunny weather.
521,I petted and played with a friend's beautiful dog (golden retriever).
532,My dogs were all three vying for my attention by jumping at my feet and simultaneous giving me lots of doggy kisses.
...,...
13488,I was able to play with my parents' dog for the first time in a few weeks.
13775,I took my dog on a drive around town and she seemed really happy about it.
13783,My fiancee and our two dogs were all lying in bed comfortably. I watched them just lie there for a couple minutes before going to bed myself.
13854,I was happy when my dog showed improvement with a new trick that I am trying to teach her when I worked on it with her this morning.


Unnamed: 0,Ground Truth
affection,246
achievement,17
bonding,17
enjoy_the_moment,6
leisure,5
nature,4
exercise,4


-------------------------
Cluster: 4 	N Total: 222 (1.6%)
Cluster Exemplars: 


Unnamed: 0,Document
109,I watched a wonderful movie that I have not seen in years. My family and I were thrilled.
138,"I watched a movie with my wife, and we both enjoyed it."
430,"I was able to see one of my most anticipated movies, Logan, starring Hugh Jackman."
540,In the last 24 hours I was happy because I went to a movie theater and enjoyed a movie.
696,There was a funny scene in a movie that I was seeing for the first time.
...,...
13425,today made me happy to invite my wife and my best childhood friend to watch a movie in the cinema.
13682,"My sister and I watched ""An Affair to Remember"" and really enjoyed watching it together."
13773,I love watch movies with my wife in my bedroom
13809,watched Bahubali movie with my friends


Unnamed: 0,Ground Truth
leisure,96
affection,70
bonding,37
enjoy_the_moment,15
achievement,4


-------------------------
Cluster: 5 	N Total: 179 (1.3%)
Cluster Exemplars: 


Unnamed: 0,Document
51,Watched a new episode of a show that I like.
99,Watching Bob's Burgers streaming on Fox.
131,I watched a particularly compelling episode of a show that I enjoy on Netflix.
219,I saw that there was a new episode of Mindy Project and I was excited to watch it.
228,I watched a TV show I like a lot.
...,...
13803,"My favorite TV show is back on for a new season on AMC, and I can't wait to binge on it."
13851,I managed to find time to catch up on Netflix series.
13885,"I was able to watch my favorite show, which I hadn't had time for in a while."
13974,i watched the game of thrones trailer


Unnamed: 0,Ground Truth
leisure,146
enjoy_the_moment,13
affection,13
bonding,4
achievement,3


-------------------------
Cluster: 6 	N Total: 309 (2.2%)
Cluster Exemplars: 


Unnamed: 0,Document
122,"Increased my max bench press this morning, heaviest I've ever lifted"
147,I went to the gym. Hitting a new personal record on the bench press made me more confident in my abilities.
214,I completed a goal that I set for myself at the gym and it pumped me up for tomorrow.
231,I went to the gym with a friend of mine who I hadn't seen in a long time.
555,Getting to do yoga unexpectedly.
...,...
13447,"I helped my friend Kyle create a workout regiment, he was really thankful."
13456,I felt extra flexible today and was able to push into a deeper pose like I was Gumby.
13734,I had a good workout.
13843,"I successfully rolled out the perpetual knots in my calves last night (after a week of work), and my posture and foot pain have improved drastically."


Unnamed: 0,Ground Truth
achievement,154
exercise,147
affection,4
bonding,3
enjoy_the_moment,1


-------------------------
Cluster: 7 	N Total: 222 (1.6%)
Cluster Exemplars: 


Unnamed: 0,Document
11,INDIA WON THE SERIES AGAINST AUSTRALIA MADE ME TO FEEL HAPPY
52,A good win for my sports team
123,I watched a Blackhawks game last night.
210,The team I coach won its match today.
273,"I made plans with my brother to get together this weekend to watch the Final Four. My nephew could win $700, because he has Gonzaga beating Oregon in the championship game."
...,...
13440,I watched a basketball game.
13445,I watched a double overtime game 7 in the Eastern Conference Hockey finals.
13531,The Seattle Mariners won last night 6-4.
13690,I was happy when the Mets came from behind to beat the Cubs.


Unnamed: 0,Ground Truth
leisure,65
achievement,51
enjoy_the_moment,40
bonding,38
affection,20
exercise,8


-------------------------
Cluster: 8 	N Total: 273 (1.9%)
Cluster Exemplars: 


Unnamed: 0,Document
20,I was able to play my video game that I enjoy the most quite a bit and that made me relaxed and happy.
104,Did well in the videogame I was playing\r\n
149,I completed a modification for a video game that I was able to release to the public.
167,I found something unexpected in a video game I am playing.
442,Started a new game that I have been waiting a long time to play.
...,...
13849,Completing valkyr prime in a video game.
13956,A small event that made me happy in the past 24 hours was the release of a Nintendo game called arms.
14040,I made a great scorekeeping spreadsheet for a game I play.
14093,Purchasing and playing a new video game.


Unnamed: 0,Ground Truth
leisure,125
achievement,83
bonding,40
affection,13
enjoy_the_moment,12


-------------------------
Cluster: 9 	N Total: 224 (1.6%)
Cluster Exemplars: 


Unnamed: 0,Document
335,I'm finally about to buy a car. My savings just got their few days ago. Now I'm looking. It's exciting.
438,i bought a new vehicle.
726,"I saw a really nice car on my way home, which is rare in my town."
945,I m very happy when i first time drive my own gifted car.
1159,It was so amazing I get my new car.
...,...
13227,Last day I got my new brand car.
13301,My friend took my car to get an oil change for me.
13581,I test-drove a new vehicle that I ended up really liking.
13899,I BOUGHT A NEW CAR


Unnamed: 0,Ground Truth
achievement,179
affection,15
enjoy_the_moment,14
bonding,14
leisure,2


-------------------------
Cluster: 10 	N Total: 232 (1.6%)
Cluster Exemplars: 


Unnamed: 0,Document
17,After 3 long weeks of deep snow covered ground we finally had enough warm days in the Northeast for the snow to melt and I was able to start my day with a fantastic hike up the Appalachian trail t...
81,I went for a walk early in the morning and air was clear and crisp.
186,I stepped outside without a jacket today. The sun is shining and it finally feels a bit like spring!
189,I picked up running again now that the winter weather has finally died down. I feel much better physically and emotionally because of it.
287,The temperature today was 70 degrees and the sun was shining. I don't like cold weather so this was heaven for me.
...,...
13780,The wind blowing my hair while I stuck my head out of the car's window.
13869,I read outside for hours in the sunshine.
13939,There was a sudden shower of rain in our area.
14027,It started to pour down rain and storm


Unnamed: 0,Ground Truth
nature,168
enjoy_the_moment,19
exercise,13
achievement,10
affection,9
leisure,8
bonding,5


-------------------------
Cluster: 11 	N Total: 205 (1.5%)
Cluster Exemplars: 


Unnamed: 0,Document
1,I meditated last night.
120,I got a full night of sleep. That does not often happen with a 3 month-old in the house. \r\n
169,I got to take a nap on the couch.
213,I had an amazing dream this morning.
480,When I jumped in bed at night after a fairly exhaustive day.
...,...
13846,I took a really nice nap while the air conditioner was on and woke up feeling very refreshed.
13852,Getting a good night sleep made me happy.
13970,I was able to get a great nights sleep.
14063,I WAS SLEEPING IN LONG TIME. SO I FELT VERY RELAXED.


Unnamed: 0,Ground Truth
enjoy_the_moment,85
leisure,53
achievement,39
affection,22
exercise,5
nature,1


-------------------------
Cluster: 12 	N Total: 427 (3.0%)
Cluster Exemplars: 


Unnamed: 0,Document
14,A made quite the progress on an old rusty workbench I am restoring at the moment. With the new paint it looks like new.
67,I managed to throw out a few items that were cluttering a room. I enjoyed the space I have after.
91,I finally cleaned out my basement.
157,A friend came over to help install an appliance. I was very happy I didn't have to pay a professional.
174,I completed upgrading the switches in our house so everything looks much better.
...,...
13562,Cleaned the garage out.
13685,I bought a new appliance for kitchen.
13700,Got halfway done painting the guest bedroom.
13944,"I found a new, simple way to clean some things."


Unnamed: 0,Ground Truth
achievement,331
enjoy_the_moment,51
affection,23
leisure,11
bonding,10
nature,1


-------------------------
Cluster: 13 	N Total: 148 (1.0%)
Cluster Exemplars: 


Unnamed: 0,Document
225,I was able to turn in the forbearance form for my student loans.
446,That I finally paid off my hospital bills!
583,I got my bills paid.
648,"I checked my bank account after I was paid via direct deposit, and made more than I thought I did."
713,I finally completed the process of divesting from a big bank with unethical investing practices.
...,...
13587,I received a refund check from an account that I had at least 10 years ago and forgot that I had it.
13619,"I got a refund of $99.00 from an online newspaper which, three days ago, removed the same amount from my checking account."
13649,My electric bill was $20 less than I expected.
13796,I transferred money into my bank account.


Unnamed: 0,Ground Truth
achievement,130
enjoy_the_moment,9
affection,6
bonding,3


-------------------------
Cluster: 14 	N Total: 233 (1.6%)
Cluster Exemplars: 


Unnamed: 0,Document
106,I got done with a test that I had been nervous about taking. I think I did pretty good too.
336,I bought several books for cheap that would allow me to study well for an upcoming test.
347,My happiness had its complement when I was called to congratulate myself for my graduation of an intensive course of programming. I felt expert in technology.
353,I got the invigilation duty for class X board exams this year. This is the first time get the duty after 19 years of work. This has given me a tremendous joy and satisfaction.
421,"I found out that I passed my qualifying exam at my graduate school, which was a huge hurdle I had to overcome and was very nervous about."
...,...
13207,when my test result was published
13244,"I was taking a quiz for my algebra course online, and I scored an 83%. I've never been great at math and my extra reading and practice is starting to pay off. I couldn't stop smiling, and I had to..."
13295,I was happy when I found out I got 88% on my quiz today.
13540,I just completed my boating licence exam so now I can ride wave runners!


Unnamed: 0,Ground Truth
achievement,221
enjoy_the_moment,6
affection,2
bonding,2
exercise,1
leisure,1


-------------------------
Cluster: 15 	N Total: 1019 (7.2%)
Cluster Exemplars: 


Unnamed: 0,Document
92,My new boss appreciated the work i put in
124,complete my pending job task today.
150,I finally figured out how to properly install a program at work.
159,I finished the work project I was working on today.
170,"I participated in a group task and felt valued in it, as if my skills were needed and appreciated."
...,...
13827,I completed a large assignment at work ahead of schedule.
13844,I made a somewhat significant milestone in my job.
13859,I finished a project at work that I had been trying to get done for a several months.
13889,I finished a project at work.


Unnamed: 0,Ground Truth
achievement,878
enjoy_the_moment,64
bonding,38
affection,18
leisure,17
nature,2
exercise,2


-------------------------
Cluster: 16 	N Total: 686 (4.9%)
Cluster Exemplars: 


Unnamed: 0,Document
323,My youngest son began to stand up holding on to an object.
338,I was treating my sons family to Sunday lunch and I caught my very intelligent 16 year old grandson off guard and made him laugh.
362,It made me happy when my infant son used baby sign language for the first time.
386,"My daughter started walking, and has gotten much better in the past few days."
524,My baby smiled at me
...,...
13692,I felt happy when I took my daughter to her drum lesson and saw her smiling when I picked her up. She told me she had a great lesson and learned something new and she thanked me for taking her to ...
13842,My younger son comforted my older son after he got hurt. He was demonstrating extreme empathy at only 6 years old.
13965,MY son started taking his first steps
13986,"Two year old Kaarmikha, my daughter started talking some new words in her own language. Kissing me often and demanding her needs. Motherhood is the best thing in world."


Unnamed: 0,Ground Truth
affection,603
achievement,60
bonding,13
enjoy_the_moment,6
leisure,3
nature,1


-------------------------
Cluster: 17 	N Total: 717 (5.1%)
Cluster Exemplars: 


Unnamed: 0,Document
10,i finally learned to cook chicken\r\n
46,My job provided lunch and it was really good.
71,I satisfied my many food cravings for dinner.
113,"I got Chinese food for dinner, it was good."
127,I had cream of chicken soup for dinner. It made me feel a lot better because I have been sick.
...,...
13656,"I had a great meal of grilled chicken, soup, and salad that was delicious."
13741,I ate a nice lunch.
13979,I eat good food.
14021,I nan some chicken skewers with a delicious teriyaki sauce. I am always in my happy place when I man the grill.


Unnamed: 0,Ground Truth
enjoy_the_moment,299
achievement,157
affection,119
leisure,101
bonding,39
nature,2


-------------------------
Cluster: 18 	N Total: 168 (1.2%)
Cluster Exemplars: 


Unnamed: 0,Document
48,I made vacation plans with my daughter today for Florida in July.
233,I am planning another trip upcoming in August. This one is a week-long trip in Colorado. Planning makes me so excited. I love seeing everything coming together and falling into place. I will proba...
340,I traveled to a state I had never been before.
383,I found out that I had enough money to take a vacation this summer to the Caribbean!
388,I successfully learned how to hover in a helicopter.
...,...
12992,yesterday night i came to trichy to erode is very nice travel and thankful to god nice moment
13168,I went my friend's with beach and it is very entertaining one.
13532,We went to a meeting to finalize plans for my daughter to travel to Spain.
13806,I booked a weekend trip with my friends to the beach.


Unnamed: 0,Ground Truth
affection,57
achievement,32
bonding,27
leisure,26
enjoy_the_moment,23
nature,3


-------------------------
Cluster: 19 	N Total: 173 (1.2%)
Cluster Exemplars: 


Unnamed: 0,Document
2,My grandmother start to walk from the bed after a long time.
66,"After a long time, today my cousin came home surprisingly that made my day !"
314,Going home to see my family after being gone for college.
706,My parents trying to be super supportive even if their ideas weren't always helpful because I could see how much they cared.
842,"YESTERDAY I WAS GONE FOR MY RELATIVE FUNCTION, BECAUSE MEET MY ALL RELATIVES."
...,...
13216,I got to see my grandma after 5 months of her vacation.
13359,I was happy when I talked to my parents earlier this morning.
13395,Came home to find my parents at my house for a quick visit.
13798,"Yesterday I found out my sister is coming to visit me this weekend, and that made me happy."


Unnamed: 0,Ground Truth
affection,168
achievement,2
enjoy_the_moment,2
bonding,1


-------------------------
Cluster: 20 	N Total: 492 (3.5%)
Cluster Exemplars: 


Unnamed: 0,Document
79,I talked to my girlfriend recently and she told me that she loved me.
129,Picking up my girlfriend from the airport since I hadn't seen her for two weeks.
212,I saw my boyfriend again after being apart for a month.
278,I got my bff to stop yelling at me all the time because her boyfriend is a dick
502,My lover presented one precious gift to me made very happy.
...,...
13671,"My boyfriend unexpectedly surprised me by visiting at my parents' house where I am dogsitting. I wasn't expecting to see him last night, but he came by anyway!"
13699,Met my girlfriend after 15 days.
13874,I got to spend the evening with my boyfriend.
13893,"I came to a ton of conclusions which all amounted to if he loved me, he would show me, consistently, the way I wanted to be shown love. Simple things like love letters, flowers, romantic date nigh..."


Unnamed: 0,Ground Truth
affection,457
bonding,21
achievement,8
enjoy_the_moment,4
leisure,2


-------------------------
Cluster: 21 	N Total: 163 (1.2%)
Cluster Exemplars: 


Unnamed: 0,Document
26,I finally decorated my son's room for his birthday.
57,I got to see a very good old friend that came over at my daughter's birthday.
117,I helped plan a surprise party for my Mom's 70th birthday party coming up.
328,Yesterday was my birthday and usually I am not honored or get more than a general happy birthday from my family and friends. But my boyfriend stayed up until one minute after midnight the night b...
350,My husband and me fighting we are not celebrating my birthday
...,...
13483,My middle son's birthday was this week and he was so anxious and excited about it.
13493,It was my son's birthday and we had a party for him.
13990,when my nephew got celebrate his birthday
14033,I have celebrated my mom's birthday today with my family and friends and that made me very happy.


Unnamed: 0,Ground Truth
affection,105
bonding,36
enjoy_the_moment,18
achievement,2
leisure,2


-------------------------
Cluster: 22 	N Total: 447 (3.2%)
Cluster Exemplars: 


Unnamed: 0,Document
64,i met my old friends after 5 years.we all had a talk for half a day.we played football like my old days.
93,I saw some friends that I had not seen in a while and it was good to reconnect.
300,I spent some time with a close friend.
354,I met with my school friends and enjoyed a lot.
355,I got to see one of my friends that I havent seen in a long time and we got to catch up.
...,...
13718,Today I meet my childhood friends.
13745,Visit my best friend
13771,A friend called me and we caught up on a lot of things.
13839,I got to see my best friend for the first time in over a month. I've really missed her - like missing a part of myself (my better half practically.)


Unnamed: 0,Ground Truth
bonding,415
affection,12
achievement,9
enjoy_the_moment,7
leisure,4


-------------------------
