# Bruh Analysis

This notebook analyzes the use of "bronouns" (here, "bruh") in posts stored in a SQLite database.

## Setup

In [3]:
# dependencies
import sqlite3
import math
import re
import nltk
import pandas as pd
from datetime import datetime

In [4]:
# Open the SQLite database file and grab a cursor for the queries below
db_path = '../db.pronouns.with-data.sqlite'
connection = sqlite3.connect(db_path)
cursor = connection.cursor()

In [5]:
# Get the Rows
# Make columns and pronoun variables, so this is reusable
sql_columns = ['indexedAt','uri', 'text']
column_titles = ['Time', 'URI', 'Post Text'] # This is for Pandas, later down
pronoun = 'bruh'
# Column names cannot be bound as SQL parameters, so they are still joined in
# from the fixed list above. The pronoun value is bound through a `?`
# placeholder instead of being pasted into the SQL text, so a pronoun that
# contains a quote character (e.g. "y'all") cannot break the statement.
select_query = f"SELECT {','.join(sql_columns)} FROM post WHERE post.pronoun = ?"
cursor.execute(select_query, (pronoun,))

# Each fetched row is a tuple in the same order as sql_columns
pronoun_rows = cursor.fetchall()


## Raw Data

In [6]:
# Count the rows that came back for this pronoun and report the total
size = len(pronoun_rows)
instance_summary = f"There are {size} instances of {pronoun}"

print(instance_summary)

There are 3207 instances of bruh


In [7]:
# Peek at the first fetched row to see the raw tuple layout
first_row = pronoun_rows[0]
print(first_row)

('2025-02-04T17:03:09.084Z', 'at://did:plc:qeld3yz6jsokau6vnwapubtz/app.bsky.feed.post/3lhejxvxv5c2q', 'Bruh im just tryna wallow in sadness why is that so hard nowadays')


In [8]:
# Keep rendered tables small: allow at most a tenth of the rows to be displayed,
# and preview only a tenth of that cap in the tables below.
fraction = .1
max_table_rows = int(size * fraction)
raw_slice_size = int(max_table_rows * fraction)

# Apply the pandas display settings (None lifts the column-width limit so post text is not truncated)
for option_name, option_value in (
    ('display.max_rows', max_table_rows),
    ('display.max_colwidth', None),
):
    pd.set_option(option_name, option_value)

In [9]:
# Preview the first raw_slice_size rows exactly as they came out of the database
preview_rows = pronoun_rows[:raw_slice_size]
raw_frame = pd.DataFrame(preview_rows, columns=column_titles)
raw_frame.style.set_caption(f"Showing {raw_slice_size} of {size}")

Unnamed: 0,Time,URI,Post Text
0,2025-02-04T17:03:09.084Z,at://did:plc:qeld3yz6jsokau6vnwapubtz/app.bsky.feed.post/3lhejxvxv5c2q,Bruh im just tryna wallow in sadness why is that so hard nowadays
1,2025-02-04T17:05:03.107Z,at://did:plc:bbeqyegnad5xy2nl6n5iw3zy/app.bsky.feed.post/3lhek3c2mtc26,bruh finding somewhere to sit on my campus on my fav building is impossible im gonna die before i even get to one spot
2,2025-02-04T17:05:46.805Z,at://did:plc:52bxkk7kixdpzgsile72ukcl/app.bsky.feed.post/3lhek4lkg3k2q,Bruh they sold crack to black neighborhoods and played an active part in the 80s drug trade. They was never it to begin with!
3,2025-02-04T17:07:12.005Z,at://did:plc:iixel3uapg43q4unbjx4a726/app.bsky.feed.post/3lhek754fmk25,"bruh if you don’t want to crawl inside of my skin then please leave me alone, I’m so tired of this nonchalant shit like do you fw me or not"
4,2025-02-04T17:09:51.696Z,at://did:plc:izxpomdyri45gzhppiiyattq/app.bsky.feed.post/3lhekdvrxj22j,Honestly can't wait til my kid says bruh to me
5,2025-02-04T17:16:51.392Z,at://did:plc:qw4sx3aio4hydfpmwkobfr53/app.bsky.feed.post/3lhekqfsyd22c,Naw bruh Cam'ron gone mistake you for that nigga lmaooo
6,2025-02-04T17:17:31.688Z,at://did:plc:d3e7nmgzhee6zbr36vpp5f7r/app.bsky.feed.post/3lhekri5ml22p,"Bruh istgg these mfs be saying anything they can think of that isn't ""where are you"" 💀"
7,2025-02-04T17:18:23.405Z,at://did:plc:gvwzq4j2zfxxolxl7ekezpvk/app.bsky.feed.post/3lhekt5acxs2y,"//bruh i've actually been trying to cool down my horny roll ngl, this shit's getting samey but also lol funni smut"
8,2025-02-04T17:20:54.901Z,at://did:plc:cpfybp6ugjvpds4jy3snmrca/app.bsky.feed.post/3lhekxlgy2s2c,stood up and pain shot down the back of my hip and leg im 22 bruh can my body not fall apart already
9,2025-02-04T17:30:37.677Z,at://did:plc:wj4s7p6rvcgucx7jjspr4suw/app.bsky.feed.post/3lheliwscok2v,Bruh Sinatra is giving Elon. 😞 she got to gooooo



## Prettify the data
Let's make the data a little more presentable: turn the `at://` URIs into real links, show a cleaner timestamp, and highlight the pronoun in each post.

In [10]:
from src.notebook_helpers import make_link
from src.notebook_helpers import prettify_time
from src.notebook_helpers import mark

In [11]:
def highlight(s):
    """Return the result of ``mark(s, "(bru+h+)")`` formatted as a string.

    The pattern matches "bruh" with any number of repeated "u" or "h"
    characters; how matches are marked is up to the ``mark`` helper.
    """
    return f'{mark(s, "(bru+h+)")}'

In [12]:
# Same preview slice as the raw table, with a fresh 0..n-1 index
pretty_data = (
    pd.DataFrame(pronoun_rows[0:raw_slice_size], columns=column_titles)
    .reset_index(drop=True)
)

# Render each column through its helper: formatted time, link for the URI,
# and the highlighted pronoun in the post text.
(
    pretty_data.style
    .format(subset=['Time'], formatter=prettify_time)
    .format(subset=['URI'], formatter=make_link, escape='html')
    .format(subset=['Post Text'], formatter=highlight, escape='html')
    .set_caption(f"Showing {raw_slice_size} of {size} instances of {pronoun}")
)


Unnamed: 0,Time,URI,Post Text
0,"Feb 04, 2025 05:03PM",Post,Bruh im just tryna wallow in sadness why is that so hard nowadays
1,"Feb 04, 2025 05:05PM",Post,bruh finding somewhere to sit on my campus on my fav building is impossible im gonna die before i even get to one spot
2,"Feb 04, 2025 05:05PM",Post,Bruh they sold crack to black neighborhoods and played an active part in the 80s drug trade. They was never it to begin with!
3,"Feb 04, 2025 05:07PM",Post,"bruh if you don’t want to crawl inside of my skin then please leave me alone, I’m so tired of this nonchalant shit like do you fw me or not"
4,"Feb 04, 2025 05:09PM",Post,Honestly can't wait til my kid says bruh to me
5,"Feb 04, 2025 05:16PM",Post,Naw bruh Cam'ron gone mistake you for that nigga lmaooo
6,"Feb 04, 2025 05:17PM",Post,"Bruh istgg these mfs be saying anything they can think of that isn't ""where are you"" 💀"
7,"Feb 04, 2025 05:18PM",Post,"//bruh i've actually been trying to cool down my horny roll ngl, this shit's getting samey but also lol funni smut"
8,"Feb 04, 2025 05:20PM",Post,stood up and pain shot down the back of my hip and leg im 22 bruh can my body not fall apart already
9,"Feb 04, 2025 05:30PM",Post,Bruh Sinatra is giving Elon. 😞 she got to gooooo


## Now let's look at some sentiment

In [15]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Score every fetched post with VADER, keep the per-post scores next to the
# original row, and accumulate the component totals used for the means later.
rows_with_sentiment = []
# NOTE: this rebinds column_titles to the seven-column layout. The earlier
# three-column table cells will fail if re-run after this one unless the
# query cell (which resets column_titles) is re-run first.
column_titles = ['Time', 'URI', 'Post Text', 'Negative', 'Neutral', 'Positive', 'Compound']

neg_count = 0
pos_count = 0
neu_count = 0

# Build the analyzer once. It does not depend on the row, so constructing it
# inside the loop only repeated the same setup work for every post.
sid = SentimentIntensityAnalyzer()
for row in pronoun_rows:
    # row[2] is the post text (third entry of sql_columns)
    ss = sid.polarity_scores(row[2])
    # Column order must match column_titles: neg, neu, pos, compound
    row_as_list = [*row, ss['neg'], ss['neu'], ss['pos'], ss['compound']]
    neg_count += ss['neg']
    neu_count += ss['neu']
    pos_count += ss['pos']
    rows_with_sentiment.append(row_as_list)



# Only the first raw_slice_size scored rows are rendered
pretty_sentiment = pd.DataFrame(rows_with_sentiment[0:raw_slice_size], columns=column_titles)

ps = pretty_sentiment.reset_index(drop=True)

(
    ps.style
    .format(subset=['Time'], formatter=prettify_time)
    .format(subset=['URI'], formatter=make_link, escape='html')
    .format(subset=['Post Text'], formatter=highlight, escape='html')
    .set_caption(f"Showing {raw_slice_size} of {size} instances of {pronoun}")
)



Unnamed: 0,Time,URI,Post Text,Negative,Neutral,Positive,Compound
0,"Feb 04, 2025 05:03PM",Post,Bruh im just tryna wallow in sadness why is that so hard nowadays,0.302,0.698,0.0,-0.5812
1,"Feb 04, 2025 05:05PM",Post,bruh finding somewhere to sit on my campus on my fav building is impossible im gonna die before i even get to one spot,0.14,0.753,0.108,-0.2263
2,"Feb 04, 2025 05:05PM",Post,Bruh they sold crack to black neighborhoods and played an active part in the 80s drug trade. They was never it to begin with!,0.0,0.803,0.197,0.6588
3,"Feb 04, 2025 05:07PM",Post,"bruh if you don’t want to crawl inside of my skin then please leave me alone, I’m so tired of this nonchalant shit like do you fw me or not",0.256,0.588,0.156,-0.5984
4,"Feb 04, 2025 05:09PM",Post,Honestly can't wait til my kid says bruh to me,0.0,0.75,0.25,0.4588
5,"Feb 04, 2025 05:16PM",Post,Naw bruh Cam'ron gone mistake you for that nigga lmaooo,0.211,0.789,0.0,-0.34
6,"Feb 04, 2025 05:17PM",Post,"Bruh istgg these mfs be saying anything they can think of that isn't ""where are you"" 💀",0.0,1.0,0.0,0.0
7,"Feb 04, 2025 05:18PM",Post,"//bruh i've actually been trying to cool down my horny roll ngl, this shit's getting samey but also lol funni smut",0.0,0.78,0.22,0.6542
8,"Feb 04, 2025 05:20PM",Post,stood up and pain shot down the back of my hip and leg im 22 bruh can my body not fall apart already,0.13,0.87,0.0,-0.5106
9,"Feb 04, 2025 05:30PM",Post,Bruh Sinatra is giving Elon. 😞 she got to gooooo,0.0,0.769,0.231,0.34


In [16]:
# Average each sentiment component over all fetched posts (not just the previewed slice)
total_posts = len(pronoun_rows)
mean_neg = neg_count / total_posts
mean_pos = pos_count / total_posts
mean_neu = neu_count / total_posts

sentiment_summary = f"""
Mean Negative Sentiment {mean_neg}
Mean Positive Sentiment {mean_pos}
Mean Neutral Sentiment {mean_neu}
"""
print(sentiment_summary)


Mean Negative Sentiment 0.11880947926410969
Mean Positive Sentiment 0.0988889928281885
Mean Neutral Sentiment 0.7822912379170557



In [17]:
from src.notebook_helpers import get_frequency_dict
from src.regexes import PROFANITY_REGEX
from src.regexes import AFFIRMATION_REGEX
from src.regexes import NEGATION_REGEX

In [18]:
# Build one frequency dict per regex over the fetched rows, in the same
# order as before: negations, affirmations, profanities.
negations, affirmations, profanities = (
    get_frequency_dict(pronoun_rows, pattern)
    for pattern in (NEGATION_REGEX, AFFIRMATION_REGEX, PROFANITY_REGEX)
)

In [19]:
# Total number of matches per category; only the counts matter, so sum the dict values directly
total_negations = sum(negations.values())
total_affirmations = sum(affirmations.values())
# Fixed: this previously summed `affirmations`, so the profanity table caption
# reported the affirmation total instead of the profanity total.
total_profanities = sum(profanities.values())


In [20]:

# One row per distinct negation with its count; the caption carries the overall total
neg_table = pd.DataFrame(negations.items(), columns=['Negation', 'Occurences'])
nt = neg_table.reset_index(drop=True)
negation_caption = f"Showing {total_negations} negations that were used with {pronoun}"

nt.style.set_caption(negation_caption)

Unnamed: 0,Negation,Occurences
0,naw,7
1,no,146
2,nah,31
3,noooo,2
4,nope,2
5,nuh,1
6,nooooooo,1
7,naaaaaaaaaah,1
8,nooooo,1


In [21]:
# One row per distinct affirmation with its count; the caption carries the overall total
aff_table = pd.DataFrame(affirmations.items(), columns=['Affirmation', 'Occurences'])
at = aff_table.reset_index(drop=True)
affirmation_caption = f"Showing {total_affirmations} affirmations that were used with {pronoun}"

at.style.set_caption(affirmation_caption)

Unnamed: 0,Affirmation,Occurences
0,yeah,29
1,ya,23
2,yeaaaaah,1
3,yap,1
4,yay,3
5,yes,23
6,yea,5
7,yeh,1
8,ye,2
9,yep,5


In [22]:
# One row per distinct profanity with its count; the caption carries the overall total
prof_table = pd.DataFrame(profanities.items(), columns=['Profanity', 'Occurences'])
pt = prof_table.reset_index(drop=True)
profanity_caption = f"Showing {total_profanities} profanities that were used with {pronoun}"

pt.style.set_caption(profanity_caption)

Unnamed: 0,Profanity,Occurences
0,shit,232
1,fuck,123
2,fuckin,24
3,fucking,143
4,damn,59
5,fucks,1
6,wtf,86
7,fuckingdyingsky,1
8,ass,191
9,fucked,34
