# Bro Analysis

This notebook analyzes how "bronouns" (here, "bro") are used in posts stored in a SQLite database.

## Setup

In [1]:
# dependencies
import sqlite3
import math
import re
import nltk
import pandas as pd
from datetime import datetime

In [2]:
# Open the SQLite database file and keep both the connection and a cursor
# around for the query cells that follow.
db_path = '../db.pronouns.with-data.sqlite'
connection = sqlite3.connect(db_path)
cursor = connection.cursor()

In [3]:
# Get the Rows
# Make columns and pronoun variables, so this is reusable
sql_columns = ['indexedAt','uri', 'text']
column_titles = ['Time', 'URI', 'Post Text'] # This is for Pandas, later down
pronoun = 'bro'
# Column names cannot be bound as SQL parameters, so they are still joined in
# from the fixed list above. The pronoun value, however, is bound through a
# `?` placeholder so that a value containing a quote (e.g. "y'all") cannot
# break or alter the statement.
select_query = f"SELECT {','.join(sql_columns)} FROM post WHERE post.pronoun = ?"
cursor.execute(select_query, (pronoun,))

# Each fetched row is a tuple in sql_columns order: (indexedAt, uri, text).
pronoun_rows = cursor.fetchall()


## Raw Data

In [4]:
# let's find out how many entries we have
# `size` is the total number of fetched rows; later cells reuse it to derive
# the preview sizes and to fill in the table captions.
size = len(pronoun_rows)

print(f"There are {size} instances of {pronoun}")

There are 21290 instances of bro


In [5]:
# Let's look at a row
# A row is a tuple in the order of sql_columns: (indexedAt, uri, text).
# Indexing [0] raises IndexError when the query returned no rows.
print(pronoun_rows[0])

('2025-02-04T17:02:30.996Z', 'at://did:plc:zljlg7cgdfsl7maqvjjpp7i4/app.bsky.feed.post/3lhejwqzwus2d', 'bro named his dog after william f buckley lmfaoo')


In [6]:
# Preview sizing: allow pandas to render up to a tenth of all rows, and slice
# a tenth of that again for the tables shown below. Both values truncate
# toward zero, so fewer than 100 rows gives an empty preview slice.
display_fraction = .1
max_table_rows = int(size * display_fraction)
raw_slice_size = int(max_table_rows * display_fraction)

# Raise the row limit and never truncate cell text (post bodies can be long).
for option_name, option_value in (
    ('display.max_rows', max_table_rows),
    ('display.max_colwidth', None),
):
    pd.set_option(option_name, option_value)

In [7]:
# Unformatted preview: the first `raw_slice_size` rows exactly as fetched.
raw_preview_rows = pronoun_rows[:raw_slice_size]
raw_frame = pd.DataFrame(raw_preview_rows, columns=column_titles)
raw_frame.style.set_caption(f"Showing {raw_slice_size} of {size}")

Unnamed: 0,Time,URI,Post Text
0,2025-02-04T17:02:30.996Z,at://did:plc:zljlg7cgdfsl7maqvjjpp7i4/app.bsky.feed.post/3lhejwqzwus2d,bro named his dog after william f buckley lmfaoo
1,2025-02-04T17:03:02.319Z,at://did:plc:a54rlrqaz5tupdleps5ygd2s/app.bsky.feed.post/3lhejxozykk24,How come I just saw my neighbor put a full trash bag on the hood of his car and drove it to the dumpster that way. Like bro was the smell that horrendous you refused to put it in ur car for 500 ft
2,2025-02-04T17:03:05.614Z,at://did:plc:tusj3ujtamsxcxj5tfbe4jgd/app.bsky.feed.post/3lhejxrokmc26,"Bro they aren’t done either 🤣 They have 29 days to make another deal, and they have to show they actually secure their side."
3,2025-02-04T17:03:41.435Z,at://did:plc:ozzimeu3j3baptfz3r5eoqf2/app.bsky.feed.post/3lhejytgn4c24,"I like how he’s too bro to tuck in the shirt, just puts the belt around it"
4,2025-02-04T17:03:51.591Z,at://did:plc:kdfeyljkeezfmbhplhbgnun5/app.bsky.feed.post/3lhejz5j7es2r,"Good, cuz I never manage to make myself gape so I’m relying on you bro 👀"
5,2025-02-04T17:04:00.691Z,at://did:plc:tyfbq5qsy34d3s2xz5rptdii/app.bsky.feed.post/3lhejzgh2tk2u,Bro skipped every single leg day tryna impress twinks w arm muscle
6,2025-02-04T17:04:00.996Z,at://did:plc:jecowr67m4kclijoza5bws4m/app.bsky.feed.post/3lhejzgnzik2h,"How do you turn off the porn accounts while looking for cute stuff? Goth/hello kitty searches, bro I am not interested in any of that. 🫠 I actually only want to see sanrio and spooky stuff. Not poosee lips and tiddies. 🙃 I wanna see cutie stuff, NOT coochie stuff."
7,2025-02-04T17:04:11.419Z,at://did:plc:6comhmbsjsvmzuvgmjzmzwpy/app.bsky.feed.post/3lhejzqmso22e,Bro looks like his mom told him “you ain’t leaving until all your chores are done”
8,2025-02-04T17:04:18.820Z,at://did:plc:3gxlsgpcwwppj37l2l3ygrpo/app.bsky.feed.post/3lhejzy7yvc2d,"Disclaimer: I am not a tech bro so miss me with the ""bullish on AGI"" bullshit or whatever. I just think the openly negative sentiment around anything labeled ""AI"" has obscured the things going on in AI that seem to be a net positive. & I don't mean this in a defeatist sense, but it's not going away"
9,2025-02-04T17:04:23.509Z,at://did:plc:u7cf7nkfjf65u6ve4qtulnqq/app.bsky.feed.post/3lhek23qx5k2n,me too bro me too



## Prettify the data
Let's make the data a little more presentable: turn the URIs into real links, clean up the timestamps, and highlight the word itself.

In [8]:
from src.notebook_helpers import make_link
from src.notebook_helpers import prettify_time
from src.notebook_helpers import mark


In [9]:
def highlight(s):
    """Return the post text `s` with occurrences of the analysed pronoun marked.

    The pattern is built from the notebook-level `pronoun` variable instead of
    a hard-coded "bro", so changing the pronoun in the query cell also changes
    what is highlighted. For ``pronoun = 'bro'`` the pattern is exactly
    ``"(bro+)"``.

    NOTE(review): assumes `mark` interprets its second argument as a regular
    expression — confirm against src.notebook_helpers.
    """
    # re.escape keeps any regex metacharacters in the pronoun literal; the
    # trailing `+` still lets the last character repeat (e.g. "broooo").
    pattern = f"({re.escape(pronoun)}+)"
    # The result of `mark` is coerced to str via the f-string.
    return f'{mark(s, pattern)}'

In [10]:
# Same preview slice as the raw table, now displayed through the helper
# formatters (time, link, highlight). Only the rendering changes; the frame
# itself keeps the raw values.
preview_rows = pronoun_rows[:raw_slice_size]
pretty_data = pd.DataFrame(preview_rows, columns=column_titles).reset_index(drop=True)

(
    pretty_data.style
    .format(subset=['Time'], formatter=prettify_time)
    .format(subset=['URI'], formatter=make_link, escape='html')
    .format(subset=['Post Text'], formatter=highlight, escape='html')
    .set_caption(f"Showing {raw_slice_size} of {size} instances of {pronoun}")
)


Unnamed: 0,Time,URI,Post Text
0,"Feb 04, 2025 05:02PM",Post,bro named his dog after william f buckley lmfaoo
1,"Feb 04, 2025 05:03PM",Post,How come I just saw my neighbor put a full trash bag on the hood of his car and drove it to the dumpster that way. Like bro was the smell that horrendous you refused to put it in ur car for 500 ft
2,"Feb 04, 2025 05:03PM",Post,"Bro they aren’t done either 🤣 They have 29 days to make another deal, and they have to show they actually secure their side."
3,"Feb 04, 2025 05:03PM",Post,"I like how he’s too bro to tuck in the shirt, just puts the belt around it"
4,"Feb 04, 2025 05:03PM",Post,"Good, cuz I never manage to make myself gape so I’m relying on you bro 👀"
5,"Feb 04, 2025 05:04PM",Post,Bro skipped every single leg day tryna impress twinks w arm muscle
6,"Feb 04, 2025 05:04PM",Post,"How do you turn off the porn accounts while looking for cute stuff? Goth/hello kitty searches, bro I am not interested in any of that. 🫠 I actually only want to see sanrio and spooky stuff. Not poosee lips and tiddies. 🙃 I wanna see cutie stuff, NOT coochie stuff."
7,"Feb 04, 2025 05:04PM",Post,Bro looks like his mom told him “you ain’t leaving until all your chores are done”
8,"Feb 04, 2025 05:04PM",Post,"Disclaimer: I am not a tech bro so miss me with the ""bullish on AGI"" bullshit or whatever. I just think the openly negative sentiment around anything labeled ""AI"" has obscured the things going on in AI that seem to be a net positive. & I don't mean this in a defeatist sense, but it's not going away"
9,"Feb 04, 2025 05:04PM",Post,me too bro me too


## Now let's look at some sentiment

In [11]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Score every fetched post with VADER and keep the four scores next to the
# original row values.
rows_with_sentiment = []
# A dedicated name is used for the wider header list: rebinding
# `column_titles` here would leave it with seven entries and break the earlier
# three-column table cells if they are run again afterwards.
sentiment_column_titles = ['Time', 'URI', 'Post Text', 'Negative', 'Neutral', 'Positive', 'Compound']

# Running sums of the per-post scores (not counts, despite the names); the
# following cell divides them by the number of rows to get the means.
neg_count = 0
pos_count = 0
neu_count = 0
# Build the analyzer once: it does not depend on the row being scored, so
# constructing it inside the loop only repeated the same setup per post.
sid = SentimentIntensityAnalyzer()
for row in pronoun_rows:
    ss = sid.polarity_scores(row[2])  # row[2] is the post text
    rows_with_sentiment.append([*row, ss['neg'], ss['neu'], ss['pos'], ss['compound']])
    neg_count += ss['neg']
    neu_count += ss['neu']
    pos_count += ss['pos']


# Only the preview slice is rendered; the full scored list stays available in
# `rows_with_sentiment`.
pretty_sentiment = pd.DataFrame(rows_with_sentiment[0:raw_slice_size], columns=sentiment_column_titles)

ps = pretty_sentiment.reset_index(drop=True)

ps.style\
    .format(subset=['Time'], formatter=prettify_time)\
    .format(subset=['URI'], formatter= make_link , escape='html')\
    .format(subset=['Post Text'], formatter=highlight, escape='html' )\
    .set_caption(f"Showing {raw_slice_size} of {size} instances of {pronoun}")



Unnamed: 0,Time,URI,Post Text,Negative,Neutral,Positive,Compound
0,"Feb 04, 2025 05:02PM",Post,bro named his dog after william f buckley lmfaoo,0.0,1.0,0.0,0.0
1,"Feb 04, 2025 05:03PM",Post,How come I just saw my neighbor put a full trash bag on the hood of his car and drove it to the dumpster that way. Like bro was the smell that horrendous you refused to put it in ur car for 500 ft,0.158,0.79,0.052,-0.6249
2,"Feb 04, 2025 05:03PM",Post,"Bro they aren’t done either 🤣 They have 29 days to make another deal, and they have to show they actually secure their side.",0.0,0.902,0.098,0.34
3,"Feb 04, 2025 05:03PM",Post,"I like how he’s too bro to tuck in the shirt, just puts the belt around it",0.0,0.857,0.143,0.3612
4,"Feb 04, 2025 05:03PM",Post,"Good, cuz I never manage to make myself gape so I’m relying on you bro 👀",0.0,0.818,0.182,0.4404
5,"Feb 04, 2025 05:04PM",Post,Bro skipped every single leg day tryna impress twinks w arm muscle,0.0,0.775,0.225,0.4404
6,"Feb 04, 2025 05:04PM",Post,"How do you turn off the porn accounts while looking for cute stuff? Goth/hello kitty searches, bro I am not interested in any of that. 🫠 I actually only want to see sanrio and spooky stuff. Not poosee lips and tiddies. 🙃 I wanna see cutie stuff, NOT coochie stuff.",0.045,0.819,0.136,0.5487
7,"Feb 04, 2025 05:04PM",Post,Bro looks like his mom told him “you ain’t leaving until all your chores are done”,0.0,0.857,0.143,0.3612
8,"Feb 04, 2025 05:04PM",Post,"Disclaimer: I am not a tech bro so miss me with the ""bullish on AGI"" bullshit or whatever. I just think the openly negative sentiment around anything labeled ""AI"" has obscured the things going on in AI that seem to be a net positive. & I don't mean this in a defeatist sense, but it's not going away",0.144,0.815,0.041,-0.5938
9,"Feb 04, 2025 05:04PM",Post,me too bro me too,0.0,1.0,0.0,0.0


In [12]:
# Turn the accumulated per-post score sums into per-post averages.
row_total = len(pronoun_rows)
mean_neg, mean_pos, mean_neu = (
    score_sum / row_total for score_sum in (neg_count, pos_count, neu_count)
)

print(f"""
Mean Negative Sentiment {mean_neg}
Mean Positive Sentiment {mean_pos}
Mean Neutral Sentiment {mean_neu}
""")


Mean Negative Sentiment 0.10793100046970425
Mean Positive Sentiment 0.11620070455612957
Mean Neutral Sentiment 0.7758673555659956



## Now some other useful information from context

In [13]:
from src.notebook_helpers import get_frequency_dict
from src.regexes import PROFANITY_REGEX
from src.regexes import AFFIRMATION_REGEX
from src.regexes import NEGATION_REGEX

In [14]:
# Build one term -> occurrences mapping per regex family over the fetched rows.
# NOTE(review): the exact matching rules live in get_frequency_dict — confirm
# there how rows and matches are counted.
negations, affirmations, profanities = (
    get_frequency_dict(pronoun_rows, term_regex)
    for term_regex in (NEGATION_REGEX, AFFIRMATION_REGEX, PROFANITY_REGEX)
)

In [15]:
# Grand totals across all matched terms of each family; they are shown in the
# captions of the three frequency tables.
total_negations = sum(negations.values())
total_affirmations = sum(affirmations.values())
# Sum the profanity counts. This line previously re-summed `affirmations`, so
# the profanity table caption reported the affirmation total.
total_profanities = sum(profanities.values())


In [16]:

# Negation terms ranked from most to least frequent; `nt` is the same table
# renumbered from 0 for display.
neg_table = pd.DataFrame(negations.items(), columns=['Negation', 'Occurences']).sort_values(
    "Occurences", axis=0, ascending=False
)
nt = neg_table.reset_index(drop=True)

nt.style.set_caption(f"Showing {total_negations} negations that were used with {pronoun}")

Unnamed: 0,Negation,Occurences
0,no,1069
1,nah,132
2,nope,15
3,na,11
4,naw,10
5,nooo,9
6,nooooo,8
7,noooo,7
8,noooooooo,4
9,nuh,3


In [17]:
# Affirmation terms ranked from most to least frequent; `at` is the same table
# renumbered from 0 for display.
aff_table = pd.DataFrame(affirmations.items(), columns=['Affirmation', 'Occurences']).sort_values(
    "Occurences", axis=0, ascending=False
)
at = aff_table.reset_index(drop=True)

at.style.set_caption(f"Showing {total_affirmations} affirmations that were used with {pronoun}")

Unnamed: 0,Affirmation,Occurences
0,yeah,404
1,yes,178
2,ya,97
3,yea,47
4,yep,15
5,yap,14
6,yup,11
7,yu,6
8,yay,6
9,ye,6


In [18]:
# Profanity terms ranked from most to least frequent; `pt` is the same table
# renumbered from 0 for display.
prof_table = pd.DataFrame(profanities.items(), columns=['Profanity', 'Occurences']).sort_values(
    "Occurences", axis=0, ascending=False
)
pt = prof_table.reset_index(drop=True)

pt.style.set_caption(f"Showing {total_profanities} profanities that were used with {pronoun}")

Unnamed: 0,Profanity,Occurences
0,ass,1201
1,shit,1033
2,fucking,956
3,fuck,879
4,damn,426
5,wtf,346
6,fuckin,131
7,dick,122
8,cock,113
9,fucked,110


## Associated Pronouns

In [33]:
from nltk.tokenize import word_tokenize
from collections import Counter

# Count every token that the NLTK tagger labels PRP$ across all posts.
# Tokens are lower-cased so that differently capitalised forms share one tally.
# The tally reflects the tagger's output as-is, so mis-tagged tokens are
# counted too.
associate_pronouns = Counter()
for row in pronoun_rows:
    tagged_tokens = nltk.pos_tag(word_tokenize(row[2]))  # row[2] is the post text
    associate_pronouns.update(
        token.lower() for token, tag in tagged_tokens if tag == 'PRP$'
    )

total_pronouns = sum(associate_pronouns.values())

# The first column holds pronouns; it was previously labelled 'Profanity', a
# leftover from the profanity table this cell was copied from.
pron_table = pd.DataFrame(associate_pronouns.items(), columns=['Pronoun', 'Occurences'])
pron_table = pron_table.sort_values("Occurences", axis=0, ascending=False)


# NOTE: `pt` is the same name the profanity table cell uses; it is kept here
# so anything that reads `pt` after this cell still sees the pronoun table.
pt = pron_table.reset_index(drop=True)

pt.style\
    .set_caption(f"Showing {total_pronouns} pronouns that were used with {pronoun}")


Unnamed: 0,Profanity,Occurences
0,my,3358
1,his,1704
2,your,1408
3,their,675
4,our,438
5,its,382
6,her,317
7,“,3
8,you,2
9,yee,1
