In [164]:
import pandas as pd
import datetime

In [166]:
post_data = pd.read_csv("aiethics_data_pmaw_posts_cleaned.csv")
comments_data = pd.read_csv("aiethics_comments.csv")

In [167]:
print(datetime.datetime.fromtimestamp(min(post_data['created_utc'])).isoformat())
print(datetime.datetime.fromtimestamp(max(comments_data['created_utc'])).isoformat())

2016-07-01T13:52:35
2023-04-19T19:15:53


In [169]:
self_text_post = post_data[['author', 'id', 'selftext', 'score']]
self_text_post['sub_id'] = [id for id in self_text_post['id']]
self_text_comments = comments_data[['author', 'id','selftext','sub_id', 'score']]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [170]:
all_text = pd.concat([self_text_post, self_text_comments])

In [171]:
all_text.sort_values(by=['sub_id'])

Unnamed: 0,author,id,selftext,score,sub_id
104,UmamiSalami,d4wtdvg,I like this approach (my background is in econ...,2,4qt241
103,LichJesus,d4w8d6a,> To be honest I'm not sure I could even relia...,2,4qt241
102,dust4ngel,d4wug88,what i think is fascinating about this is that...,2,4qt241
101,amennen,d4wbva1,We don't need everyone to agree on an answer t...,3,4qt241
100,noggin-scratcher,d4w1mid,>Swerving has a 1 in 10 million chance of kill...,6,4qt241
...,...,...,...,...,...
205,skyfishgoo,iq00g6t,i doubt it.\n\nwe can't even agree on the defi...,0,xop2wx
206,Mental-Swordfish7129,j2m0x88,What if we know a guy who knows a guy who may ...,1,xop2wx
209,skyfishgoo,iq0mjyl,probably from all the times we shut it off as ...,0,xop2wx
68,agustin-vaquero,xwt1n3,[vAIsual](https://www.linkedin.com/company/vai...,0,xwt1n3


In [172]:
all_text.shape

(360, 5)

In [175]:
all_text['score'].mean()

2.5027777777777778

In [176]:
all_text['sub_id'].nunique()

94

# try network creation

In [11]:
id_author = dict(zip(all_text['id'], all_text['author']))
id_parent = dict(zip(all_text['id'], all_text['sub_id']))
ids = all_text['id'].to_list()

In [12]:
# who replies to whom?
final_data = {'src':[], 'target':[]}
for i in ids:
    cur_parent = id_parent[i]
    cur_auth = id_author[i]
    if cur_parent != i and cur_parent in id_author.keys(): # if any rows are dropped their children are also dropped
        final_data['src'].append(cur_auth)
        parent_author = id_author[cur_parent]
        final_data['target'].append(parent_author)

In [13]:
final_data = pd.DataFrame(final_data)

In [16]:
final_data.shape

(218, 2)

In [15]:
#remove self loops
final_data = final_data[final_data['src']!=final_data['target']]

In [17]:
final_data['weight'] = 1

In [18]:
final_map = final_data.groupby(['src', 'target']).sum().reset_index()

In [19]:
final_map.shape

(149, 3)

In [20]:
final_map = final_map.sort_values(by=['weight'])

In [21]:
final_map[final_map['weight']>=2]

Unnamed: 0,src,target,weight
145,tingshuo,UmamiSalami,2
143,son1dow,isincredible,2
4,AriasFco,jmp4joy,2
140,skyfishgoo,quent-sb,2
139,skyfishgoo,looselyhuman,2
71,UmamiTofu,jmp4joy,2
72,UmamiTofu,machineethicsthrowaw,2
103,granbolinaboom,rand3289,2
47,PantsGrenades,UmamiSalami,2
66,UmamiSalami,Periplokos,2


In [22]:
final_map = final_map[final_map['src']!=final_map['target']]

In [23]:
final_map.shape

(149, 3)

In [24]:
final_map.to_csv("social_network_v1_aiethics.csv")

# Trying topic modeling

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vect = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
doc_term_matrix = tfidf_vect.fit_transform(all_text['selftext'])

In [16]:
doc_term_matrix

<360x2405 sparse matrix of type '<class 'numpy.float64'>'
	with 13126 stored elements in Compressed Sparse Row format>

In [17]:
from sklearn.decomposition import NMF

nmf = NMF(n_components=5, random_state=42)
nmf.fit(doc_term_matrix)



NMF(n_components=5, random_state=42)

In [18]:
for i,topic in enumerate(nmf.components_):
    print(f'Top 10 words for topic #{i}:')
    print([tfidf_vect.get_feature_names()[i] for i in topic.argsort()[-10:]])
    print('\n')

Top 10 words for topic #0:
['human', 'bias', 'https', 'amp', 'look', 'issues', 'research', 'like', 'ethics', 'ai']


Top 10 words for topic #1:
['superintelligence', 'help', 'comments', 'prevent', 'old', 'sorry', 'remove', 'comment', 'doxxing', 'deleted']


Top 10 words for topic #2:
['en', 'aiethics', 'artificial', 'wiki', 'org', 'comments', 'reddit', 'www', 'com', 'https']


Top 10 words for topic #3:
['want', 'algorithm', 'big', 'hear', 'read', 'results', 'ai', 'assumptions', 'algorithms', 'data']


Top 10 words for topic #4:
['morality', 'right', 'don', 'make', 'human', 'ethical', 'good', 'just', 'think', 'moral']






In [19]:
topic_values = nmf.transform(doc_term_matrix)
all_text['Topic'] = topic_values.argmax(axis=1)
all_text.head()

Unnamed: 0,author,id,selftext,score,sub_id,Topic
0,bioethicallysound,7hft48,https://soundcloud.com/21-bioethically-sound/0...,5,7hft48,2
1,flodyssey,7em793,I'm looking for some machine ethics moral dile...,12,7em793,4
2,UmamiSalami,72iszw,(email sent to AAAI mailing list last week)\n\...,3,72iszw,0
3,PBJLNGSN,7197l1,Hello! I am a student working on a design/rese...,3,7197l1,0
4,Apporve99,6xg682,what are the introductory steps I must take to...,1,6xg682,4


In [12]:
all_text.sort_values(by=['Topic', 'score'], ascending=[True, False])

Unnamed: 0,id,selftext,score,sub_id,Topic
73,x9qpi4,I want to introduce a paper I wrote with Peter...,18,x9qpi4,0
11,a4p7eg,"Hey everyone, I’m curious as to whether any of...",10,a4p7eg,0
39,c1nhzf,Looks like an interesting internship opportuni...,9,c1nhzf,0
215,iowhzf3,There is this concept in AI safety research ca...,8,xh5iy9,0
67,lq92dr,How does one go about becoming an AI ethicist?...,7,lq92dr,0
...,...,...,...,...,...
160,h7vd79p,Did you take into account that small companies...,0,oycaif,4
163,h7vqrbu,"In other words, you are sacrificing common peo...",0,oycaif,4
205,iq00g6t,i doubt it.\n\nwe can't even agree on the defi...,0,xop2wx,4
209,iq0mjyl,probably from all the times we shut it off as ...,0,xop2wx,4


In [13]:
all_text

Unnamed: 0,id,selftext,score,sub_id,Topic
0,7hft48,https://soundcloud.com/21-bioethically-sound/0...,5,7hft48,2
1,7em793,I'm looking for some machine ethics moral dile...,12,7em793,4
2,72iszw,(email sent to AAAI mailing list last week)\n\...,3,72iszw,0
3,7197l1,Hello! I am a student working on a design/rese...,3,7197l1,0
4,6xg682,what are the introductory steps I must take to...,1,6xg682,4
...,...,...,...,...,...
261,ddxbdvg,[removed],2,5uw2p6,4
262,dkgagf9,quantum effects.\n\nby the time consciousness ...,1,5uw2p6,4
263,ddxfzlq,[removed],2,5uw2p6,4
264,ddlq0tq,"Well, what you really would want to do is just...",2,5tbzwk,4


In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [25]:
def get_top_sentiment(text):
    # get the sentiment
    sentiment = SentimentIntensityAnalyzer()
    sent = sentiment.polarity_scores(text)
    #print(sent)
    sent.pop('compound')
    # get the sentiment that has highest value
    best_sent = max(sent, key=sent.get)
    if best_sent == 'neu':
        pos_val = sent['pos']
        neg_val = sent['neg']
        diff1 = sent['neu'] - sent['pos']
        diff2 = sent['neu'] - sent['neg']
        #print(diff1, diff2)
        if diff1 < 0.6:
            best_sent = 'pos'
        elif diff2 < 0.6:
            best_sent = 'neg'
    return best_sent

In [59]:
#test func
text_1 = "The book was a perfect balance between wrtiting style and plot."
print(get_top_sentiment(text_1))

pos


In [63]:
all_text['sentiment'] = all_text['selftext'].apply (lambda text: get_top_sentiment(text))

In [1]:
all_text['sentiment'].value_counts()

NameError: name 'all_text' is not defined

In [14]:
all_text.to_csv("aiethics_topic_analysis.csv")

## emotion mapping and plotting

In [3]:
import pandas as pd
data_analyzed = pd.read_csv("aiethics_topic_analysis.csv")

In [162]:
data_analyzed.shape

(353, 10)

In [168]:
data_analyzed.columns

Index(['Unnamed: 0', 'id', 'selftext', 'score', 'sub_id', 'Topic',
       'Topic_Label', 'Unnamed: 7', 'Unnamed: 8', 'sentiment'],
      dtype='object')

In [88]:
data_analyzed.head(10)

Unnamed: 0.1,Unnamed: 0,id,selftext,score,sub_id,Topic,Topic_Label,Unnamed: 7,Unnamed: 8,sentiment
0,28,4qt241,Everyone's heard of the debate about what cars...,10,4qt241,2.0,Critical Probing,,,neu
1,99,d4w6526,I don't think people are irrationally protecti...,13,4qt241,4.0,Perspectives on ethical reasoning and decision...,,,neg
2,100,d4w1mid,>Swerving has a 1 in 10 million chance of kill...,6,4qt241,4.0,Perspectives on ethical reasoning and decision...,,,neu
3,101,d4wbva1,We don't need everyone to agree on an answer t...,3,4qt241,4.0,Perspectives on ethical reasoning and decision...,Policy considerations needed to handle ethics,,neu
4,102,d4wug88,what i think is fascinating about this is that...,2,4qt241,4.0,Perspectives on ethical reasoning and decision...,,,neu
5,103,d4w8d6a,> To be honest I'm not sure I could even relia...,2,4qt241,4.0,Perspectives on ethical reasoning and decision...,Security and privacy issues interlinked,,neu
6,104,d4wtdvg,I like this approach (my background is in econ...,2,4qt241,4.0,Perspectives on ethical reasoning and decision...,Making ethical reasoning more egalitarian and ...,,pos
7,98,d4w9run,Assuming you're serious: are you referring to ...,2,4qvi4m,2.0,Critical Probing,,,pos
8,27,4qvi4m,If I add a constant c to my loss function Loss...,8,4qvi4m,4.0,Perspectives on ethical reasoning and decision...,Technical implementation of ethical systems,,pos
9,96,d4wb6nb,Is this a serious question or are you trolling...,7,4qvi4m,4.0,Perspectives on ethical reasoning and decision...,Limitations of mathematical thought around eth...,,neu


In [93]:
data_analyzed['Topic_Label'].unique()

array(['Critical Probing', 'Perspectives', 'Community Building', nan,
       'Resource sharing', 'Influence', 'Scholarly Practices'],
      dtype=object)

In [144]:
data_analyzed['Topic_Label'] = data_analyzed['Topic_Label'].str.replace("Scholarly Practices", "Influence")

In [145]:
data_analyzed['sentiment'] = data_analyzed['selftext'].apply (lambda text: get_top_sentiment(text))

In [146]:
sent_df = data_analyzed['sentiment'].value_counts().reset_index()
sent_df.columns = ['sentiment', 'counts']
sent_df['sentiment'] = sent_df['sentiment'].str.replace("neu", "Neutral")
sent_df['sentiment'] =  sent_df['sentiment'].str.replace("pos", "Positive")
sent_df['sentiment'] =  sent_df['sentiment'].str.replace("neg", "Negative")

In [147]:
sent_df

Unnamed: 0,sentiment,counts
0,Neutral,237
1,Positive,100
2,Negative,16


In [148]:
import plotly.express as px
fig = px.pie(sent_df, values="counts", names="sentiment", color_discrete_sequence=['cyan','goldenrod','magenta'])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(legend=dict(title = 'Sentiment', yanchor="top", y=0.9, xanchor="left", x=0.75))
fig.show()

In [149]:
topic_df = data_analyzed['Topic_Label'].value_counts().reset_index()
topic_df.columns = ['Topic_Label', 'counts']

In [157]:
import plotly.express as px
fig = px.pie(topic_df, values="counts", names="Topic_Label")
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(legend=dict(title = 'Content themes', yanchor="top", y=0.9, xanchor="left", x=0.74))
fig.show()

In [151]:
emo_cat = data_analyzed[['Topic_Label','sentiment']]
grouped_data = emo_cat.groupby(['Topic_Label','sentiment']).size().reset_index(name='count')
grouped_data['sentiment'] = grouped_data['sentiment'].str.replace("neu", "Neutral")
grouped_data['sentiment'] =  grouped_data['sentiment'].str.replace("pos", "Positive")
grouped_data['sentiment'] =  grouped_data['sentiment'].str.replace("neg", "Negative")
df_wide=pd.pivot(grouped_data, index=['Topic_Label'], columns = 'sentiment',values = 'count') #Reshape from long to wide

#Re-arange the new columns in the correct order
cols = grouped_data['sentiment'].unique()
df_wide=df_wide[cols]

In [152]:
df_wide.reset_index(inplace=True)
df_wide = df_wide.rename(columns = {'sentiment':'Serial ID'})

In [153]:
df_wide = df_wide.fillna(0)

In [161]:
fig = px.bar(grouped_data, x="Topic_Label", y="count",color='sentiment' ,color_discrete_sequence=['magenta', 'cyan', 'goldenrod'], barmode = 'stack')
fig.update_layout(xaxis_title = 'Content Themes', yaxis_title =  
      'Sentiment counts', legend = dict(title='Sentiment'))
fig.show()