# Measuring interactivity

In [1]:
import pandas as pd
import numpy as np
import re

### Read original CSV

In [2]:
#load data

data_path = 'data/full_data.csv'

interactivity_df = pd.read_csv(data_path) #import csv

#retain only important columns
interactivity_df_w_mention = interactivity_df[['ID','commentText']]


In [3]:
#create show list name
show_list = ["@TheDailyShow","@LastWeekTonight","@SouthPark","@nbcsnl","@colbertlateshow","@RealTimers","@TheOnion","@fullfrontalsamb",
"@JimmyKimmelLive","@LateNightSeth","@zondagmetlubach","@Lucky_TV","@klikbeet","@EvenTotHierBnnvara","@AC360","@TuckerCarlson","@hardball",
"@CBSEveningNews","@11thHour","@NewsHour","@ABCWorldNews","@Nightline","@FaceTheNation","60Minutes","@NBCNews","@MeetThePress","@NOS",
"@NOS","@nosop3","@RTLnieuws","@Nieuwsuur"] 

for show in show_list:
    interactivity_df_w_mention['commentText'] = interactivity_df_w_mention.apply(lambda x: x['commentText'].replace(show,""), axis = 1) #replace all appearing @ from the list

interactivity_df_w_mention['mentioned'] = interactivity_df_w_mention.apply(lambda x: 1 if "@" in x['commentText'] else 0, axis = 1) #return bool value whether contains @

interactivity_df_w_mention

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity_df_w_mention['commentText'] = interactivity_df_w_mention.apply(lambda x: x['commentText'].replace(show,""), axis = 1) #replace all appearing @ from the list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity_df_w_mention['mentioned'] = interactivity_df_w_mention.apply(lambda x: 1 if "@" in x['commentText'] else 0, axis = 1) #return bool value whether contains @


Unnamed: 0,ID,commentText,mentioned
0,UgyPHwv8G0cDE6-wEgl4AaABAg.8_0ZjJKSJty8_0kXGkAd2U,sad,0
1,Ugx2WXq9UdV8mPPjejJ4AaABAg.8yHCKV0Boe58yYRxEQEF45,That's a vicious insult!!! What did a box of r...,0
2,1110578710648890000,The question has always been if he was compro...,0
3,UgwUPFScjJ0MCeaP2F54AaABAg.8lvp3fc9Euf8lvvgsUgEgV,Goya Solidar. So there are a few of us left. ...,0
4,UgwWKCWtSJdFvjGHvTp4AaABAg.8kUC5dGrQ2H8kUDRihE2f3,hello hello \nNo-one else will hug him.,0
...,...,...,...
3857,1152219467579100000,@FullFrontalSamB They can’t afford chemical pe...,1
3858,1085362296472430000,@CNN @andersoncooper It's not if..... he will...,1
3859,UghFY3QJ6nmT_ngCoAEC.7-H0Z7--wxd8goqpaPs-bl,"Nah, they knew all about the cameras. I'm gue...",0
3860,UgyWabsmmnq3zam4DgZ4AaABAg,Alexander Hamilton. Troops are waiting in the ...,0


### Checking presence of mention / @

In [4]:
#Subsetting Twitter 
interactivity_twi_w_mention = interactivity_df_w_mention[interactivity_df_w_mention['ID'].str.contains("^[0-9]",regex = True)]

#Subsetting YouTube
interactivity_yt_w_mention = interactivity_df_w_mention[interactivity_df_w_mention['ID'].str.contains("^Ug",regex = True)]

In [5]:
#drop commentText to avoid duplicated columns in the merge df
interactivity_twi_w_mention = interactivity_twi_w_mention[['ID','mentioned']] #twitter subset

interactivity_yt_w_mention = interactivity_yt_w_mention[['ID','mentioned']] #YT subset

interactivity_df_w_mention = interactivity_df_w_mention[['ID','mentioned']] #complete subset

interactivity_twi_w_mention 

Unnamed: 0,ID,mentioned
2,1110578710648890000,0
436,1122365699538790000,1
681,1176274969271200000,1
682,1171574860579050000,1
683,972979929221448000,1
...,...,...
1404,1171601712089640000,1
1405,1187430772002760000,1
3856,1167577566015890000,1
3857,1152219467579100000,1


### Merge and export

In [6]:
#merge Twitter subset
merge_with_interactivity_twi = interactivity_df.merge(interactivity_twi_w_mention, on = "ID")

#merge YouTube subset
merge_with_interactivity_yt = interactivity_df.merge(interactivity_yt_w_mention, on = "ID")

#merge complete
merge_with_interactivity = interactivity_df.merge(interactivity_df_w_mention, on = "ID")

merge_with_interactivity.to_csv("outputs/automated_results/interactivity.csv",index=False)

### Comparison with manually coded data

In [7]:
#Comparison Twitter
merge_with_interactivity_twi = merge_with_interactivity_twi[['ID','Interaction','Acknowledgement','mentioned']]
merge_with_interactivity_twi['manual_interactivity'] = merge_with_interactivity_twi.apply(lambda x: 1 if (x['Interaction']|x['Acknowledgement']) == 1 else 0, axis = 1)

#Comparison YouTube
merge_with_interactivity_yt = merge_with_interactivity_yt[['ID','Interaction','Acknowledgement','mentioned']]
merge_with_interactivity_yt['manual_interactivity'] = merge_with_interactivity_yt.apply(lambda x: 1 if (x['Interaction']|x['Acknowledgement']) == 1 else 0, axis = 1)

#Comparison complete
merge_with_interactivity = merge_with_interactivity[['ID','Interaction','Acknowledgement','mentioned']]
merge_with_interactivity['manual_interactivity'] = merge_with_interactivity.apply(lambda x: 1 if (x['Interaction']|x['Acknowledgement']) == 1 else 0, axis = 1)

In [8]:
#Correlation matrices for the subsets
import warnings

warnings.filterwarnings("ignore") #ignore deprecation wanrings 

print("Twitter:")
display(merge_with_interactivity_twi.corr()) # Twitter subset 
print("YouTube:")
display(merge_with_interactivity_yt.corr()) # YouTube subset
print("Complete:")
display(merge_with_interactivity.corr()) #complete set 

Twitter:


Unnamed: 0,Interaction,Acknowledgement,mentioned,manual_interactivity
Interaction,1.0,0.886439,0.179877,1.0
Acknowledgement,0.886439,1.0,0.15945,0.886439
mentioned,0.179877,0.15945,1.0,0.179877
manual_interactivity,1.0,0.886439,0.179877,1.0


YouTube:


Unnamed: 0,Interaction,Acknowledgement,mentioned,manual_interactivity
Interaction,1.0,0.72977,0.37594,0.998714
Acknowledgement,0.72977,1.0,0.331704,0.731596
mentioned,0.37594,0.331704,1.0,0.375438
manual_interactivity,0.998714,0.731596,0.375438,1.0


Complete:


Unnamed: 0,Interaction,Acknowledgement,mentioned,manual_interactivity
Interaction,1.0,0.754235,0.099647,0.998903
Acknowledgement,0.754235,1.0,0.117588,0.755834
mentioned,0.099647,0.117588,1.0,0.099066
manual_interactivity,0.998903,0.755834,0.099066,1.0


### Computing precision / recall

**Note.** The manual coded scores are now treated as actual score (aka golden standard)

In [9]:
def tp_fp_fn_tn(actual_score,predicted_score):
    #Let manual = actual score
    #TP: actual = 1, preducted = 1
    #FP: actual = 0, predicted = 1
    #FN: actual = 1, predicted = 0
    #TN: actual = 0, predicted = 0
    
    tp = sum((actual_score == 1) & (predicted_score ==1))
    fp = sum((actual_score == 0) & (predicted_score ==1))
    fn = sum((actual_score == 1) & (predicted_score ==0))
    tn = sum((actual_score == 0) & (predicted_score ==0))
    
    return tp, fp, fn, tn

In [10]:
#Precision and recall score 

def precision_score(tp, fp):
    precision = tp/(fp+tp)
    return precision

def recall_score(tp,fn):
    recall = tp/(fn+tp)
    
    return recall

In [11]:
#Calculate precision / recall for each dictionary
def precision_recall(dictionary):
    #calculate tp_fp_fn_tn for each dict

    tp, fp, fn, tn = tp_fp_fn_tn(merge_with_interactivity['manual_interactivity'], dictionary)
    
    precision = precision_score(tp,fp)
    recall = recall_score(tp,fn)
    
    return precision,recall

In [12]:
#Create PRF1 data

twitter_PR = precision_recall(merge_with_interactivity_twi['mentioned']) #twitter precision and recall
yt_PR = precision_recall(merge_with_interactivity_yt['mentioned']) #yt precision and recall

twitter_F1 = (twitter_PR[0] * twitter_PR[1]) / (twitter_PR[0] + twitter_PR[1]) #twitter F1 score
yt_F1 = (yt_PR[0] * yt_PR[1]) / (yt_PR[0] + yt_PR[1]) #yt F1 score

print(f"Precision, Recall & F1 of Twitter subset: \n {twitter_PR} {twitter_F1}")

print(f"Precision, Recall & F1 of YouTube subset: \n {yt_PR} {yt_F1}")


Precision, Recall & F1 of Twitter subset: 
 (0.40301724137931033, 0.6470588235294118) 0.24833997343957503
Precision, Recall & F1 of YouTube subset: 
 (0.35376044568245124, 0.11159929701230228) 0.08483633934535738
