In [1]:
import sklearn
from sklearn.metrics import cohen_kappa_score
import pandas as pd

##### For a given topic dataset CSV, create three separate CSVs, corresponding to lockdowns, masking and distancing, and vaccination. The new CSVs are written with the name of the topic suffixed to the original filename.

```
import argparse
import re
import pandas as pd

# Command-line interface: one required path to the topic CSV to expand.
parser = argparse.ArgumentParser()
parser.add_argument("--infile", "-i", type=str, help="CSV for dataset to expand",
                    required=True)
args = parser.parse_args()

# Read the input once; each topic works on its own copy so the boolean
# overwrite below never leaks from one topic into the next.
source_df = pd.read_csv(args.infile)
annotation_cols = [col for col in source_df.columns if "annotation" in col]

for topic in ["lockdowns", "masking and distancing", "vaccination"]:
    df = source_df.copy()
    for col in annotation_cols:
        # na=False turns missing annotations into False; regex=False matches
        # the topic name literally instead of interpreting it as a pattern.
        df[col] = df[col].str.contains(topic, na=False, regex=False)
    suffix = f"_{topic.replace(' ', '_')}.csv"
    new_fn, n_subs = re.subn(r"\.csv$", suffix, args.infile)
    if n_subs == 0:
        # The input name has no ".csv" extension: append the suffix so the
        # output can never be written over the input file itself.
        new_fn = args.infile + suffix
    df.to_csv(new_fn, index=False)

```

###### Using command: python expand_topic_csv_dataset.py --infile twitter_topic_0.csv
###### Using command: python expand_topic_csv_dataset.py --infile twitter_topic_1.csv
###### Using command: python expand_topic_csv_dataset.py --infile twitter_topic_2.csv
###### Using command: python expand_topic_csv_dataset.py --infile twitter_topic_3.csv



## twitter_topic_0_lockdowns.csv

In [2]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df = pd.read_csv('twitter_topic/twitter_topic_0_lockdowns.csv')
df.head()

Unnamed: 0,text,annotation_104,annotation_101,annotation_102,annotation_103
0,Putin After Announcing #CovidVaccine #Russian ...,False,False,False,False
1,Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...,False,False,False,False
2,4 of the vaccines Jared bought are expected to...,False,False,False,False
3,One day you will realize CDC Guidelines magica...,False,False,False,False
4,Im far from lying. Current CDC guidelines is ...,False,False,False,False


In [3]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104', 'text'):
    print(df[column].isnull().sum())

0
0
0
0
0


In [4]:
# Bind the four annotator columns of df to the short names the kappa cells use.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df[column] for column in
    ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104')
)

### Cohen_kappa average score of annotation 101

In [5]:
# Cohen's kappa for the annotation_101 / annotation_102 pair of df.
cohen_score_12 = cohen_kappa_score(annotation_1,annotation_2)
cohen_score_12

0.10287081339712911

In [6]:
# Cohen's kappa for the annotation_101 / annotation_103 pair of df.
cohen_score_13 = cohen_kappa_score(annotation_1,annotation_3)
cohen_score_13

0.22248803827751196

In [7]:
# Cohen's kappa for the annotation_101 / annotation_104 pair of df.
cohen_score_14 = cohen_kappa_score(annotation_1,annotation_4)
cohen_score_14

0.2982938910291689

In [8]:
# Annotator 101: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_1 = round(sum((cohen_score_12, cohen_score_13, cohen_score_14)) / 3, 3)
print("Average Cohen Kappa Score for annotation 101:", cohen_score_average_1)

Average Cohen Kappa Score for annotation 101: 0.208


### Cohen_kappa average score of annotation 102

In [9]:
# annotation_102 vs annotation_101: the cohen_score_12 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_21 = cohen_kappa_score(annotation_2,annotation_1)
cohen_score_21

0.10287081339712911

In [10]:
# Cohen's kappa for the annotation_102 / annotation_103 pair of df.
cohen_score_23 = cohen_kappa_score(annotation_2,annotation_3)
cohen_score_23

0.32659932659932667

In [11]:
# Cohen's kappa for the annotation_102 / annotation_104 pair of df.
cohen_score_24 = cohen_kappa_score(annotation_2,annotation_4)
cohen_score_24

0.08051689860834976

In [12]:
# Annotator 102: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_2 = round(sum((cohen_score_21, cohen_score_23, cohen_score_24)) / 3, 3)
print("Average Cohen Kappa Score for annotation 102:", cohen_score_average_2)

Average Cohen Kappa Score for annotation 102: 0.17


### Cohen_kappa average score of annotation 103

In [13]:
# annotation_103 vs annotation_101: the cohen_score_13 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_31 = cohen_kappa_score(annotation_3,annotation_1)
cohen_score_31

0.22248803827751196

In [14]:
# annotation_103 vs annotation_102: the cohen_score_23 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_32 = cohen_kappa_score(annotation_3,annotation_2)
cohen_score_32

0.32659932659932667

In [15]:
# Cohen's kappa for the annotation_103 / annotation_104 pair of df.
cohen_score_34 = cohen_kappa_score(annotation_3,annotation_4)
cohen_score_34

0.08051689860834976

In [16]:
# Annotator 103: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_3 = round(sum((cohen_score_31, cohen_score_32, cohen_score_34)) / 3, 3)
print("Average Cohen Kappa Score for annotation 103:", cohen_score_average_3)

Average Cohen Kappa Score for annotation 103: 0.21


### Cohen_kappa average score of annotation 104

In [17]:
# annotation_104 vs annotation_101: the cohen_score_14 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_41 = cohen_kappa_score(annotation_4,annotation_1)
cohen_score_41

0.2982938910291689

In [18]:
# annotation_104 vs annotation_102: the cohen_score_24 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_42 = cohen_kappa_score(annotation_4,annotation_2)
cohen_score_42

0.08051689860834976

In [19]:
# annotation_104 vs annotation_103: the cohen_score_34 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_43 = cohen_kappa_score(annotation_4,annotation_3)
cohen_score_43

0.08051689860834976

In [20]:
# Annotator 104: mean of its three pairwise kappas, rounded to 3 decimals.
# The printed label names annotator 104, the one this average belongs to.
cohen_score_average_4 = round((cohen_score_41 + cohen_score_42 + cohen_score_43)/3,3)
print("Average Cohen Kappa Score for annotation 104:",(cohen_score_average_4))

Average Cohen Kappa Score for annotation 102: 0.153


##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [21]:
def drop_annotate():
    """Drop annotator columns of ``df`` whose average Cohen's kappa is below 0.2.

    Reads the notebook globals ``cohen_score_average_1`` .. ``cohen_score_average_4``
    (already rounded to 3 decimals) and ``df``. For every annotator below the
    threshold it prints the column name and the average, then removes that
    column from ``df`` in place. Finally prints ``df.head()``. Returns ``None``.
    """
    averages = [cohen_score_average_1, cohen_score_average_2,
                cohen_score_average_3, cohen_score_average_4]
    columns_name = ['annotation_101', 'annotation_102', 'annotation_103', 'annotation_104']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df.drop(columns=column, inplace=True, errors='ignore')
    print(df.head())

In [22]:
drop_annotate()

annotation_102
0.17
annotation_104
0.153
                                                text  annotation_101  \
0  Putin After Announcing #CovidVaccine #Russian ...           False   
1  Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...           False   
2  4 of the vaccines Jared bought are expected to...           False   
3  One day you will realize CDC Guidelines magica...           False   
4  Im far from lying.  Current CDC guidelines is ...           False   

   annotation_103  
0           False  
1           False  
2           False  
3           False  
4           False  


## twitter_topic_0_masking_and_distancing.csv

In [23]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_1 = pd.read_csv('twitter_topic/twitter_topic_0_masking_and_distancing.csv')
df_1.head()

Unnamed: 0,text,annotation_104,annotation_101,annotation_102,annotation_103
0,Putin After Announcing #CovidVaccine #Russian ...,False,False,False,False
1,Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...,False,False,False,False
2,4 of the vaccines Jared bought are expected to...,False,False,False,False
3,One day you will realize CDC Guidelines magica...,True,False,False,False
4,Im far from lying. Current CDC guidelines is ...,True,True,False,True


In [24]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104', 'text'):
    print(df_1[column].isnull().sum())

0
0
0
0
0


In [25]:
# Rebind the short annotator names to the columns of df_1 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_1[column] for column in
    ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104')
)

### Cohen_kappa average score of annotation 101

In [26]:
# Cohen's kappa for the annotation_101 / annotation_102 pair of df_1.
cohen_score_12 = cohen_kappa_score(annotation_1,annotation_2)
cohen_score_12

0.8671118985961564

In [27]:
# Cohen's kappa for the annotation_101 / annotation_103 pair of df_1.
cohen_score_13 = cohen_kappa_score(annotation_1,annotation_3)
cohen_score_13

0.9375043399763905

In [28]:
# Cohen's kappa for the annotation_101 / annotation_104 pair of df_1.
cohen_score_14 = cohen_kappa_score(annotation_1,annotation_4)
cohen_score_14

0.5068285280728377

In [29]:
# Annotator 101: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_1 = round(sum((cohen_score_12, cohen_score_13, cohen_score_14)) / 3, 3)
print("Average Cohen Kappa Score for annotation 101:", cohen_score_average_1)

Average Cohen Kappa Score for annotation 101: 0.77


### Cohen_kappa average score of annotation 102

In [30]:
# annotation_102 vs annotation_101: the cohen_score_12 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_21 = cohen_kappa_score(annotation_2,annotation_1)
cohen_score_21

0.8671118985961564

In [31]:
# Cohen's kappa for the annotation_102 / annotation_103 pair of df_1.
cohen_score_23 = cohen_kappa_score(annotation_2,annotation_3)
cohen_score_23

0.8282365620369123

In [32]:
# Cohen's kappa for the annotation_102 / annotation_104 pair of df_1.
cohen_score_24 = cohen_kappa_score(annotation_2,annotation_4)
cohen_score_24

0.5030120481927711

In [33]:
# Annotator 102: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_2 = round(sum((cohen_score_21, cohen_score_23, cohen_score_24)) / 3, 3)
print("Average Cohen Kappa Score for annotation 102:", cohen_score_average_2)

Average Cohen Kappa Score for annotation 102: 0.733


### Cohen_kappa average score of annotation 103

In [34]:
# annotation_103 vs annotation_101: the cohen_score_13 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_31 = cohen_kappa_score(annotation_3,annotation_1)
cohen_score_31

0.9375043399763905

In [35]:
# annotation_103 vs annotation_102: the cohen_score_23 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_32 = cohen_kappa_score(annotation_3,annotation_2)
cohen_score_32

0.8282365620369123

In [36]:
# Cohen's kappa for the annotation_103 / annotation_104 pair of df_1.
cohen_score_34 = cohen_kappa_score(annotation_3,annotation_4)
cohen_score_34

0.5234493192133132

In [37]:
# Annotator 103: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_3 = round(sum((cohen_score_31, cohen_score_32, cohen_score_34)) / 3, 3)
print("Average Cohen Kappa Score for annotation 103:", cohen_score_average_3)

Average Cohen Kappa Score for annotation 103: 0.763


### Cohen_kappa average score of annotation 104

In [38]:
# annotation_104 vs annotation_101: the cohen_score_14 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_41 = cohen_kappa_score(annotation_4,annotation_1)
cohen_score_41

0.5068285280728377

In [39]:
# annotation_104 vs annotation_102: the cohen_score_24 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_42 = cohen_kappa_score(annotation_4,annotation_2)
cohen_score_42

0.5030120481927711

In [40]:
# annotation_104 vs annotation_103: the cohen_score_34 pair with the arguments
# swapped (the recorded outputs of the two cells are identical).
cohen_score_43 = cohen_kappa_score(annotation_4,annotation_3)
cohen_score_43

0.5234493192133132

In [41]:
# Annotator 104: mean of its three pairwise kappas, rounded to 3 decimals.
cohen_score_average_4 = round(sum((cohen_score_41, cohen_score_42, cohen_score_43)) / 3, 3)
print("Average Cohen Kappa Score for annotation 104:", cohen_score_average_4)

Average Cohen Kappa Score for annotation 104: 0.511


##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [42]:
def drop_annotate():
    """Drop annotator columns of ``df_1`` whose average Cohen's kappa is below 0.2.

    Reads the notebook globals ``cohen_score_average_1`` .. ``cohen_score_average_4``
    (already rounded to 3 decimals) and ``df_1``. For every annotator below the
    threshold it prints the column name and the average, then removes that
    column from ``df_1`` in place. Finally prints ``df_1.head()``. Returns ``None``.
    """
    averages = [cohen_score_average_1, cohen_score_average_2,
                cohen_score_average_3, cohen_score_average_4]
    columns_name = ['annotation_101', 'annotation_102', 'annotation_103', 'annotation_104']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df_1.drop(columns=column, inplace=True, errors='ignore')
    print(df_1.head())

In [43]:
drop_annotate()

                                                text  annotation_104  \
0  Putin After Announcing #CovidVaccine #Russian ...           False   
1  Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...           False   
2  4 of the vaccines Jared bought are expected to...           False   
3  One day you will realize CDC Guidelines magica...            True   
4  Im far from lying.  Current CDC guidelines is ...            True   

   annotation_101  annotation_102  annotation_103  
0           False           False           False  
1           False           False           False  
2           False           False           False  
3           False           False           False  
4            True           False            True  


## twitter_topic_0_vaccination.csv

In [44]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_2 = pd.read_csv('twitter_topic/twitter_topic_0_vaccination.csv')
df_2.head()

Unnamed: 0,text,annotation_104,annotation_101,annotation_102,annotation_103
0,Putin After Announcing #CovidVaccine #Russian ...,True,True,True,False
1,Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...,True,True,False,False
2,4 of the vaccines Jared bought are expected to...,True,True,True,True
3,One day you will realize CDC Guidelines magica...,False,False,False,False
4,Im far from lying. Current CDC guidelines is ...,False,True,True,True


In [45]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104', 'text'):
    print(df_2[column].isnull().sum())

0
0
0
0
0


In [46]:
# Rebind the short annotator names to the columns of df_2 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_2[column] for column in
    ('annotation_101', 'annotation_102', 'annotation_103', 'annotation_104')
)

### Cohen_kappa average score of annotation 101

In [47]:
def cohen_avg_1():
    """Print and return annotator 101's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_1`` against ``annotation_2``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_2, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_1, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 101:", mean_kappa)
    return mean_kappa

In [48]:
cohen_avg_1()

Average Cohen Kappa Score for annotation 101: 0.762


0.762

### Cohen_kappa average score of annotation 102

In [49]:
def cohen_avg_2():
    """Print and return annotator 102's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_2`` against ``annotation_1``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_2, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 102:", mean_kappa)
    return mean_kappa

In [50]:
cohen_avg_2()

Average Cohen Kappa Score for annotation 102: 0.624


0.624

### Cohen_kappa average score of annotation 103

In [51]:
def cohen_avg_3():
    """Print and return annotator 103's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_3`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_3, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 103:", mean_kappa)
    return mean_kappa

In [52]:
cohen_avg_3()

Average Cohen Kappa Score for annotation 103: 0.71


0.71

### Cohen_kappa average score of annotation 104

In [53]:
def cohen_avg_4():
    """Print and return annotator 104's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_4`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_3`` and averages the three results,
    rounded to 3 decimals.
    """
    cohen_score_41 = cohen_kappa_score(annotation_4,annotation_1)
    cohen_score_42 = cohen_kappa_score(annotation_4,annotation_2)
    cohen_score_43 = cohen_kappa_score(annotation_4,annotation_3)
    cohen_score_average_4 = round((cohen_score_41 + cohen_score_42 + cohen_score_43)/3,3)
    # The label names annotator 104, the one whose average is computed here.
    print("Average Cohen Kappa Score for annotation 104:",(cohen_score_average_4))
    return cohen_score_average_4

In [54]:
cohen_avg_4()

Average Cohen Kappa Score for annotation 103: 0.673


0.673

##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [55]:
def drop_annotate():
    """Drop annotator columns of ``df_2`` whose average Cohen's kappa is below 0.2.

    Calls ``cohen_avg_1()`` .. ``cohen_avg_4()`` (each prints its own summary
    line and returns an average rounded to 3 decimals). For every annotator
    below the threshold it prints the column name and the average, then removes
    that column from ``df_2`` in place. Finally prints ``df_2.head()``.
    Returns ``None``.
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    columns_name = ['annotation_101', 'annotation_102', 'annotation_103', 'annotation_104']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df_2.drop(columns=column, inplace=True, errors='ignore')
    print(df_2.head())

In [56]:
drop_annotate()

Average Cohen Kappa Score for annotation 101: 0.762
Average Cohen Kappa Score for annotation 102: 0.624
Average Cohen Kappa Score for annotation 103: 0.71
Average Cohen Kappa Score for annotation 103: 0.673
                                                text  annotation_104  \
0  Putin After Announcing #CovidVaccine #Russian ...            True   
1  Courtesy: WA! #WhatsApp #COVID #CovidVaccine h...            True   
2  4 of the vaccines Jared bought are expected to...            True   
3  One day you will realize CDC Guidelines magica...           False   
4  Im far from lying.  Current CDC guidelines is ...           False   

   annotation_101  annotation_102  annotation_103  
0            True            True           False  
1            True           False           False  
2            True            True            True  
3           False           False           False  
4            True            True            True  


## twitter_topic_1_lockdowns.csv

In [57]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_3 = pd.read_csv('twitter_topic/twitter_topic_1_lockdowns.csv')
df_3.head()

Unnamed: 0,text,annotation_51,annotation_52,annotation_53,annotation_54
0,Follow the CDC guidelines. Don’t become a stat...,False,False,False,False
1,Do you agree with CDC guidelines that children...,False,False,False,False
2,"So, both #Pharmaceutical companies #lilly and ...",False,False,False,False
3,The CDC's guidelines are clear; you just don't...,False,False,False,False
4,CDC Updates School Guidelines For Students Ret...,True,True,True,False


In [58]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54', 'text'):
    print(df_3[column].isnull().sum())

0
0
0
0
0


In [59]:
# Rebind the short annotator names to the columns of df_3 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_3[column] for column in
    ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54')
)

### Cohen_kappa average score of annotation 51

In [60]:
def cohen_avg_1():
    """Print and return annotator 51's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_1`` against ``annotation_2``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_2, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_1, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 51:", mean_kappa)
    return mean_kappa

In [61]:
cohen_avg_1()

Average Cohen Kappa Score for annotation 51: 0.384


0.384

### Cohen_kappa average score of annotation 52

In [62]:
def cohen_avg_2():
    """Print and return annotator 52's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_2`` against ``annotation_1``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_2, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 52:", mean_kappa)
    return mean_kappa

In [63]:
cohen_avg_2()

Average Cohen Kappa Score for annotation 52: 0.248


0.248

### Cohen_kappa average score of annotation 53

In [64]:
def cohen_avg_3():
    """Print and return annotator 53's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_3`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_3, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 53:", mean_kappa)
    return mean_kappa

In [65]:
cohen_avg_3()

Average Cohen Kappa Score for annotation 53: 0.347


0.347

### Cohen_kappa average score of annotation 54

In [66]:
def cohen_avg_4():
    """Print and return annotator 54's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_4`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_3`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_3)
    kappa_total = sum(cohen_kappa_score(annotation_4, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 54:", mean_kappa)
    return mean_kappa

In [67]:
cohen_avg_4()

Average Cohen Kappa Score for annotation 54: 0.362


0.362

##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [68]:
def drop_annotate():
    """Drop annotator columns of ``df_3`` whose average Cohen's kappa is below 0.2.

    Calls ``cohen_avg_1()`` .. ``cohen_avg_4()`` (each prints its own summary
    line and returns an average rounded to 3 decimals). For every annotator
    below the threshold it prints the column name and the average, then removes
    that column from ``df_3`` in place. Finally prints ``df_3.head()``.
    Returns ``None``.
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    columns_name = ['annotation_51', 'annotation_52', 'annotation_53', 'annotation_54']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            # Name the column next to its score so the log says what was dropped.
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df_3.drop(columns=column, inplace=True, errors='ignore')
    print(df_3.head())

In [69]:
drop_annotate()

Average Cohen Kappa Score for annotation 51: 0.384
Average Cohen Kappa Score for annotation 52: 0.248
Average Cohen Kappa Score for annotation 53: 0.347
Average Cohen Kappa Score for annotation 54: 0.362
                                                text  annotation_51  \
0  Follow the CDC guidelines. Don’t become a stat...          False   
1  Do you agree with CDC guidelines that children...          False   
2  So, both #Pharmaceutical companies #lilly and ...          False   
3  The CDC's guidelines are clear; you just don't...          False   
4  CDC Updates School Guidelines For Students Ret...           True   

   annotation_52  annotation_53  annotation_54  
0          False          False          False  
1          False          False          False  
2          False          False          False  
3          False          False          False  
4           True           True          False  


## twitter_topic_1_masking_and_distancing.csv

In [70]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_4 = pd.read_csv('twitter_topic/twitter_topic_1_masking_and_distancing.csv')
df_4.head()

Unnamed: 0,text,annotation_51,annotation_52,annotation_53,annotation_54
0,Follow the CDC guidelines. Don’t become a stat...,True,False,True,False
1,Do you agree with CDC guidelines that children...,True,True,True,True
2,"So, both #Pharmaceutical companies #lilly and ...",False,False,False,False
3,The CDC's guidelines are clear; you just don't...,True,False,False,False
4,CDC Updates School Guidelines For Students Ret...,True,False,False,False


In [71]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54', 'text'):
    print(df_4[column].isnull().sum())

0
0
0
0
0


In [72]:
# Rebind the short annotator names to the columns of df_4 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_4[column] for column in
    ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54')
)

### Cohen_kappa average score of annotation 51

In [73]:
def cohen_avg_1():
    """Print and return annotator 51's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_1`` against ``annotation_2``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_2, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_1, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 51:", mean_kappa)
    return mean_kappa

In [74]:
cohen_avg_1()

Average Cohen Kappa Score for annotation 51: 0.555


0.555

### Cohen_kappa average score of annotation 52

In [75]:
def cohen_avg_2():
    """Print and return annotator 52's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_2`` against ``annotation_1``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_2, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 52:", mean_kappa)
    return mean_kappa

In [76]:
cohen_avg_2()

Average Cohen Kappa Score for annotation 52: 0.699


0.699

### Cohen_kappa average score of annotation 53

In [77]:
def cohen_avg_3():
    """Print and return annotator 53's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_3`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_3, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 53:", mean_kappa)
    return mean_kappa

In [78]:
cohen_avg_3()

Average Cohen Kappa Score for annotation 53: 0.681


0.681

### Cohen_kappa average score of annotation 54

In [79]:
def cohen_avg_4():
    """Print and return annotator 54's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_4`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_3`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_3)
    kappa_total = sum(cohen_kappa_score(annotation_4, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 54:", mean_kappa)
    return mean_kappa

In [80]:
cohen_avg_4()

Average Cohen Kappa Score for annotation 54: 0.705


0.705

##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [81]:
def drop_annotate():
    """Drop annotator columns of ``df_4`` whose average Cohen's kappa is below 0.2.

    Calls ``cohen_avg_1()`` .. ``cohen_avg_4()`` (each prints its own summary
    line and returns an average rounded to 3 decimals). For every annotator
    below the threshold it prints the column name and the average, then removes
    that column from ``df_4`` in place. Finally prints ``df_4.head()``.
    Returns ``None``.
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    columns_name = ['annotation_51', 'annotation_52', 'annotation_53', 'annotation_54']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            # Name the column next to its score so the log says what was dropped.
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df_4.drop(columns=column, inplace=True, errors='ignore')
    print(df_4.head())

In [82]:
drop_annotate()

Average Cohen Kappa Score for annotation 51: 0.555
Average Cohen Kappa Score for annotation 52: 0.699
Average Cohen Kappa Score for annotation 53: 0.681
Average Cohen Kappa Score for annotation 54: 0.705
                                                text  annotation_51  \
0  Follow the CDC guidelines. Don’t become a stat...           True   
1  Do you agree with CDC guidelines that children...           True   
2  So, both #Pharmaceutical companies #lilly and ...          False   
3  The CDC's guidelines are clear; you just don't...           True   
4  CDC Updates School Guidelines For Students Ret...           True   

   annotation_52  annotation_53  annotation_54  
0          False           True          False  
1           True           True           True  
2          False          False          False  
3          False          False          False  
4          False          False          False  


## twitter_topic_1_vaccination.csv

In [83]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_5 = pd.read_csv('twitter_topic/twitter_topic_1_vaccination.csv')
df_5.head()

Unnamed: 0,text,annotation_51,annotation_52,annotation_53,annotation_54
0,Follow the CDC guidelines. Don’t become a stat...,False,False,False,False
1,Do you agree with CDC guidelines that children...,False,False,False,False
2,"So, both #Pharmaceutical companies #lilly and ...",True,True,True,True
3,The CDC's guidelines are clear; you just don't...,False,False,False,False
4,CDC Updates School Guidelines For Students Ret...,False,False,False,False


In [84]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54', 'text'):
    print(df_5[column].isnull().sum())

0
0
0
0
0


In [85]:
# Rebind the short annotator names to the columns of df_5 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_5[column] for column in
    ('annotation_51', 'annotation_52', 'annotation_53', 'annotation_54')
)

### Cohen_kappa average score of annotation 51

In [86]:
def cohen_avg_1():
    """Print and return annotator 51's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_1`` against ``annotation_2``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_2, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_1, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 51:", mean_kappa)
    return mean_kappa

In [87]:
cohen_avg_1()

Average Cohen Kappa Score for annotation 51: 0.656


0.656

### Cohen_kappa average score of annotation 52

In [88]:
def cohen_avg_2():
    """Print and return annotator 52's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_2`` against ``annotation_1``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_2, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 52:", mean_kappa)
    return mean_kappa

In [89]:
cohen_avg_2()

Average Cohen Kappa Score for annotation 52: 0.363


0.363

### Cohen_kappa average score of annotation 53

In [90]:
def cohen_avg_3():
    """Print and return annotator 53's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_3`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_3, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 53:", mean_kappa)
    return mean_kappa

In [91]:
cohen_avg_3()

Average Cohen Kappa Score for annotation 53: 0.613


0.613

### Cohen_kappa average score of annotation 54

In [92]:
def cohen_avg_4():
    """Print and return annotator 54's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_4`` against ``annotation_1``,
    ``annotation_2`` and ``annotation_3`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_2, annotation_3)
    kappa_total = sum(cohen_kappa_score(annotation_4, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 54:", mean_kappa)
    return mean_kappa

In [93]:
cohen_avg_4()

Average Cohen Kappa Score for annotation 54: 0.608


0.608

##### If an annotator's average Cohen's kappa score is below 0.2, that annotator's column is dropped

In [94]:
def drop_annotate():
    """Drop annotator columns of ``df_5`` whose average Cohen's kappa is below 0.2.

    Calls ``cohen_avg_1()`` .. ``cohen_avg_4()`` (each prints its own summary
    line and returns an average rounded to 3 decimals). For every annotator
    below the threshold it prints the column name and the average, then removes
    that column from ``df_5`` in place. Finally prints ``df_5.head()``.
    Returns ``None``.
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    columns_name = ['annotation_51', 'annotation_52', 'annotation_53', 'annotation_54']
    for column, average in zip(columns_name, averages):
        if average < 0.2:
            # Name the column next to its score so the log says what was dropped.
            print(column)
            print(average)
            # errors='ignore' keeps the cell re-runnable: a second call does
            # not raise KeyError for a column that has already been removed.
            df_5.drop(columns=column, inplace=True, errors='ignore')
    print(df_5.head())

In [95]:
drop_annotate()

Average Cohen Kappa Score for annotation 51: 0.656
Average Cohen Kappa Score for annotation 52: 0.363
Average Cohen Kappa Score for annotation 53: 0.613
Average Cohen Kappa Score for annotation 54: 0.608
                                                text  annotation_51  \
0  Follow the CDC guidelines. Don’t become a stat...          False   
1  Do you agree with CDC guidelines that children...          False   
2  So, both #Pharmaceutical companies #lilly and ...           True   
3  The CDC's guidelines are clear; you just don't...          False   
4  CDC Updates School Guidelines For Students Ret...          False   

   annotation_52  annotation_53  annotation_54  
0          False          False          False  
1          False          False          False  
2           True           True           True  
3          False          False          False  
4          False          False          False  


## twitter_topic_2_lockdowns.csv

In [96]:
# Forward slashes keep this relative path portable; a backslash is only a
# path separator on Windows.
df_6 = pd.read_csv('twitter_topic/twitter_topic_2_lockdowns.csv')
df_6.head()

Unnamed: 0,text,annotation_56,annotation_57,annotation_58,annotation_59
0,"Federal Judge Rules Against CDC, Throws Out Cr...",False,False,False,False
1,Indeed. Even in the dysfunctional US health sy...,False,False,False,False
2,a vaccine seems to be ESSENTIAL if we are to s...,False,False,False,False
3,Nurses union calls on CDC to reinstate univers...,False,False,False,False
4,Sirf #MukeshAmbani Company &amp; it's Worker...,False,False,True,False


In [97]:
# Missing-value count for every annotator column, then for the tweet text.
for column in ('annotation_56', 'annotation_57', 'annotation_58', 'annotation_59', 'text'):
    print(df_6[column].isnull().sum())

0
0
0
0
0


In [98]:
# Rebind the short annotator names to the columns of df_6 for this section.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_6[column] for column in
    ('annotation_56', 'annotation_57', 'annotation_58', 'annotation_59')
)

### Cohen_kappa average score of annotation 56

In [99]:
def cohen_avg_1():
    """Print and return annotator 56's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_1`` against ``annotation_2``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_2, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_1, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 56:", mean_kappa)
    return mean_kappa

In [100]:
cohen_avg_1()

Average Cohen Kappa Score for annotation 56: 0.116


0.116

### Cohen_kappa average score of annotation 57

In [101]:
def cohen_avg_2():
    """Print and return annotator 57's mean pairwise Cohen's kappa.

    Scores the notebook-global ``annotation_2`` against ``annotation_1``,
    ``annotation_3`` and ``annotation_4`` and averages the three results,
    rounded to 3 decimals.
    """
    peers = (annotation_1, annotation_3, annotation_4)
    kappa_total = sum(cohen_kappa_score(annotation_2, peer) for peer in peers)
    mean_kappa = round(kappa_total / 3, 3)
    print("Average Cohen Kappa Score for annotation 57:", mean_kappa)
    return mean_kappa

In [102]:
cohen_avg_2()

Average Cohen Kappa Score for annotation 57: 0.301


0.301

### Cohen_kappa average score of annotation 58

In [103]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 58) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 58:", mean_kappa)
    return mean_kappa

In [104]:
# Prints the mean kappa labelled "annotation 58"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 58: 0.195


0.195

### Cohen_kappa average score of annotation 59

In [105]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 59) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 59:", mean_kappa)
    return mean_kappa

In [106]:
# Prints the mean kappa labelled "annotation 59"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 59: 0.303


0.303

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [107]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_6`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_56', 'annotation_57', 'annotation_58', 'annotation_59']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_6.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_6.head())

In [108]:
# Filters df_6 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 56: 0.116
Average Cohen Kappa Score for annotation 57: 0.301
Average Cohen Kappa Score for annotation 58: 0.195
Average Cohen Kappa Score for annotation 59: 0.303
0.116
0.195
                                                text  annotation_57  \
0  Federal Judge Rules Against CDC, Throws Out Cr...          False   
1  Indeed. Even in the dysfunctional US health sy...          False   
2  a vaccine seems to be ESSENTIAL if we are to s...          False   
3  Nurses union calls on CDC to reinstate univers...          False   
4  Sirf  #MukeshAmbani  Company &amp; it's Worker...          False   

   annotation_59  
0          False  
1          False  
2          False  
3          False  
4          False  


## twitter_topic_2_masking_and_distancing.csv

In [109]:
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the original backslash form only resolves on Windows.
df_7 = pd.read_csv('twitter_topic/twitter_topic_2_masking_and_distancing.csv')
df_7.head()

Unnamed: 0,text,annotation_56,annotation_57,annotation_58,annotation_59
0,"Federal Judge Rules Against CDC, Throws Out Cr...",False,False,True,True
1,Indeed. Even in the dysfunctional US health sy...,False,False,False,False
2,a vaccine seems to be ESSENTIAL if we are to s...,False,True,False,False
3,Nurses union calls on CDC to reinstate univers...,True,True,True,True
4,Sirf #MukeshAmbani Company &amp; it's Worker...,False,False,False,False


In [110]:
# Missing-value counts, one per line: the four annotator columns, then the tweet text.
print(
    *df_7[['annotation_56', 'annotation_57', 'annotation_58', 'annotation_59', 'text']]
    .isnull()
    .sum(),
    sep='\n',
)

0
0
0
0
0


In [111]:
# Bind df_7's annotator columns to the generic names read by cohen_avg_1..cohen_avg_4.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_7[name]
    for name in ('annotation_56', 'annotation_57', 'annotation_58', 'annotation_59')
)

### Cohen_kappa average score of annotation 56

In [112]:
def cohen_avg_1():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_1``.

    ``annotation_1`` (reported as annotation 56) is scored against the
    module-level ``annotation_2``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_12, kappa_13, kappa_14 = (
        cohen_kappa_score(annotation_1, peer)
        for peer in (annotation_2, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_12 + kappa_13 + kappa_14) / 3, 3)
    print("Average Cohen Kappa Score for annotation 56:", mean_kappa)
    return mean_kappa

In [113]:
# Prints the mean kappa labelled "annotation 56"; the bare call also echoes the returned value.
cohen_avg_1()

Average Cohen Kappa Score for annotation 56: 0.635


0.635

### Cohen_kappa average score of annotation 57

In [114]:
def cohen_avg_2():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_2``.

    ``annotation_2`` (reported as annotation 57) is scored against the
    module-level ``annotation_1``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_21, kappa_23, kappa_24 = (
        cohen_kappa_score(annotation_2, peer)
        for peer in (annotation_1, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_21 + kappa_23 + kappa_24) / 3, 3)
    print("Average Cohen Kappa Score for annotation 57:", mean_kappa)
    return mean_kappa

In [115]:
# Prints the mean kappa labelled "annotation 57"; the bare call also echoes the returned value.
cohen_avg_2()

Average Cohen Kappa Score for annotation 57: 0.603


0.603

### Cohen_kappa average score of annotation 58

In [116]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 58) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 58:", mean_kappa)
    return mean_kappa

In [117]:
# Prints the mean kappa labelled "annotation 58"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 58: 0.564


0.564

### Cohen_kappa average score of annotation 59

In [118]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 59) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 59:", mean_kappa)
    return mean_kappa

In [119]:
# Prints the mean kappa labelled "annotation 59"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 59: 0.663


0.663

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [120]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_7`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_56', 'annotation_57', 'annotation_58', 'annotation_59']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_7.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_7.head())

In [121]:
# Filters df_7 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 56: 0.635
Average Cohen Kappa Score for annotation 57: 0.603
Average Cohen Kappa Score for annotation 58: 0.564
Average Cohen Kappa Score for annotation 59: 0.663
                                                text  annotation_56  \
0  Federal Judge Rules Against CDC, Throws Out Cr...          False   
1  Indeed. Even in the dysfunctional US health sy...          False   
2  a vaccine seems to be ESSENTIAL if we are to s...          False   
3  Nurses union calls on CDC to reinstate univers...           True   
4  Sirf  #MukeshAmbani  Company &amp; it's Worker...          False   

   annotation_57  annotation_58  annotation_59  
0          False           True           True  
1          False          False          False  
2           True          False          False  
3           True           True           True  
4          False          False          False  


## twitter_topic_2_vaccination.csv

In [122]:
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the original backslash form only resolves on Windows.
df_8 = pd.read_csv('twitter_topic/twitter_topic_2_vaccination.csv')
df_8.head()

Unnamed: 0,text,annotation_56,annotation_57,annotation_58,annotation_59
0,"Federal Judge Rules Against CDC, Throws Out Cr...",False,False,False,False
1,Indeed. Even in the dysfunctional US health sy...,True,True,True,True
2,a vaccine seems to be ESSENTIAL if we are to s...,True,False,True,True
3,Nurses union calls on CDC to reinstate univers...,False,False,False,False
4,Sirf #MukeshAmbani Company &amp; it's Worker...,False,False,False,False


In [123]:
# Missing-value counts, one per line: the four annotator columns, then the tweet text.
print(
    *df_8[['annotation_56', 'annotation_57', 'annotation_58', 'annotation_59', 'text']]
    .isnull()
    .sum(),
    sep='\n',
)

0
0
0
0
0


In [124]:
# Bind df_8's annotator columns to the generic names read by cohen_avg_1..cohen_avg_4.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_8[name]
    for name in ('annotation_56', 'annotation_57', 'annotation_58', 'annotation_59')
)

### Cohen_kappa average score of annotation 56

In [125]:
def cohen_avg_1():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_1``.

    ``annotation_1`` (reported as annotation 56) is scored against the
    module-level ``annotation_2``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_12, kappa_13, kappa_14 = (
        cohen_kappa_score(annotation_1, peer)
        for peer in (annotation_2, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_12 + kappa_13 + kappa_14) / 3, 3)
    print("Average Cohen Kappa Score for annotation 56:", mean_kappa)
    return mean_kappa

In [126]:
# Prints the mean kappa labelled "annotation 56"; the bare call also echoes the returned value.
cohen_avg_1()

Average Cohen Kappa Score for annotation 56: 0.69


0.69

### Cohen_kappa average score of annotation 57

In [127]:
def cohen_avg_2():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_2``.

    ``annotation_2`` (reported as annotation 57) is scored against the
    module-level ``annotation_1``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_21, kappa_23, kappa_24 = (
        cohen_kappa_score(annotation_2, peer)
        for peer in (annotation_1, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_21 + kappa_23 + kappa_24) / 3, 3)
    print("Average Cohen Kappa Score for annotation 57:", mean_kappa)
    return mean_kappa

In [128]:
# Prints the mean kappa labelled "annotation 57"; the bare call also echoes the returned value.
cohen_avg_2()

Average Cohen Kappa Score for annotation 57: 0.695


0.695

### Cohen_kappa average score of annotation 58

In [129]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 58) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 58:", mean_kappa)
    return mean_kappa

In [130]:
# Prints the mean kappa labelled "annotation 58"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 58: 0.585


0.585

### Cohen_kappa average score of annotation 59

In [131]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 59) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 59:", mean_kappa)
    return mean_kappa

In [132]:
# Prints the mean kappa labelled "annotation 59"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 59: 0.737


0.737

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [133]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_8`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_56', 'annotation_57', 'annotation_58', 'annotation_59']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_8.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_8.head())

In [134]:
# Filters df_8 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 56: 0.69
Average Cohen Kappa Score for annotation 57: 0.695
Average Cohen Kappa Score for annotation 58: 0.585
Average Cohen Kappa Score for annotation 59: 0.737
                                                text  annotation_56  \
0  Federal Judge Rules Against CDC, Throws Out Cr...          False   
1  Indeed. Even in the dysfunctional US health sy...           True   
2  a vaccine seems to be ESSENTIAL if we are to s...           True   
3  Nurses union calls on CDC to reinstate univers...          False   
4  Sirf  #MukeshAmbani  Company &amp; it's Worker...          False   

   annotation_57  annotation_58  annotation_59  
0          False          False          False  
1           True           True           True  
2          False           True           True  
3          False          False          False  
4          False          False          False  


## twitter_topic_3_lockdowns.csv

In [135]:
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the original backslash form only resolves on Windows.
df_9 = pd.read_csv('twitter_topic/twitter_topic_3_lockdowns.csv')
df_9.head()

Unnamed: 0,text,annotation_91,annotation_85,annotation_86,annotation_87
0,Bad news. Johnson &amp; Johnson pauses #CovidV...,False,False,False,False
1,Saw this on TV and you women Need to work with...,False,False,False,True
2,Two Indian vaccine candidates against COVID-19...,False,False,False,False
3,The point is media &amp; govt lie abt the numb...,False,False,False,False
4,California students will continue wearing mask...,False,True,False,False


In [136]:
# Missing-value counts, one per line: the four annotator columns, then the tweet text.
print(
    *df_9[['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91', 'text']]
    .isnull()
    .sum(),
    sep='\n',
)

0
0
0
0
0


In [137]:
# Bind df_9's annotator columns to the generic names read by cohen_avg_1..cohen_avg_4.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_9[name]
    for name in ('annotation_85', 'annotation_86', 'annotation_87', 'annotation_91')
)

### Cohen_kappa average score of annotation 85

In [138]:
def cohen_avg_1():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_1``.

    ``annotation_1`` (reported as annotation 85) is scored against the
    module-level ``annotation_2``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_12, kappa_13, kappa_14 = (
        cohen_kappa_score(annotation_1, peer)
        for peer in (annotation_2, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_12 + kappa_13 + kappa_14) / 3, 3)
    print("Average Cohen Kappa Score for annotation 85:", mean_kappa)
    return mean_kappa

In [139]:
# Prints the mean kappa labelled "annotation 85"; the bare call also echoes the returned value.
cohen_avg_1()

Average Cohen Kappa Score for annotation 85: 0.218


0.218

### Cohen_kappa average score of annotation 86

In [140]:
def cohen_avg_2():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_2``.

    ``annotation_2`` (reported as annotation 86) is scored against the
    module-level ``annotation_1``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_21, kappa_23, kappa_24 = (
        cohen_kappa_score(annotation_2, peer)
        for peer in (annotation_1, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_21 + kappa_23 + kappa_24) / 3, 3)
    print("Average Cohen Kappa Score for annotation 86:", mean_kappa)
    return mean_kappa

In [141]:
# Prints the mean kappa labelled "annotation 86"; the bare call also echoes the returned value.
cohen_avg_2()

Average Cohen Kappa Score for annotation 86: 0.192


0.192

### Cohen_kappa average score of annotation 87

In [142]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 87) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 87:", mean_kappa)
    return mean_kappa

In [143]:
# Prints the mean kappa labelled "annotation 87"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 87: 0.261


0.261

### Cohen_kappa average score of annotation 91

In [144]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 91) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 91:", mean_kappa)
    return mean_kappa

In [145]:
# Prints the mean kappa labelled "annotation 91"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 91: 0.328


0.328

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [146]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_9`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_9.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_9.head())

In [147]:
# Filters df_9 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 85: 0.218
Average Cohen Kappa Score for annotation 86: 0.192
Average Cohen Kappa Score for annotation 87: 0.261
Average Cohen Kappa Score for annotation 91: 0.328
0.192
                                                text  annotation_91  \
0  Bad news. Johnson &amp; Johnson pauses #CovidV...          False   
1  Saw this on TV and you women Need to work with...          False   
2  Two Indian vaccine candidates against COVID-19...          False   
3  The point is media &amp; govt lie abt the numb...          False   
4  California students will continue wearing mask...          False   

   annotation_85  annotation_87  
0          False          False  
1          False           True  
2          False          False  
3          False          False  
4           True          False  


## twitter_topic_3_masking_and_distancing.csv

In [148]:
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the original backslash form only resolves on Windows.
df_10 = pd.read_csv('twitter_topic/twitter_topic_3_masking_and_distancing.csv')
df_10.head()

Unnamed: 0,text,annotation_91,annotation_85,annotation_86,annotation_87
0,Bad news. Johnson &amp; Johnson pauses #CovidV...,False,False,False,False
1,Saw this on TV and you women Need to work with...,True,True,True,True
2,Two Indian vaccine candidates against COVID-19...,False,False,False,False
3,The point is media &amp; govt lie abt the numb...,False,False,False,False
4,California students will continue wearing mask...,True,True,True,True


In [149]:
# Missing-value counts, one per line: the four annotator columns, then the tweet text.
print(
    *df_10[['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91', 'text']]
    .isnull()
    .sum(),
    sep='\n',
)

0
0
0
0
0


In [150]:
# Bind df_10's annotator columns to the generic names read by cohen_avg_1..cohen_avg_4.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_10[name]
    for name in ('annotation_85', 'annotation_86', 'annotation_87', 'annotation_91')
)

### Cohen_kappa average score of annotation 85

In [152]:
def cohen_avg_1():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_1``.

    ``annotation_1`` (reported as annotation 85) is scored against the
    module-level ``annotation_2``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_12, kappa_13, kappa_14 = (
        cohen_kappa_score(annotation_1, peer)
        for peer in (annotation_2, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_12 + kappa_13 + kappa_14) / 3, 3)
    print("Average Cohen Kappa Score for annotation 85:", mean_kappa)
    return mean_kappa

In [153]:
# Prints the mean kappa labelled "annotation 85"; the bare call also echoes the returned value.
cohen_avg_1()

Average Cohen Kappa Score for annotation 85: 0.803


0.803

### Cohen_kappa average score of annotation 86

In [155]:
def cohen_avg_2():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_2``.

    ``annotation_2`` (reported as annotation 86) is scored against the
    module-level ``annotation_1``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_21, kappa_23, kappa_24 = (
        cohen_kappa_score(annotation_2, peer)
        for peer in (annotation_1, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_21 + kappa_23 + kappa_24) / 3, 3)
    print("Average Cohen Kappa Score for annotation 86:", mean_kappa)
    return mean_kappa

In [156]:
# Prints the mean kappa labelled "annotation 86"; the bare call also echoes the returned value.
cohen_avg_2()

Average Cohen Kappa Score for annotation 86: 0.856


0.856

### Cohen_kappa average score of annotation 87

In [158]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 87) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 87:", mean_kappa)
    return mean_kappa

In [159]:
# Prints the mean kappa labelled "annotation 87"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 87: 0.814


0.814

### Cohen_kappa average score of annotation 91

In [161]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 91) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 91:", mean_kappa)
    return mean_kappa

In [162]:
# Prints the mean kappa labelled "annotation 91"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 91: 0.832


0.832

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [164]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_10`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_10.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_10.head())

In [165]:
# Filters df_10 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 85: 0.803
Average Cohen Kappa Score for annotation 86: 0.856
Average Cohen Kappa Score for annotation 87: 0.814
Average Cohen Kappa Score for annotation 91: 0.832
                                                text  annotation_91  \
0  Bad news. Johnson &amp; Johnson pauses #CovidV...          False   
1  Saw this on TV and you women Need to work with...           True   
2  Two Indian vaccine candidates against COVID-19...          False   
3  The point is media &amp; govt lie abt the numb...          False   
4  California students will continue wearing mask...           True   

   annotation_85  annotation_86  annotation_87  
0          False          False          False  
1           True           True           True  
2          False          False          False  
3          False          False          False  
4           True           True           True  


## twitter_topic_3_vaccination.csv



In [166]:
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the original backslash form only resolves on Windows.
df_11 = pd.read_csv('twitter_topic/twitter_topic_3_vaccination.csv')
df_11.head()

Unnamed: 0,text,annotation_91,annotation_85,annotation_86,annotation_87
0,Bad news. Johnson &amp; Johnson pauses #CovidV...,True,True,True,True
1,Saw this on TV and you women Need to work with...,False,False,False,False
2,Two Indian vaccine candidates against COVID-19...,True,True,True,True
3,The point is media &amp; govt lie abt the numb...,True,True,True,True
4,California students will continue wearing mask...,False,False,False,False


In [167]:
# Missing-value counts, one per line: the four annotator columns, then the tweet text.
print(
    *df_11[['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91', 'text']]
    .isnull()
    .sum(),
    sep='\n',
)

0
0
0
0
0


In [168]:
# Bind df_11's annotator columns to the generic names read by cohen_avg_1..cohen_avg_4.
annotation_1, annotation_2, annotation_3, annotation_4 = (
    df_11[name]
    for name in ('annotation_85', 'annotation_86', 'annotation_87', 'annotation_91')
)

### Cohen_kappa average score of annotation 85

In [170]:
def cohen_avg_1():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_1``.

    ``annotation_1`` (reported as annotation 85) is scored against the
    module-level ``annotation_2``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_12, kappa_13, kappa_14 = (
        cohen_kappa_score(annotation_1, peer)
        for peer in (annotation_2, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_12 + kappa_13 + kappa_14) / 3, 3)
    print("Average Cohen Kappa Score for annotation 85:", mean_kappa)
    return mean_kappa

In [171]:
# Prints the mean kappa labelled "annotation 85"; the bare call also echoes the returned value.
cohen_avg_1()

Average Cohen Kappa Score for annotation 85: 0.658


0.658

### Cohen_kappa average score of annotation 86

In [173]:
def cohen_avg_2():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_2``.

    ``annotation_2`` (reported as annotation 86) is scored against the
    module-level ``annotation_1``, ``annotation_3`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_21, kappa_23, kappa_24 = (
        cohen_kappa_score(annotation_2, peer)
        for peer in (annotation_1, annotation_3, annotation_4)
    )
    mean_kappa = round((kappa_21 + kappa_23 + kappa_24) / 3, 3)
    print("Average Cohen Kappa Score for annotation 86:", mean_kappa)
    return mean_kappa

In [174]:
# Prints the mean kappa labelled "annotation 86"; the bare call also echoes the returned value.
cohen_avg_2()

Average Cohen Kappa Score for annotation 86: 0.621


0.621

### Cohen_kappa average score of annotation 87

In [176]:
def cohen_avg_3():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_3``.

    ``annotation_3`` (reported as annotation 87) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_4``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_31, kappa_32, kappa_34 = (
        cohen_kappa_score(annotation_3, peer)
        for peer in (annotation_1, annotation_2, annotation_4)
    )
    mean_kappa = round((kappa_31 + kappa_32 + kappa_34) / 3, 3)
    print("Average Cohen Kappa Score for annotation 87:", mean_kappa)
    return mean_kappa

In [177]:
# Prints the mean kappa labelled "annotation 87"; the bare call also echoes the returned value.
cohen_avg_3()

Average Cohen Kappa Score for annotation 87: 0.636


0.636

### Cohen_kappa average score of annotation 91

In [179]:
def cohen_avg_4():
    """Print and return the mean pairwise Cohen's kappa of ``annotation_4``.

    ``annotation_4`` (reported as annotation 91) is scored against the
    module-level ``annotation_1``, ``annotation_2`` and ``annotation_3``;
    the three kappas are averaged and rounded to three decimals.
    """
    kappa_41, kappa_42, kappa_43 = (
        cohen_kappa_score(annotation_4, peer)
        for peer in (annotation_1, annotation_2, annotation_3)
    )
    mean_kappa = round((kappa_41 + kappa_42 + kappa_43) / 3, 3)
    print("Average Cohen Kappa Score for annotation 91:", mean_kappa)
    return mean_kappa

In [180]:
# Prints the mean kappa labelled "annotation 91"; the bare call also echoes the returned value.
cohen_avg_4()

Average Cohen Kappa Score for annotation 91: 0.668


0.668

##### If the average Cohen's kappa score of any annotator is below 0.2, we drop that annotator's column (the averages are rounded to three decimals before the comparison)

In [182]:
def drop_annotate(threshold=0.2):
    """Drop low-agreement annotator columns from ``df_11`` in place and print its head.

    Each annotator's rounded mean pairwise kappa (from ``cohen_avg_1`` ..
    ``cohen_avg_4``, which also print their results) is compared with
    ``threshold``; every column scoring strictly below it is removed.

    Args:
        threshold: Minimum mean kappa a column needs in order to be kept
            (default 0.2).
    """
    averages = [cohen_avg_1(), cohen_avg_2(), cohen_avg_3(), cohen_avg_4()]
    column_names = ['annotation_85', 'annotation_86', 'annotation_87', 'annotation_91']
    low_agreement = []
    for column, average in zip(column_names, averages):
        if average < threshold:
            print(average)
            low_agreement.append(column)
    # errors="ignore": the averages are computed from the annotation_N series
    # captured before any drop, so a re-run flags the same columns again;
    # without it the second run raises KeyError on the already-removed column.
    df_11.drop(columns=low_agreement, inplace=True, errors="ignore")
    print(df_11.head())

In [183]:
# Filters df_11 in place using the kappa threshold and prints the resulting head.
drop_annotate()

Average Cohen Kappa Score for annotation 85: 0.658
Average Cohen Kappa Score for annotation 86: 0.621
Average Cohen Kappa Score for annotation 87: 0.636
Average Cohen Kappa Score for annotation 91: 0.668
                                                text  annotation_91  \
0  Bad news. Johnson &amp; Johnson pauses #CovidV...           True   
1  Saw this on TV and you women Need to work with...          False   
2  Two Indian vaccine candidates against COVID-19...           True   
3  The point is media &amp; govt lie abt the numb...           True   
4  California students will continue wearing mask...          False   

   annotation_85  annotation_86  annotation_87  
0           True           True           True  
1          False          False          False  
2           True           True           True  
3           True           True           True  
4          False          False          False  
