## Load data

In [1]:
import pandas as pd

In [2]:
# Read the English conversation corpus into a DataFrame and display it.
# The rendered table shows an 'Unnamed: 0' column next to conversation_id,
# text, sentiment and label.
dataset_path = 'data/dataset_eng.csv'
dataset = pd.read_csv(dataset_path)
dataset

Unnamed: 0,conversation_id,text,sentiment,label
0,0000604306a283600b730276a2039471,a9b326df4e6da61c5b6f5e1058be83a2: b8810fee2f4a...,Negative,0
1,0001347c00d419eb537c0692e6e58eba,e2bd430b29412d9267886e187ba28075: say asl and ...,Positive,0
2,000197b21283dc47810760e499d1f8ec,487862cd4ec27d841e2d2e80e8d91955: joint 5c7c53...,Negative,0
3,0002de15312dc33d78b6e9e4b5f61f1f,a1a8f84c419e34a1a72625e2ef245516: hi a1a8f84c4...,Negative,0
4,0002ee38ac5e78e7edbc4d4a556ec4b7,8150320816528784d7dfe286d781de4c: hey :) male ...,Negative,0
...,...,...,...,...
160768,fffde018f39dafd4c8ef4ebaaadbec97,0a39f78bcb297ab0ebe8a29c28bfed89: bugmail: [bu...,Negative,0
160769,fffe4d1b08952afb8627a9b594f913c7,e5a96ed432ed5041be76d3fb1784fb95: do you want ...,Negative,0
160770,ffff2d0e314610b1df596482d806ada9,eccc65c89e622a83cfec5827c16391de: haiiiiiiiii....,Negative,0
160771,ffff74f40b58182a2521235b9db901d4,7bc167d759d9c56d43d1d46575433d35: hey 169b2106...,Positive,0


In [3]:
# number of conversations whose label is 1 (grooming)
n_grooming = dataset[dataset['label'] == 1].shape[0]
print('Grooming_conversations', n_grooming)

Grooming_conversations 5753


## Baseline SVM

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [5]:
# hold out 30% of all conversations for testing; the fixed seed keeps the split reproducible
split_options = {'test_size': 0.3, 'random_state': 42}
train, test = train_test_split(dataset, **split_options)

In [6]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train['text'], train['label']
X_test, y_test = test['text'], test['label']

In [7]:
# import for vectorization
from sklearn.feature_extraction.text import TfidfVectorizer

In [8]:
# TF-IDF features: the vectorizer is fitted on the training texts only;
# the test texts are then mapped with that same fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [9]:
# baseline classifier: support vector machine with a linear kernel
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [10]:
# label every held-out conversation with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [11]:
# per-class precision, recall and F1 on the held-out conversations
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46544
           1       0.90      0.97      0.94      1688

    accuracy                           1.00     48232
   macro avg       0.95      0.98      0.97     48232
weighted avg       1.00      1.00      1.00     48232



In [12]:
# flag every held-out conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]
print(misclassified)

# collect text, ground truth and model output in one frame
misclassified_base = misclassified.to_frame()
misclassified_base['True label'] = truelabels
misclassified_base['Predicted label'] = predictedlabels

94918     868885a424ff7e7a90251a07099f7150: hii 868885a4...
44073     8bb86493a2f4ac9cd6d2c19062865829: hi 520c8494c...
137179    826e428ecb6c7c2a2695f0cb1b711756: hi b950e1248...
155096    a6d5de9b5e00b181fc3be41fcd94953b: you there i ...
68283     e58d1ff65d38573b2e8d46d6880e7e16: hi 0428e454f...
                                ...                        
35221     7aca8e54bb10bb50065a9ff263288054: hi' 4d63e775...
104657    9377eedaee855e765dc252c00a1e0802: hello 313835...
9660      120d2a9ae0d4c82283bc605fc35cdf67: oyoooooooooo...
3826      05859fed5e719b252e718eacd08d8821: hi 1649b06bc...
143829    e159cd65554bbed87970a2685f194940: hello 5281d6...
Name: text, Length: 221, dtype: object


## Sentiments

In [13]:
# Partition the corpus by label, then divide the grooming conversations
# by their sentiment tag.
# label 0: non-grooming, label 1: grooming
df_rest = dataset.loc[dataset['label'] == 0]
df_grooming = dataset.loc[dataset['label'] == 1]
# grooming conversations tagged 'Positive'
df_positive = df_grooming.loc[df_grooming['sentiment'] == 'Positive']
# grooming conversations tagged 'Negative'
df_negative = df_grooming.loc[df_grooming['sentiment'] == 'Negative']

### Positive Tested

In [14]:
# One 70/30 split per subset, all with the same seed.
split_options = {'test_size': 0.3, 'random_state': 42}
# non-grooming conversations
df_rest_train, df_rest_test = train_test_split(df_rest, **split_options)
# grooming conversations with positive sentiment
df_pos_train, df_pos_test = train_test_split(df_positive, **split_options)
# grooming conversations with negative sentiment
df_neg_train, df_neg_test = train_test_split(df_negative, **split_options)

In [15]:
# training pool: grooming conversations of both sentiments plus non-grooming
train_parts = [df_pos_train, df_neg_train, df_rest_train]
train_data = pd.concat(train_parts, axis='index')
# evaluation pool: positive-sentiment grooming plus non-grooming
test_parts = [df_pos_test, df_rest_test]
test_data = pd.concat(test_parts, axis='index')

In [16]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train_data['text'], train_data['label']
X_test, y_test = test_data['text'], test_data['label']

In [17]:
# fit TF-IDF on the training texts, then map the test texts with the fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [18]:
# linear-kernel SVM trained on grooming conversations of both sentiments plus non-grooming
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [19]:
# label the evaluation pool with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [20]:
# per-class precision, recall and F1 for the positive-sentiment evaluation pool
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46506
           1       0.83      0.97      0.89       809

    accuracy                           1.00     47315
   macro avg       0.91      0.98      0.95     47315
weighted avg       1.00      1.00      1.00     47315



In [21]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]

# collect text, ground truth and model output in one frame
misclassified_ptest = misclassified.to_frame()
misclassified_ptest['True label'] = truelabels
misclassified_ptest['Predicted label'] = predictedlabels

### Negative Tested

In [22]:
# Evaluate the already-fitted SVM on negative-sentiment grooming conversations.
# test data is non-grooming and negative sentiment (both held-out parts)
test_data = pd.concat([df_neg_test, df_rest_test], axis=0)
# define X and y
X_test = test_data['text']
y_test = test_data['label']
# vectorize: reuse the vectorizer the SVM was trained with. Fitting a fresh
# TfidfVectorizer here only lines up with the model's feature columns as long
# as X_train is unchanged since training, and it costs a full extra pass over
# the training texts.
X_test_vect = vectorizer.transform(X_test)
# predict
y_pred = svm.predict(X_test_vect)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46506
           1       0.84      0.96      0.90       918

    accuracy                           1.00     47424
   macro avg       0.92      0.98      0.95     47424
weighted avg       1.00      1.00      1.00     47424



In [23]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]

# collect text, ground truth and model output in one frame
misclassified_ntest = misclassified.to_frame()
misclassified_ntest['True label'] = truelabels
misclassified_ntest['Predicted label'] = predictedlabels

### Positive Trained

In [24]:
# training pool: every positive-sentiment grooming conversation plus the non-grooming training part
train_parts = [df_positive, df_rest_train]
train_data = pd.concat(train_parts, axis='index')
# evaluation pool: every negative-sentiment grooming conversation plus the non-grooming test part
test_parts = [df_negative, df_rest_test]
test_data = pd.concat(test_parts, axis='index')

In [25]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train_data['text'], train_data['label']
X_test, y_test = test_data['text'], test_data['label']

In [26]:
# fit TF-IDF on the training texts, then map the test texts with the fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [27]:
# linear-kernel SVM trained on positive-sentiment grooming plus non-grooming
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [28]:
# label the evaluation pool with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [29]:
# per-class precision, recall and F1 when trained on positive and tested on negative sentiment
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00     46506
           1       0.97      0.89      0.93      3058

    accuracy                           0.99     49564
   macro avg       0.98      0.94      0.96     49564
weighted avg       0.99      0.99      0.99     49564



In [30]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]
print(misclassified)

# collect text, ground truth and model output in one frame
misclassified_pt = misclassified.to_frame()
misclassified_pt['True label'] = truelabels
misclassified_pt['Predicted label'] = predictedlabels

1056     426d0b70843d16c615f6e754c5b718d1: hi 36b5f84e4...
1445     fb1d96b82911c7e1561bcd24eedd8bda: hi fb1d96b82...
1677     95e6690e70956f690f4dd7faa80d1054: :) d9a3b807f...
2389     fe784e376f0fec7691b114f16d7f953e: if u wouldnt...
2767     4982b68761043e693da736df5852a7c5: do u know ur...
                               ...                        
72283    220840d2c4fda35d80b9e3855263d7b9: no my parent...
94384    0f0c160cf83143bfb4969214a729543d: hi 8a6e288a0...
46270    8c4078d55ba07096949e82f0993a423b: i cant meet ...
56665    0d2a55796774a59c0af915376f6ca962: hi 15c217074...
26830    2df46ad50ba97fbc91c9517e4b877364: hi 2df46ad50...
Name: text, Length: 421, dtype: object


### Negative Trained

In [31]:
# training pool: every negative-sentiment grooming conversation plus the non-grooming training part
train_parts = [df_negative, df_rest_train]
train_data = pd.concat(train_parts, axis='index')
# evaluation pool: every positive-sentiment grooming conversation plus the non-grooming test part
test_parts = [df_positive, df_rest_test]
test_data = pd.concat(test_parts, axis='index')

In [32]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train_data['text'], train_data['label']
X_test, y_test = test_data['text'], test_data['label']

In [33]:
# fit TF-IDF on the training texts, then map the test texts with the fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [34]:
# linear-kernel SVM trained on negative-sentiment grooming plus non-grooming
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [35]:
# label the evaluation pool with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [36]:
# per-class precision, recall and F1 when trained on negative and tested on positive sentiment
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46506
           1       0.95      0.93      0.94      2695

    accuracy                           0.99     49201
   macro avg       0.97      0.96      0.97     49201
weighted avg       0.99      0.99      0.99     49201



In [37]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]
print(misclassified)

# collect text, ground truth and model output in one frame
misclassified_nt = misclassified.to_frame()
misclassified_nt['True label'] = truelabels
misclassified_nt['Predicted label'] = predictedlabels

1240       398cab8240d8a5a9f5f201115c0337c6: hey there baby
2177      398cab8240d8a5a9f5f201115c0337c6: hey there 8f...
2954      ea83f356e7a3329c1f9982649127f07b: some guy was...
4257      5c91acf8c2808994d9681a0ee5d28ea3: where did yo...
5166      b8931a8b614fb54f6051ffc75f39db29: heather, are...
                                ...                        
72283     220840d2c4fda35d80b9e3855263d7b9: no my parent...
105249    7ca517cbd80d30aaf838cdb7f0b6e416: hey 7620a4f7...
147081    dea3f8b63c53a9ad473ca562d70b7c2c: hi dd951f65f...
94384     0f0c160cf83143bfb4969214a729543d: hi 8a6e288a0...
56665     0d2a55796774a59c0af915376f6ca962: hi 15c217074...
Name: text, Length: 343, dtype: object


### Positive Trained and Tested

In [38]:
# 70/30 split of the positive-sentiment grooming conversations (fixed seed)
split_options = {'test_size': 0.3, 'random_state': 42}
df_pos_train, df_pos_test = train_test_split(df_positive, **split_options)
# training pool: positive-sentiment grooming plus non-grooming (training parts)
train_data = pd.concat([df_pos_train, df_rest_train], axis='index')
# evaluation pool: positive-sentiment grooming plus non-grooming (test parts)
test_data = pd.concat([df_pos_test, df_rest_test], axis='index')

In [39]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train_data['text'], train_data['label']
X_test, y_test = test_data['text'], test_data['label']

In [40]:
# fit TF-IDF on the training texts, then map the test texts with the fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [41]:
# linear-kernel SVM trained on the positive-sentiment training part plus non-grooming
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [42]:
# label the evaluation pool with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [43]:
# per-class precision, recall and F1 when both training and testing use positive sentiment
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46506
           1       0.95      0.90      0.92       809

    accuracy                           1.00     47315
   macro avg       0.97      0.95      0.96     47315
weighted avg       1.00      1.00      1.00     47315



In [44]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]
print(misclassified)

# collect text, ground truth and model output in one frame
misclassified_ptt = misclassified.to_frame()
misclassified_ptt['True label'] = truelabels
misclassified_ptt['Predicted label'] = predictedlabels

87938     5a41bf6d7766977c25b0b6a97e4e1d58: hey baby i m...
68786     201a9319a4df100cb91e81644345f3b2: yeah, im on ...
132722            dacf132a918dc8a6ad5206a92e262ea4: you on?
71705     c420f4c2451ed50149332783dd90db59: hey let me k...
116780            ad3403c013a364bbde185b702aa5735d: u there
                                ...                        
46532     20f1b3a80d54187b48a7f75a798070ce: i love you y...
141811    eb6309bf2390a9c855eb32497b7fd6dd: hey 3989cf31...
71501     1cbe227ab5505cdcc648cf79be55838a: hiii..! f938...
94384     0f0c160cf83143bfb4969214a729543d: hi 8a6e288a0...
26830     2df46ad50ba97fbc91c9517e4b877364: hi 2df46ad50...
Name: text, Length: 119, dtype: object


### Negative Trained and Tested

In [45]:
# 70/30 split of the negative-sentiment grooming conversations (fixed seed)
split_options = {'test_size': 0.3, 'random_state': 42}
df_neg_train, df_neg_test = train_test_split(df_negative, **split_options)
# training pool: negative-sentiment grooming plus non-grooming (training parts)
train_data = pd.concat([df_neg_train, df_rest_train], axis='index')
# evaluation pool: negative-sentiment grooming plus non-grooming (test parts)
test_data = pd.concat([df_neg_test, df_rest_test], axis='index')

In [46]:
# inputs are the conversation texts, targets are the 0/1 labels
X_train, y_train = train_data['text'], train_data['label']
X_test, y_test = test_data['text'], test_data['label']

In [47]:
# fit TF-IDF on the training texts, then map the test texts with the fitted vectorizer
vectorizer = TfidfVectorizer()
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
X_test_vect = vectorizer.transform(raw_documents=X_test)

In [48]:
# linear-kernel SVM trained on the negative-sentiment training part plus non-grooming
svm = SVC(kernel='linear')
svm.fit(X=X_train_vect, y=y_train)

SVC(kernel='linear')

In [49]:
# label the evaluation pool with the fitted model
y_pred = svm.predict(X=X_test_vect)

In [50]:
# per-class precision, recall and F1 when both training and testing use negative sentiment
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     46506
           1       0.88      0.92      0.90       918

    accuracy                           1.00     47424
   macro avg       0.94      0.96      0.95     47424
weighted avg       1.00      1.00      1.00     47424



In [51]:
# flag every evaluated conversation whose prediction disagrees with its label
is_wrong = y_pred != y_test

# texts of the misclassified conversations (original row index is kept)
misclassified = X_test[is_wrong]

# matching true and predicted labels
truelabels = y_test[is_wrong]
predictedlabels = y_pred[is_wrong]
print(misclassified)

# collect text, ground truth and model output in one frame
misclassified_ntt = misclassified.to_frame()
misclassified_ntt['True label'] = truelabels
misclassified_ntt['Predicted label'] = predictedlabels

93535     2a1ac47332661b61d943d3a4e08dda5a: hey whats up...
149385    5a91e880aa9a503098926794b7101a53: whats up 404...
26697     eb38e8279981c9f04dd6641cfdcf7200: no........i ...
79671     993ecd27ac2d977fe189d4456ffcae37: hey mo dd1b6...
131111    f7ba507db5b5b1150eabf5707f0334dd: u there yet?...
                                ...                        
8892      7dfb35eb7e30f2ac72e2808256087e08: heyy 8f01351...
118693    35bed363ff480c9c085919cbf494e15f: hey fb15b78d...
72283     220840d2c4fda35d80b9e3855263d7b9: no my parent...
94384     0f0c160cf83143bfb4969214a729543d: hi 8a6e288a0...
56665     0d2a55796774a59c0af915376f6ca962: hi 15c217074...
Name: text, Length: 191, dtype: object


## Analyzing false predictions

### Baseline

In [52]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_base = misclassified_base.loc[misclassified_base['True label'] == 0]
fn_base = misclassified_base.loc[misclassified_base['True label'] == 1]

In [53]:
# sentiment tags of the baseline false positives, looked up by original row index
sentiments = dataset.loc[fp_base.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the baseline false negatives
sentiments = dataset.loc[fn_base.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 40
False Positives labeled as 'Negative': 134
False Negatives labeled as 'Positive': 18
False Negatives labeled as 'Negative': 29


### Positive Trained

In [54]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_pt = misclassified_pt.loc[misclassified_pt['True label'] == 0]
fn_pt = misclassified_pt.loc[misclassified_pt['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_pt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_pt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 29
False Positives labeled as 'Negative': 54
False Negatives labeled as 'Positive': 0
False Negatives labeled as 'Negative': 338


### Negative Trained

In [55]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_nt = misclassified_nt.loc[misclassified_nt['True label'] == 0]
fn_nt = misclassified_nt.loc[misclassified_nt['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_nt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_nt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 14
False Positives labeled as 'Negative': 127
False Negatives labeled as 'Positive': 202
False Negatives labeled as 'Negative': 0


### Positive Trained and Tested

In [56]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_ptt = misclassified_ptt.loc[misclassified_ptt['True label'] == 0]
fn_ptt = misclassified_ptt.loc[misclassified_ptt['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_ptt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_ptt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 17
False Positives labeled as 'Negative': 20
False Negatives labeled as 'Positive': 82
False Negatives labeled as 'Negative': 0


### Negative Trained and Tested

In [57]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_ntt = misclassified_ntt.loc[misclassified_ntt['True label'] == 0]
fn_ntt = misclassified_ntt.loc[misclassified_ntt['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_ntt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_ntt.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 11
False Positives labeled as 'Negative': 103
False Negatives labeled as 'Positive': 0
False Negatives labeled as 'Negative': 77


### Positive Tested

In [58]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_ptest = misclassified_ptest.loc[misclassified_ptest['True label'] == 0]
fn_ptest = misclassified_ptest.loc[misclassified_ptest['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_ptest.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_ptest.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 29
False Positives labeled as 'Negative': 134
False Negatives labeled as 'Positive': 26
False Negatives labeled as 'Negative': 0


### Negative Tested

In [59]:
# true label 0 but misclassified -> false positive; true label 1 -> false negative
fp_ntest = misclassified_ntest.loc[misclassified_ntest['True label'] == 0]
fn_ntest = misclassified_ntest.loc[misclassified_ntest['True label'] == 1]

# sentiment tags of the false positives, looked up by original row index
sentiments = dataset.loc[fp_ntest.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Positives labeled as 'Positive': {n_positive}")
print(f"False Positives labeled as 'Negative': {n_negative}")

# same breakdown for the false negatives
sentiments = dataset.loc[fn_ntest.index.to_list(), ['sentiment']]
n_positive = len(sentiments[sentiments['sentiment'] == 'Positive'])
n_negative = len(sentiments[sentiments['sentiment'] == 'Negative'])
print(f"False Negatives labeled as 'Positive': {n_positive}")
print(f"False Negatives labeled as 'Negative': {n_negative}")

False Positives labeled as 'Positive': 29
False Positives labeled as 'Negative': 134
False Negatives labeled as 'Positive': 0
False Negatives labeled as 'Negative': 36


### Non-grooming

In [61]:
# Tone distribution among the non-grooming conversations.
# The two "Percentage" lines print fractions of the subset (values between 0 and 1).
n_rest = df_rest.shape[0]
n_rest_positive = df_rest[df_rest['sentiment'] == 'Positive'].shape[0]
n_rest_negative = df_rest[df_rest['sentiment'] == 'Negative'].shape[0]
print(f"Non-grooming Positive Tone: {n_rest_positive}")
print(f"Non-grooming Negative Tone: {n_rest_negative}")
print(f"Percentage Positive Tone: {n_rest_positive / n_rest}")
print(f"Percentage Negative Tone: {n_rest_negative / n_rest}")

Non-grooming Positive Tone: 40081
Non-grooming Negative Tone: 114939
Percentage Positive Tone: 0.25855373500193524
Percentage Negative Tone: 0.7414462649980648
