In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

### Merging all comments together

In [2]:
# train and test data
en_comments_train = pd.read_csv('en_comments_lexicon_tags_train.csv')
tl_comments_train = pd.read_csv('tl_comments_lexicon_tags_train.csv')
tlen_comments_train = pd.read_csv('tlen_comments_lexicon_tags_train.csv')

en_comments_test = pd.read_csv('en_comments_lexicon_tags_test.csv')
tl_comments_test = pd.read_csv('tl_comments_lexicon_tags_test.csv')
tlen_comments_test = pd.read_csv('tlen_comments_lexicon_tags_test.csv')

In [3]:
full_train = pd.concat([en_comments_train, tl_comments_train, tlen_comments_train])
full_train

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment
0,curve grade hehehehe,en,1,1
1,master lesson perfectly,en,1,1
2,terror but teach well,en,1,1
3,unenthusiastic barely understand teach,en,-1,-1
4,not applicable,en,0,-1
...,...,...,...,...
323,malimit siyang magtanong bagsak recitation,tlen,-1,-1
324,tas quiz multiple choice kaso wala projector i...,tlen,-1,-1
325,lagi sineset mood classroom ambiance,tlen,1,0
326,malabo magbigay requirements,tlen,-1,1


In [4]:
full_test = pd.concat([en_comments_test, tl_comments_test, tlen_comments_test])
full_test

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment
0,say cod fish he whale stay alive escape youre ...,en,1,1
1,punctual professor give importance reward stud...,en,1,1
2,joke funny not,en,0,-1
3,follow syllabus time,en,1,1
4,absent,en,-1,0
...,...,...,...,...
23,labo magturo hirap magpatest,tlen,-1,-1
24,magaling magturo mahirap magpaexam ngunit masa...,tlen,1,1
25,nagbibigay chance makabawi students,tlen,1,1
26,malakas magbigay incentives,tlen,1,1


In [5]:
full_train_test = pd.concat([full_train, full_test])
full_train_test

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment
0,curve grade hehehehe,en,1,1
1,master lesson perfectly,en,1,1
2,terror but teach well,en,1,1
3,unenthusiastic barely understand teach,en,-1,-1
4,not applicable,en,0,-1
...,...,...,...,...
23,labo magturo hirap magpatest,tlen,-1,-1
24,magaling magturo mahirap magpaexam ngunit masa...,tlen,1,1
25,nagbibigay chance makabawi students,tlen,1,1
26,malakas magbigay incentives,tlen,1,1


In [6]:
accuracy_lexicon = accuracy_score(full_train_test['Sentiment'], full_train_test['Lexicon Sentiment'])
correct_lexicon = accuracy_score(full_train_test['Sentiment'], full_train_test['Lexicon Sentiment'], normalize=False)

print('Lexicon Sentiments')
print('Accuracy Score: ', accuracy_lexicon)
print('Count of Correctly Classified Comments: ', correct_lexicon, '/ 739')

Lexicon Sentiments
Accuracy Score:  0.5548037889039242
Count of Correctly Classified Comments:  410 / 739


In [7]:
print(classification_report(full_train_test['Sentiment'], full_train_test['Lexicon Sentiment']))

              precision    recall  f1-score   support

          -1       0.65      0.45      0.53       353
           0       0.14      0.36      0.20        45
           1       0.61      0.69      0.65       341

    accuracy                           0.55       739
   macro avg       0.47      0.50      0.46       739
weighted avg       0.60      0.55      0.57       739



### Vectorize Comments

In [8]:
# vectorize as train and test concat
vectorizer = CountVectorizer()
matrix = vectorizer.fit_transform(full_train_test.Comment)
train_test_matrix = pd.DataFrame(matrix.toarray(),
                  index=full_train_test.index,
                  columns=vectorizer.get_feature_names())
train_test_matrix

Unnamed: 0,aaralin,able,absence,absent,accept,accommodate,accurate,act,activities,activity,...,yak,yan,yay,year,yet,yosi,youre,youtube,yung,âhugotsâ
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# appending lexicon sentiment 
train_test_matrix = pd.concat([train_test_matrix, full_train_test['Lexicon Sentiment']], axis=1)
train_test_matrix

Unnamed: 0,aaralin,able,absence,absent,accept,accommodate,accurate,act,activities,activity,...,yan,yay,year,yet,yosi,youre,youtube,yung,âhugotsâ,Lexicon Sentiment
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1
24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### Naive Bayes

In [10]:
x_train = train_test_matrix.iloc[:-74,:-1]
x_train

Unnamed: 0,aaralin,able,absence,absent,accept,accommodate,accurate,act,activities,activity,...,yak,yan,yay,year,yet,yosi,youre,youtube,yung,âhugotsâ
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
324,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
325,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
326,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
y_train = train_test_matrix.iloc[:-74,-1]
y_train

0      1
1      1
2      1
3     -1
4     -1
      ..
323   -1
324   -1
325    0
326    1
327   -1
Name: Lexicon Sentiment, Length: 665, dtype: int64

In [12]:
nb_clf = MultinomialNB()
nb_clf.fit(x_train,y_train)

MultinomialNB()

### SVM

In [13]:
# WITH PARAM TUNING
svm_clf = SVC(C=100, gamma=0.001, kernel='rbf')
svm_clf_proba = SVC(probability=True, C=100, gamma=0.001, kernel='rbf')
svm_clf.fit(x_train,y_train)
svm_clf_proba.fit(x_train,y_train)

SVC(C=100, gamma=0.001, probability=True)

## Testing 

In [14]:
x_test = train_test_matrix.iloc[-74:,:-1]
x_test

Unnamed: 0,aaralin,able,absence,absent,accept,accommodate,accurate,act,activities,activity,...,yak,yan,yay,year,yet,yosi,youre,youtube,yung,âhugotsâ
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# manual annotations
y_test = full_train_test.iloc[-74:,-2]
y_test

0     1
1     1
2     0
3     1
4    -1
     ..
23   -1
24    1
25    1
26    1
27    1
Name: Sentiment, Length: 74, dtype: int64

In [16]:
# prediction under Naive Bayes
y_pred_nb = nb_clf.predict(x_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
accuracy_nb # accuracy

0.5945945945945946

In [17]:
y_pred_nb # preditions

array([ 0,  1, -1,  1, -1, -1, -1,  1,  1,  1,  1,  1,  0,  1, -1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1, -1,
       -1, -1,  1,  1,  1,  1,  1,  1, -1, -1,  1, -1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  0, -1, -1, -1,  0,  1,  1,  1,  1,  1,  1,  1,  1,
        1, -1,  1, -1,  1,  1], dtype=int64)

In [18]:
# probabilities for each class (Naive Bayes)
y_pred_proba_nb = nb_clf.predict_proba(x_test)
y_pred_proba_nb

array([[6.38764418e-03, 9.85852870e-01, 7.75948604e-03],
       [3.46236948e-01, 1.25656079e-01, 5.28106972e-01],
       [6.48780883e-01, 9.69884004e-03, 3.41520277e-01],
       [2.13538028e-01, 6.38450123e-02, 7.22616960e-01],
       [4.97286386e-01, 1.22779048e-01, 3.79934566e-01],
       [5.63300706e-01, 1.41632724e-03, 4.35282966e-01],
       [4.97286386e-01, 1.22779048e-01, 3.79934566e-01],
       [1.72099807e-01, 8.53698415e-02, 7.42530352e-01],
       [9.95011598e-03, 1.43787729e-03, 9.88612007e-01],
       [2.60019859e-01, 4.01181519e-02, 6.99861989e-01],
       [5.68737787e-02, 2.05468747e-02, 9.22579347e-01],
       [3.41675054e-01, 6.88891555e-03, 6.51436030e-01],
       [2.38230829e-01, 4.70549852e-01, 2.91219319e-01],
       [1.71607898e-01, 1.70251662e-01, 6.58140440e-01],
       [8.88684734e-01, 1.10710351e-02, 1.00244231e-01],
       [2.14938597e-01, 2.70213925e-02, 7.58040010e-01],
       [4.30049342e-02, 5.87944123e-03, 9.51115625e-01],
       [4.68506534e-01, 3.03654

In [19]:
# prediction under SVM
y_pred_svm = svm_clf.predict(x_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_svm # accuracy

0.5675675675675675

In [20]:
y_pred_svm

array([ 1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1,  1,  1,  1,  1,  1,
       -1,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1], dtype=int64)

In [21]:
# probabilities for each class (SVM)
y_pred_proba_svm = svm_clf_proba.predict_proba(x_test)
y_pred_proba_svm

array([[0.09334332, 0.07854795, 0.82810874],
       [0.16609109, 0.08408889, 0.74982002],
       [0.83022443, 0.0851832 , 0.08459237],
       [0.24934978, 0.19385314, 0.55679709],
       [0.43749953, 0.21080144, 0.35169904],
       [0.8132116 , 0.04882203, 0.13796637],
       [0.43749953, 0.21080144, 0.35169904],
       [0.22004094, 0.15560005, 0.62435901],
       [0.08009445, 0.07509542, 0.84481013],
       [0.25022937, 0.06602745, 0.68374318],
       [0.23466806, 0.08285118, 0.68248076],
       [0.30342692, 0.03721936, 0.65935372],
       [0.20526082, 0.44992095, 0.34481823],
       [0.21581465, 0.21711568, 0.56706967],
       [0.76821549, 0.0635143 , 0.16827021],
       [0.28065987, 0.07923818, 0.64010195],
       [0.10588514, 0.08947157, 0.80464329],
       [0.30787282, 0.10182799, 0.59029919],
       [0.06871504, 0.030875  , 0.90040996],
       [0.18737448, 0.24752762, 0.5650979 ],
       [0.15672694, 0.04037898, 0.80289409],
       [0.24788782, 0.0839991 , 0.66811308],
       [0.

In [22]:
#NB first then SVM
def NBfirst():
    NBfirstSentiment=[]
    counter = 0
    for i in range(len(y_pred_proba_nb)):
        for j in range(3):
            if y_pred_proba_nb[i:i+1,j:j+1] > 0.75:
                counter += 1
                if j == 0:
                    NBfirstSentiment.append(-1)
                if j == 1:
                    NBfirstSentiment.append(0)
                if j == 2:
                    NBfirstSentiment.append(1)
            if (j == 2) and (counter == 0):
                NBfirstSentiment.append(y_pred_svm[i])
            if (j == 2) and (counter > 0):
                counter = 0
    return NBfirstSentiment

In [23]:
def SVMfirst():
    SVMfirstSentiment=[]
    counter = 0
    for i in range(len(y_pred_proba_svm)):
        for j in range(3):
            if y_pred_proba_svm[i:i+1,j:j+1] > 0.75:
                counter += 1
                if j == 0:
                    SVMfirstSentiment.append(-1)
                if j == 1:
                    SVMfirstSentiment.append(0)
                if j == 2:
                    SVMfirstSentiment.append(1)
            if (j == 2) and (counter == 0):
                SVMfirstSentiment.append(y_pred_nb[i])
            if (j == 2) and (counter > 0):
                counter = 0
    return SVMfirstSentiment

In [24]:
pd.set_option('display.max_rows', None)
full_test['NB-SVM Sentiment'] = NBfirst()
full_test['SVM-NB Sentiment'] = SVMfirst()
full_test

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment,NB-SVM Sentiment,SVM-NB Sentiment
0,say cod fish he whale stay alive escape youre ...,en,1,1,0,1
1,punctual professor give importance reward stud...,en,1,1,1,1
2,joke funny not,en,0,-1,-1,-1
3,follow syllabus time,en,1,1,1,1
4,absent,en,-1,0,1,-1
5,not teacher but father student,en,1,-1,-1,-1
6,absent,en,-1,0,1,-1
7,treat respect,en,1,1,1,1
8,give good time quiz excellent professor overall,en,1,1,1,1
9,core university visible way teach,en,1,1,1,1


In [25]:
accuracy_NB_SVM = accuracy_score(full_test['Sentiment'], full_test['NB-SVM Sentiment'])
correct_class_NB_SVM = accuracy_score(full_test['Sentiment'], full_test['NB-SVM Sentiment'], normalize=False)

print('NB before SVM')
print('Accuracy Score: ', accuracy_NB_SVM)
print('Count of Correctly Classified Comments: ', correct_class_NB_SVM, '/ 74')

NB before SVM
Accuracy Score:  0.5540540540540541
Count of Correctly Classified Comments:  41 / 74


In [26]:
accuracy_SVM_NB = accuracy_score(full_test['Sentiment'], full_test['SVM-NB Sentiment'])
correct_class_SVM_NB = accuracy_score(full_test['Sentiment'], full_test['SVM-NB Sentiment'], normalize=False)

print('SVM before NB')
print('Accuracy Score: ', accuracy_SVM_NB)
print('Count of Correctly Classified Comments: ', correct_class_SVM_NB, '/ 74')

SVM before NB
Accuracy Score:  0.6081081081081081
Count of Correctly Classified Comments:  45 / 74


In [27]:
print(classification_report(full_test['Sentiment'], full_test['NB-SVM Sentiment']))

              precision    recall  f1-score   support

          -1       0.55      0.21      0.31        28
           0       0.00      0.00      0.00         5
           1       0.57      0.85      0.69        41

    accuracy                           0.55        74
   macro avg       0.37      0.36      0.33        74
weighted avg       0.52      0.55      0.50        74



In [28]:
print(classification_report(full_test['Sentiment'], full_test['SVM-NB Sentiment']))

              precision    recall  f1-score   support

          -1       0.55      0.39      0.46        28
           0       0.00      0.00      0.00         5
           1       0.67      0.83      0.74        41

    accuracy                           0.61        74
   macro avg       0.41      0.41      0.40        74
weighted avg       0.58      0.61      0.58        74



In [35]:
full_test.to_csv('test_results.csv', index=False)

### English Results

In [37]:
en_results = full_test[full_test['Language'] =='en']
en_results

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment,NB-SVM Sentiment,SVM-NB Sentiment
0,say cod fish he whale stay alive escape youre ...,en,1,1,0,1
1,punctual professor give importance reward stud...,en,1,1,1,1
2,joke funny not,en,0,-1,-1,-1
3,follow syllabus time,en,1,1,1,1
4,absent,en,-1,0,1,-1
5,not teacher but father student,en,1,-1,-1,-1
6,absent,en,-1,0,1,-1
7,treat respect,en,1,1,1,1
8,give good time quiz excellent professor overall,en,1,1,1,1
9,core university visible way teach,en,1,1,1,1


In [39]:
accuracy_NB_SVM_en = accuracy_score(en_results['Sentiment'], en_results['NB-SVM Sentiment'])
correct_class_NB_SVM_en = accuracy_score(en_results['Sentiment'], en_results['NB-SVM Sentiment'], normalize=False)
en_count = len(en_results)

print('NB before SVM English')
print('Accuracy Score: ', accuracy_NB_SVM_en)
print('Count of Correctly Classified Comments: ', correct_class_NB_SVM_en, '/', en_count)
print(classification_report(en_results['Sentiment'], en_results['NB-SVM Sentiment']))

NB before SVM English
Accuracy Score:  0.5757575757575758
Count of Correctly Classified Comments:  19 / 33
              precision    recall  f1-score   support

          -1       0.25      0.10      0.14        10
           0       0.00      0.00      0.00         2
           1       0.64      0.86      0.73        21

    accuracy                           0.58        33
   macro avg       0.30      0.32      0.29        33
weighted avg       0.48      0.58      0.51        33



In [44]:
accuracy_SVM_NB_en = accuracy_score(en_results['Sentiment'], en_results['SVM-NB Sentiment'])
correct_class_SVM_NB_en = accuracy_score(en_results['Sentiment'], en_results['SVM-NB Sentiment'], normalize=False)

print('SVM before NB English')
print('Accuracy Score: ', accuracy_SVM_NB_en)
print('Count of Correctly Classified Comments: ', correct_class_SVM_NB_en, '/', en_count)
print(classification_report(en_results['Sentiment'], en_results['SVM-NB Sentiment']))

SVM before NB English
Accuracy Score:  0.696969696969697
Count of Correctly Classified Comments:  23 / 33
              precision    recall  f1-score   support

          -1       0.50      0.40      0.44        10
           0       0.00      0.00      0.00         2
           1       0.79      0.90      0.84        21

    accuracy                           0.70        33
   macro avg       0.43      0.43      0.43        33
weighted avg       0.66      0.70      0.67        33



### Filipino Results

In [40]:
tl_results = full_test[full_test['Language'] =='tl']
tl_results

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment,NB-SVM Sentiment,SVM-NB Sentiment
0,parang binabasa lang yung libro nagtuturo,tl,-1,1,1,-1
1,hindi namamahiya nang estudyante,tl,1,-1,-1,-1
2,marunong umintindi umunawa estudyante,tl,1,0,0,-1
3,palagi panararamdam palagi siyang nandiyan pag...,tl,1,1,1,1
4,sobrang panget pagmumukha,tl,-1,-1,1,1
5,kontrolin yung klase sobrang gulo naming,tl,1,1,1,1
6,parang barkada mo lang kasi mabait madaling ka...,tl,1,1,1,1
7,madaling mawalan pasensya,tl,-1,0,1,1
8,patas magbigay grado,tl,1,1,1,1
9,maitim balat,tl,0,0,1,-1


In [41]:
accuracy_NB_SVM_tl = accuracy_score(tl_results['Sentiment'], tl_results['NB-SVM Sentiment'])
correct_class_NB_SVM_tl = accuracy_score(tl_results['Sentiment'], tl_results['NB-SVM Sentiment'], normalize=False)
tl_count = len(tl_results)

print('NB before SVM Filipino')
print('Accuracy Score: ', accuracy_NB_SVM_tl)
print('Count of Correctly Classified Comments: ', correct_class_NB_SVM_tl, '/', tl_count)
print(classification_report(tl_results['Sentiment'], tl_results['NB-SVM Sentiment']))

NB before SVM Filipino
Accuracy Score:  0.46153846153846156
Count of Correctly Classified Comments:  6 / 13
              precision    recall  f1-score   support

          -1       0.50      0.20      0.29         5
           0       0.00      0.00      0.00         1
           1       0.50      0.71      0.59         7

    accuracy                           0.46        13
   macro avg       0.33      0.30      0.29        13
weighted avg       0.46      0.46      0.43        13



In [45]:
accuracy_SVM_NB_tl = accuracy_score(tl_results['Sentiment'], tl_results['SVM-NB Sentiment'])
correct_class_SVM_NB_tl = accuracy_score(tl_results['Sentiment'], tl_results['SVM-NB Sentiment'], normalize=False)

print('SVM before NB Filipino')
print('Accuracy Score: ', accuracy_SVM_NB_tl)
print('Count of Correctly Classified Comments: ', correct_class_SVM_NB_tl, '/', tl_count)
print(classification_report(tl_results['Sentiment'], tl_results['SVM-NB Sentiment']))

SVM before NB Filipino
Accuracy Score:  0.5384615384615384
Count of Correctly Classified Comments:  7 / 13
              precision    recall  f1-score   support

          -1       0.40      0.40      0.40         5
           0       0.00      0.00      0.00         1
           1       0.62      0.71      0.67         7

    accuracy                           0.54        13
   macro avg       0.34      0.37      0.36        13
weighted avg       0.49      0.54      0.51        13



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Taglish Results

In [42]:
tlen_results = full_test[full_test['Language'] =='tlen']
tlen_results

Unnamed: 0,Comment,Language,Sentiment,Lexicon Sentiment,NB-SVM Sentiment,SVM-NB Sentiment
0,binabasa lang powerpoint,tlen,-1,0,1,1
1,chill laid style teaching maayos rin chill lang,tlen,1,1,1,1
2,manyak youre not pretty low grade mehn biased,tlen,-1,-1,-1,-1
3,pinapahirap lessons pag inaral youtube dali lang,tlen,-1,1,1,1
4,maganda gamit visuals,tlen,1,1,1,1
5,di naniniwala kasi di nagsasabi totoo,tlen,-1,1,1,1
6,palaging absent,tlen,-1,0,1,1
7,yung website lang yung teacher naming kasi dun...,tlen,-1,1,1,1
8,wala book diniscuss yung nasa exams nakuha ata...,tlen,-1,-1,1,1
9,prof namemersonal bumabawi sobrang hirap exam ...,tlen,-1,-1,1,0


In [43]:
accuracy_NB_SVM_tlen = accuracy_score(tlen_results['Sentiment'], tlen_results['NB-SVM Sentiment'])
correct_class_NB_SVM_tlen = accuracy_score(tlen_results['Sentiment'], tlen_results['NB-SVM Sentiment'], normalize=False)
tlen_count = len(tlen_results)

print('NB before SVM Taglish')
print('Accuracy Score: ', accuracy_NB_SVM_tlen)
print('Count of Correctly Classified Comments: ', correct_class_NB_SVM_tlen, '/', tlen_count)
print(classification_report(tlen_results['Sentiment'], tlen_results['NB-SVM Sentiment']))

NB before SVM Taglish
Accuracy Score:  0.5714285714285714
Count of Correctly Classified Comments:  16 / 28
              precision    recall  f1-score   support

          -1       0.80      0.31      0.44        13
           0       0.00      0.00      0.00         2
           1       0.52      0.92      0.67        13

    accuracy                           0.57        28
   macro avg       0.44      0.41      0.37        28
weighted avg       0.61      0.57      0.52        28



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
accuracy_SVM_NB_tlen = accuracy_score(tlen_results['Sentiment'], tlen_results['SVM-NB Sentiment'])
correct_class_SVM_NB_tlen = accuracy_score(tlen_results['Sentiment'], tlen_results['SVM-NB Sentiment'], normalize=False)

print('SVM before NB Taglish')
print('Accuracy Score: ', accuracy_SVM_NB_tlen)
print('Count of Correctly Classified Comments: ', correct_class_SVM_NB_tlen, '/', tlen_count)
print(classification_report(tlen_results['Sentiment'], tlen_results['SVM-NB Sentiment']))

SVM before NB Taglish
Accuracy Score:  0.5357142857142857
Count of Correctly Classified Comments:  15 / 28
              precision    recall  f1-score   support

          -1       0.71      0.38      0.50        13
           0       0.00      0.00      0.00         2
           1       0.53      0.77      0.62        13

    accuracy                           0.54        28
   macro avg       0.41      0.38      0.38        28
weighted avg       0.58      0.54      0.52        28

