In [231]:
import pandas as pd
import numpy as np
import nltk
import string
from sklearn import *
from bs4 import BeautifulSoup
from snowballstemmer import TurkishStemmer
import sklearn
from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.naive_bayes import MultinomialNB
import re

In [232]:
#loading dataset
tw_data = pd.read_csv(r"C:\Users\Kofana\Desktop\data_analysis\gsm-tweets.csv", encoding="latin5")
tw_data.columns = [ 'Tweets', 'Sentiment']
tw_data.head()

Unnamed: 0,Tweets,Sentiment
0,çekmiyor tsk berbat güzel,negatif
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif


In [233]:
#information about data
tw_data.describe()

Unnamed: 0,Tweets,Sentiment
count,1729,1729
unique,1716,2
top,allah belanı versin #türkcell bir daha seni te...,negatif
freq,2,1466


In [234]:
#drop the "NAN" values from dataset
tw_data.dropna(inplace=True)

In [235]:
#information about data
tw_data.describe()

Unnamed: 0,Tweets,Sentiment
count,1729,1729
unique,1716,2
top,allah belanı versin #türkcell bir daha seni te...,negatif
freq,2,1466


In [236]:
tw_data['TextSizeBeforeRemoveStopWords'] = [len(t) for t in tw_data.Tweets]
tw_data.head()

Unnamed: 0,Tweets,Sentiment,TextSizeBeforeRemoveStopWords
0,çekmiyor tsk berbat güzel,negatif,25
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif,288
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif,187
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif,106
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif,165


In [237]:
#normalization methods
from unicode_tr import unicode_tr  #https://github.com/emre/unicode_tr
def convertLowerCase(text):
    return unicode_tr(text)

#remove username
def remove_username(text):
    return re.sub('@[^\s]+','',text)

#remove hashtags
def remove_hashtags(text):
    return re.sub('#[^\s]+','',text)

#remove punctuation
def remove_punctuation(text):
    # define punctuation
    punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    no_punct = ''
    for char in text:
        if char not in punctuations:
            no_punct = no_punct + char
    return no_punct

def remove_numericChars(text):
    result = ''.join([i for i in text if not i.isdigit()])
    return result

#remove the html
def strip_html(text):
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()


In [238]:

# Remove the noisy text
def denoise_text(text):
 
    text = convertLowerCase(text)
    text = remove_username(text)
    text = remove_hashtags(text)
    text = remove_punctuation(text)
    text = remove_numericChars(text)
   
   # text = strip_html(text)
    return text

tw_data['AfterPreProcessing'] = tw_data['Tweets'].apply(denoise_text)


In [239]:
tw_data.head()

Unnamed: 0,Tweets,Sentiment,TextSizeBeforeRemoveStopWords,AfterPreProcessing
0,çekmiyor tsk berbat güzel,negatif,25,çekmiyor tsk berbat güzel
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif,288,vodafone bana niye trip atıyorsun acaba öğrene...
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif,187,maalesef müşteri hizmetlerine ulaşamazsınız ...
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif,106,vodafone çekmiyor diyorlar ama hem paketleri...
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif,165,bodrumda yok lakin onun bana hediye ettiği ...


In [240]:
# remove stopwords

from trtokenizer.tr_tokenizer import SentenceTokenizer, WordTokenizer
tokenizer = WordTokenizer()

#Setting Turkish stopwords
stopword_list = open(r"C:\Users\Kofana\Desktop\data_analysis\stop_words_turkish.txt",  encoding = 'latin5').read().split()

stop = set(stopword_list)

def remove_stopwords(text, is_lower_case = True):
    tokens = tokenizer.tokenize(text)
    tokens = [token.strip() for token in tokens]
    if is_lower_case:
        filtered_tokens = [token for token in tokens if token not in stopword_list]
    else:
        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    filtered_text = ' '.join(filtered_tokens)    
    return filtered_text

tw_data['AfterPreProcessing'] = tw_data['AfterPreProcessing'].apply(remove_stopwords)

In [241]:
tw_data['TextSizeAfterRemoveStopWords'] = [len(t) for t in tw_data.Tweets]
tw_data.head()

Unnamed: 0,Tweets,Sentiment,TextSizeBeforeRemoveStopWords,AfterPreProcessing,TextSizeAfterRemoveStopWords
0,çekmiyor tsk berbat güzel,negatif,25,çekmiyor tsk berbat güzel,25
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif,288,vodafone trip atıyorsun öğrenebilir miyim tıkı...,288
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif,187,maalesef müşteri hizmetlerine ulaşamazsınız ul...,187
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif,106,vodafone çekmiyor diyorlar paketleri fiyatları...,106
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif,165,bodrumda yok lakin hediye ettiği gb sayesinde ...,165


In [242]:
#stemming
turkStem = TurkishStemmer()

# Stemming
def simple_stemmer(text):
    ss = TurkishStemmer()
    text = ' '.join([ss.stemWord(word) for word in text.split()])
    return text

tw_data['AfterPreProcessing'] = tw_data['AfterPreProcessing'].apply(simple_stemmer)

In [243]:
tw_data.head()

Unnamed: 0,Tweets,Sentiment,TextSizeBeforeRemoveStopWords,AfterPreProcessing,TextSizeAfterRemoveStopWords
0,çekmiyor tsk berbat güzel,negatif,25,çekmiyor tsk berbat güzel,25
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif,288,vodafone trip atıyor öğrenebilir mi tıkır tıkı...,288
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif,187,maalesef müşter hizmet ulaşamaz ulaşma sor çöz...,187
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif,106,vodafone çekmiyor diyor paket fiyat mükemmel g...,106
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif,165,bodr yok lak hedi ettik gb saye çekirdek aile ...,165


In [244]:
#drop unnecessary columnd
tw_data = tw_data.drop(['TextSizeBeforeRemoveStopWords'], axis=1)
tw_data.head()

Unnamed: 0,Tweets,Sentiment,AfterPreProcessing,TextSizeAfterRemoveStopWords
0,çekmiyor tsk berbat güzel,negatif,çekmiyor tsk berbat güzel,25
1,vodafone bana niye trip atıyorsun acaba öğrene...,negatif,vodafone trip atıyor öğrenebilir mi tıkır tıkı...,288
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,negatif,maalesef müşter hizmet ulaşamaz ulaşma sor çöz...,187
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",pozitif,vodafone çekmiyor diyor paket fiyat mükemmel g...,106
4,"#türktelekom bodrumda yok lakin , #vodafone on...",pozitif,bodr yok lak hedi ettik gb saye çekirdek aile ...,165


In [245]:
#feature extraction
tw_data.Sentiment.replace(['negatif', 'pozitif'], [0, 1], inplace = True)
tw_data.head()

Unnamed: 0,Tweets,Sentiment,AfterPreProcessing,TextSizeAfterRemoveStopWords
0,çekmiyor tsk berbat güzel,0,çekmiyor tsk berbat güzel,25
1,vodafone bana niye trip atıyorsun acaba öğrene...,0,vodafone trip atıyor öğrenebilir mi tıkır tıkı...,288
2,@manyetikhamsi @vodafonetr maalesef müşteri hi...,0,maalesef müşter hizmet ulaşamaz ulaşma sor çöz...,187
3,"@tenshiroi vodafone , çekmiyor diyorlar ama he...",1,vodafone çekmiyor diyor paket fiyat mükemmel g...,106
4,"#türktelekom bodrumda yok lakin , #vodafone on...",1,bodr yok lak hedi ettik gb saye çekirdek aile ...,165


In [246]:
#split dataset into train and test
sent = tw_data['Sentiment']
tw = tw_data['AfterPreProcessing']

X_train, X_test, y_train, y_test = train_test_split(tw, sent, test_size = 0.20, random_state= 1)

In [247]:
# CountVectorizer for Bag of Words
cv = CountVectorizer(min_df = 0, max_df = 1, binary = False, ngram_range = (1, 3))

# Transformed train tweets
cv_train_tweets = cv.fit_transform(X_train)

# Transformed test tweets
cv_test_tweets = cv.transform(X_test)

print('BoW_CV_Train:',cv_train_tweets.shape)
print('BoW_CV_Test:',cv_test_tweets.shape)

BoW_CV_Train: (1383, 23843)
BoW_CV_Test: (346, 23843)


In [248]:
s = cv_train_tweets[1]
print(s)

  (0, 10740)	1
  (0, 14387)	1
  (0, 8670)	1
  (0, 17411)	1
  (0, 2474)	1
  (0, 19926)	1
  (0, 22607)	1
  (0, 10741)	1
  (0, 14388)	1
  (0, 10739)	1
  (0, 8671)	1


In [249]:
ss = cv_test_tweets[1]
print(ss)




In [250]:
# TfidfVectorizer
tv = TfidfVectorizer(min_df = 0, max_df = 1, use_idf = True, ngram_range = (1, 3))

# Transformed train tweets
tv_train_tweets = tv.fit_transform(X_train)

# Transformed test tweets
tv_test_tweets = tv.transform(X_test)

print('Tfidf_Train:',tv_train_tweets.shape)
print('Tfidf_Test:',tv_test_tweets.shape)

Tfidf_Train: (1383, 23843)
Tfidf_Test: (346, 23843)


In [251]:
s = tv_train_tweets[1]
print(s)

  (0, 8671)	0.30151134457776363
  (0, 10739)	0.30151134457776363
  (0, 14388)	0.30151134457776363
  (0, 10741)	0.30151134457776363
  (0, 22607)	0.30151134457776363
  (0, 19926)	0.30151134457776363
  (0, 2474)	0.30151134457776363
  (0, 17411)	0.30151134457776363
  (0, 8670)	0.30151134457776363
  (0, 14387)	0.30151134457776363
  (0, 10740)	0.30151134457776363


In [252]:
ss = tv_test_tweets[1]
print(ss)




In [253]:
#logistic regression
# Training the Model
lr = LogisticRegression(penalty = 'l2', max_iter = 500, C = 1.1, random_state = 42)

# Fitting the model for Bag of Words
lr_bow = lr.fit(cv_train_tweets, y_train)
print(lr_bow)

# Fitting the model for TFIDF features
lr_tfidf = lr.fit(tv_train_tweets, y_train)
print(lr_tfidf)

LogisticRegression(C=1.1, max_iter=500, random_state=42)
LogisticRegression(C=1.1, max_iter=500, random_state=42)


In [254]:
# Predicting the model for Bag of Words
lr_bow_predict = lr.predict(cv_test_tweets)
print(lr_bow_predict)

# Predicting the model for TFIDF features
lr_tfidf_predict = lr.predict(tv_test_tweets)
print(lr_tfidf_predict)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [255]:
# Accuracy score for Bag of Words
lr_bow_score = accuracy_score(y_test, lr_bow_predict)
print("LR BoW Score :",lr_bow_score)

# Accuracy score for TFIDF features
lr_tfidf_score = accuracy_score(y_test, lr_tfidf_predict)
print("LR TFIDF Score :",lr_tfidf_score)

LR BoW Score : 0.8583815028901735
LR TFIDF Score : 0.8526011560693642


In [256]:
# Classification report for Bag of Words
lr_bow_report = classification_report(y_test, lr_bow_predict, target_names = ['negatif','pozitif'])
print(lr_bow_report)

# Classification report for TFIDF features
lr_tfidf_report = classification_report(y_test, lr_tfidf_predict, target_names = ['negatif','pozitif'])
print(lr_tfidf_report)

              precision    recall  f1-score   support

     negatif       0.86      1.00      0.92       295
     pozitif       1.00      0.04      0.08        51

    accuracy                           0.86       346
   macro avg       0.93      0.52      0.50       346
weighted avg       0.88      0.86      0.80       346

              precision    recall  f1-score   support

     negatif       0.85      1.00      0.92       295
     pozitif       0.00      0.00      0.00        51

    accuracy                           0.85       346
   macro avg       0.43      0.50      0.46       346
weighted avg       0.73      0.85      0.78       346



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [257]:
# Confusion matrix for Bag of Words
cm_bow = confusion_matrix(y_test, lr_bow_predict, labels = [1,0])
print(cm_bow)

# Confusion matrix for TFIDF features
cm_tfidf = confusion_matrix(y_test, lr_tfidf_predict, labels = [1,0])
print(cm_tfidf)

[[  2  49]
 [  0 295]]
[[  0  51]
 [  0 295]]


In [258]:
# Training the Linear SVM
svm = SGDClassifier(loss='hinge', max_iter=500, random_state=42)

# Fitting the SVM for Bag of Words
svm_bow = svm.fit(cv_train_tweets, y_train)
print(svm_bow)

# Fitting the SVM for TFIDF features
svm_tfidf = svm.fit(tv_train_tweets, y_train)
print(svm_tfidf)

SGDClassifier(max_iter=500, random_state=42)
SGDClassifier(max_iter=500, random_state=42)


In [259]:
# Predicting the model for Bag of Words
svm_bow_predict = svm.predict(cv_test_tweets)
print(svm_bow_predict)

# Predicting the model for TFIDF features
svm_tfidf_predict = svm.predict(tv_test_tweets)
print(svm_tfidf_predict)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0

In [260]:
# Accuracy score for Bag of Words
svm_bow_score = accuracy_score(y_test, svm_bow_predict)
print("SVM BoW Score :",svm_bow_score)

# Accuracy score for TFIDF features
svm_tfidf_score = accuracy_score(y_test, svm_tfidf_predict)
print("SVM TFIDF Score:",svm_tfidf_score)

SVM BoW Score : 0.8641618497109826
SVM TFIDF Score: 0.861271676300578


In [261]:
# Classification report for Bag of Words 
svm_bow_report = classification_report(y_test, svm_bow_predict, target_names = ['negatif','pozitif'])
print(svm_bow_report)

# Classification report for TFIDF features
svm_tfidf_report = classification_report(y_test, svm_tfidf_predict, target_names = ['negatif','pozitif'])
print(svm_tfidf_report)

              precision    recall  f1-score   support

     negatif       0.87      0.99      0.93       295
     pozitif       0.67      0.16      0.25        51

    accuracy                           0.86       346
   macro avg       0.77      0.57      0.59       346
weighted avg       0.84      0.86      0.83       346

              precision    recall  f1-score   support

     negatif       0.86      0.99      0.92       295
     pozitif       0.71      0.10      0.17        51

    accuracy                           0.86       346
   macro avg       0.79      0.55      0.55       346
weighted avg       0.84      0.86      0.81       346



In [262]:
# Confusion matrix for Bag of Words
cm_bow = confusion_matrix(y_test, svm_bow_predict, labels = [1,0])
print(cm_bow)

# Confusion matrix for TFIDF features
cm_tfidf = confusion_matrix(y_test, svm_tfidf_predict, labels = [1,0])
print(cm_tfidf)

[[  8  43]
 [  4 291]]
[[  5  46]
 [  2 293]]


In [263]:
# Training the model
mnb = MultinomialNB()

# Fitting the NB for Bag of Words
mnb_bow = mnb.fit(cv_train_tweets, y_train)
print(mnb_bow)

# Fitting the NB for TFIDF features
mnb_tfidf = mnb.fit(tv_train_tweets, y_train)
print(mnb_tfidf)

MultinomialNB()
MultinomialNB()


In [264]:
# Predicting the model for Bag of Words
mnb_bow_predict = mnb.predict(cv_test_tweets)
print(mnb_bow_predict)

# Predicting the model for TFIDF features
mnb_tfidf_predict = mnb.predict(tv_test_tweets)
print(mnb_tfidf_predict)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [265]:
# Accuracy score for Bag of Words
mnb_bow_score = accuracy_score(y_test, mnb_bow_predict)
print("MNB BoW Score :",mnb_bow_score)

# Accuracy score for TFIDF features
mnb_tfidf_score = accuracy_score(y_test, mnb_tfidf_predict)
print("MNB TFIDF Score :",mnb_tfidf_score)

MNB BoW Score : 0.8583815028901735
MNB TFIDF Score : 0.8526011560693642


In [266]:
# Classification report for Bag of Words
mnb_bow_report = classification_report(y_test, mnb_bow_predict, target_names = ['negatif','pozitif'])
print(mnb_bow_report)

# Classification report for TFIDF features
mnb_tfidf_report = classification_report(y_test, mnb_tfidf_predict, target_names = ['negatif','pozitif'])
print(mnb_tfidf_report)

              precision    recall  f1-score   support

     negatif       0.86      1.00      0.92       295
     pozitif       1.00      0.04      0.08        51

    accuracy                           0.86       346
   macro avg       0.93      0.52      0.50       346
weighted avg       0.88      0.86      0.80       346

              precision    recall  f1-score   support

     negatif       0.85      1.00      0.92       295
     pozitif       0.00      0.00      0.00        51

    accuracy                           0.85       346
   macro avg       0.43      0.50      0.46       346
weighted avg       0.73      0.85      0.78       346



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [267]:
# Confusion matrix for Bag of Words
cm_bow = confusion_matrix(y_test, mnb_bow_predict, labels = [1,0])
print(cm_bow)

# Confusion matrix for TFIDF features
cm_tfidf = confusion_matrix(y_test, mnb_tfidf_predict, labels = [1,0])
print(cm_tfidf)

[[  2  49]
 [  0 295]]
[[  0  51]
 [  0 295]]
