In [3]:
import pandas as pd
df = pd.read_csv('sentiment_telegram_messages-checkpoint.csv')
df = df[['text','sentiment_label']]
df

Unnamed: 0,text,sentiment_label
0,533,neutral
1,equity pick btst view siyaram silk cmp 860865 ...,positive
2,3690,neutral
3,join prime group ping sg005,neutral
4,feedback love see prime traders,positive
...,...,...
184,option buying view nifty 24000ce cmp 260265 su...,positive
185,equity pick bse cmp 4930 support 4850 expected...,positive
186,study bse daily chart,neutral
187,vimta lab 806 865 dev technology 168 182 hope ...,positive


In [4]:
df.shape

(189, 2)

In [5]:
df['sentiment_label'].value_counts()

sentiment_label
neutral     91
positive    79
negative    19
Name: count, dtype: int64

In [6]:
df.isnull().sum()

text               2
sentiment_label    0
dtype: int64

In [7]:
df.dropna(inplace=True)

In [8]:
df['sentiment_label'] = df['sentiment_label'].replace({'neutral':0,'positive':1,'negative':-1})
df

  df['sentiment_label'] = df['sentiment_label'].replace({'neutral':0,'positive':1,'negative':-1})


Unnamed: 0,text,sentiment_label
0,533,0
1,equity pick btst view siyaram silk cmp 860865 ...,1
2,3690,0
3,join prime group ping sg005,0
4,feedback love see prime traders,1
...,...,...
184,option buying view nifty 24000ce cmp 260265 su...,1
185,equity pick bse cmp 4930 support 4850 expected...,1
186,study bse daily chart,0
187,vimta lab 806 865 dev technology 168 182 hope ...,1


In [9]:
df['text'] = df['text'].apply(lambda x: x.lower())
df

Unnamed: 0,text,sentiment_label
0,533,0
1,equity pick btst view siyaram silk cmp 860865 ...,1
2,3690,0
3,join prime group ping sg005,0
4,feedback love see prime traders,1
...,...,...
184,option buying view nifty 24000ce cmp 260265 su...,1
185,equity pick bse cmp 4930 support 4850 expected...,1
186,study bse daily chart,0
187,vimta lab 806 865 dev technology 168 182 hope ...,1


In [10]:
def remove_special_char(text):
  clean_text = ''
  for each in text:
    if each.isalnum():
      clean_text += each
    else:
      clean_text += ' '
  return clean_text

In [11]:
df['text'] = df['text'].apply(remove_special_char)

In [12]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\saroj\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
def remove_stopwords(text):
  x = []
  for each in text.split():
    if each not in stopwords.words('english'):
      x.append(each)
  return x

In [14]:
df['text'] = df['text'].apply(remove_stopwords)

In [15]:
df

Unnamed: 0,text,sentiment_label
0,[533],0
1,"[equity, pick, btst, view, siyaram, silk, cmp,...",1
2,[3690],0
3,"[join, prime, group, ping, sg005]",0
4,"[feedback, love, see, prime, traders]",1
...,...,...
184,"[option, buying, view, nifty, 24000ce, cmp, 26...",1
185,"[equity, pick, bse, cmp, 4930, support, 4850, ...",1
186,"[study, bse, daily, chart]",0
187,"[vimta, lab, 806, 865, dev, technology, 168, 1...",1


In [16]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [17]:
def convert_to_stemming(text_list):
  stem_words = []
  for each in text_list:
    stem_words.append(ps.stem(each))

  return ' '.join(stem_words)

In [18]:
df['text'] = df['text'].apply(convert_to_stemming)
df

Unnamed: 0,text,sentiment_label
0,533,0
1,equiti pick btst view siyaram silk cmp 860865 ...,1
2,3690,0
3,join prime group ping sg005,0
4,feedback love see prime trader,1
...,...,...
184,option buy view nifti 24000ce cmp 260265 suppo...,1
185,equiti pick bse cmp 4930 support 4850 expect 5...,1
186,studi bse daili chart,0
187,vimta lab 806 865 dev technolog 168 182 hope e...,1


In [19]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=500)

X = cv.fit_transform(df['text']).toarray()
y = df['sentiment_label'].values

In [20]:
X.shape

(187, 500)

In [21]:
y.shape

(187,)

In [22]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.8, random_state=42)

In [23]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
gb = GaussianNB()
mb = MultinomialNB()
bb = BernoulliNB()

gb.fit(X_train,y_train)
mb.fit(X_train,y_train)
bb.fit(X_train,y_train)

In [24]:
y_pred_gb = gb.predict(X_test)
y_pred_mb = gb.predict(X_test)
y_pred_bb = bb.predict(X_test)

In [25]:
from sklearn.metrics import accuracy_score
gb_accuracy = accuracy_score(y_pred_gb, y_test)
mb_accuracy = accuracy_score(y_pred_mb, y_test)
bb_accuracy = accuracy_score(y_pred_bb, y_test)

print("GaussianNB:", gb_accuracy)
print("MultinomialNB:", mb_accuracy)
print("BernouliNB:", bb_accuracy)

GaussianNB: 0.5789473684210527
MultinomialNB: 0.5789473684210527
BernouliNB: 0.6578947368421053


In [26]:
text = 'My stock goes down into big loss, But I will not stop here'
text = text.lower()
text = remove_special_char(text)
text = remove_stopwords(text)
text = convert_to_stemming(text)

In [27]:
text_array = cv.transform([text]).toarray()
sentiment = bb.predict(text_array)[0]

In [28]:
sentiment

np.int64(0)