***Import Libraries and Load Trained Model***

In [14]:
import pickle
import numpy as np
from sklearn.feature_extraction.text import HashingVectorizer
import re
import nltk

nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Location of the pickled classifier. The path looks like a Colab Google Drive
# mount; change this one constant to load the model from somewhere else.
MODEL_PATH = '/content/drive/MyDrive/trained_model.pkl'

# SECURITY: unpickling can execute arbitrary code embedded in the file, so
# only load a model file you created yourself or otherwise fully trust.
with open(MODEL_PATH, 'rb') as model_file:
    clf = pickle.load(model_file)

# Human-readable names for the classifier's integer predictions (0 / 1),
# one mapping per output language.
label_english = {0: 'Good', 1: 'Harmful'}
label_bengali = {0: 'ক্ষতিকর না', 1: 'ক্ষতিকর'}

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


***Define Tokenizer Functions***

In [15]:
# Shared text-processing resources, created once and reused by the tokenizer
# functions defined in this cell.
stop = stopwords.words('english')  # NLTK's English stop-word list
porter = PorterStemmer()  # stemmer applied to each whitespace-separated word

def tokenizer_porter(text):
    """Split ``text`` on whitespace and Porter-stem every piece.

    Args:
        text: String to tokenize.

    Returns:
        List of stemmed tokens, in the order they appear in ``text``.
    """
    stems = []
    for piece in text.split():
        stems.append(porter.stem(piece))
    return stems

def tokenizer(text):
    """Clean raw text and return its stemmed, stop-word-filtered tokens.

    Steps, in order: strip HTML-like tags, collect emoticons, lowercase the
    text and replace every run of non-word characters with a single space,
    append the emoticons (with any '-' removed), stem each
    whitespace-separated piece via ``tokenizer_porter``, and drop tokens that
    appear in ``stop``.

    Args:
        text: Raw input string.

    Returns:
        List of token strings.
    """
    # The regex patterns are written as raw strings: in a normal string
    # literal the backslash-paren and backslash-W sequences are invalid
    # escapes, which newer Python versions warn about. The compiled patterns
    # are exactly the same as before, so the produced tokens (and therefore
    # the hashed features fed to the already-trained model) do not change.
    text = re.sub(r'<[^>]*>', '', text)

    # NOTE(review): the search runs on lowercased text, so the uppercase
    # D and P alternatives can never match; only ":(", ";(" and "=("
    # (optionally with a '-' nose) are captured. Left as is to stay
    # consistent with whatever preprocessing the model was trained with.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\(|D|P)', text.lower())

    # NOTE(review): \W is Unicode-aware for str patterns; Bengali dependent
    # vowel signs are combining marks and appear not to count as word
    # characters, so Bengali words may be split here - verify against the
    # training-time preprocessing before changing.
    text = re.sub(r'[\W]+', ' ', text.lower())

    # NOTE(review): no separator is inserted before the emoticons, so when
    # the cleaned text ends with a word character the first emoticon is glued
    # onto the last word. Kept unchanged for feature compatibility.
    text += ' '.join(emoticons).replace('-', '')

    # Stemming happens before stop-word removal, matching the original order.
    return [w for w in tokenizer_porter(text) if w not in stop]

***Vectorization for Both English and Bengali***

In [19]:
# Hashing vectorizer shared by the English and Bengali prediction cells; it is
# used via transform() only, with the custom `tokenizer` defined above.
# NOTE(review): these settings presumably have to match the ones used when
# the pickled model was trained - confirm against the training notebook.
vect = HashingVectorizer(
    decode_error='ignore',
    n_features=2**21,
    preprocessor=None,
    tokenizer=tokenizer,
)

***Prediction in English***

In [23]:
# Single English sentence to classify, vectorized with the shared `vect`.
example_english = ["Go to Hell. Fuck you"]
X_english = vect.transform(example_english)

# Label and confidence for the first (only) example. The probability is taken
# from row 0 of predict_proba so it always belongs to the same example as the
# predicted label; np.max over the whole matrix would instead return the
# largest probability across all rows if more examples were ever added.
prediction_english = label_english[clf.predict(X_english)[0]]
probability_english = np.max(clf.predict_proba(X_english)[0]) * 100

print('English Prediction:')
print(f'Prediction: {prediction_english}\nProbability: {probability_english:.2f}%')

English Prediction:
Prediction: Harmful
Probability: 99.96%


***Prediction in Bengali***

In [24]:
# Single Bengali sentence to classify, vectorized with the shared `vect`.
# NOTE(review): the tokenizer behind `vect` uses an English stop-word list and
# the Porter stemmer, so confirm this matches how Bengali text was
# preprocessed when the model was trained.
example_bengali = ["একটু হিসাব করে দেখেন, আর্জেন্টিনার জনসংখ্যা ৪ কোটি... আর আমাদের ১৬ কোটির দেশে প্রায় অর্ধেকই (৮ কোটি) হলো আর্জেন্টিনার সাপোর্টার"]
X_bengali = vect.transform(example_bengali)

# Label and confidence for the first (only) example. The probability is taken
# from row 0 of predict_proba so it always belongs to the same example as the
# predicted label; np.max over the whole matrix would instead return the
# largest probability across all rows if more examples were ever added.
prediction_bengali = label_bengali[clf.predict(X_bengali)[0]]
probability_bengali = np.max(clf.predict_proba(X_bengali)[0]) * 100

print('\nBengali Prediction:')
print(f'Prediction: {prediction_bengali}\nProbability: {probability_bengali:.2f}%')



Bengali Prediction:
Prediction: ক্ষতিকর না
Probability: 97.38%
