## Import necessary libraries

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.preprocessing import FunctionTransformer
import pandas as pd
import joblib

## Load the data and remove empty values

The data comes from the [Twitter and Reddit Sentimental Analysis Dataset](https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset) on Kaggle.

In [22]:
# Load both sources. header=0 discards each file's own header row and `names`
# substitutes a shared schema ('text', 'label') so the two frames can be stacked.
r_df = pd.read_csv(
    '../input/twitter-and-reddit-sentimental-analysis-dataset/Reddit_Data.csv',
    names=['text', 'label'], header=0)
t_df = pd.read_csv(
    '../input/twitter-and-reddit-sentimental-analysis-dataset/Twitter_Data.csv',
    names=['text', 'label'], header=0)

# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the row labels of both files are kept and the same label points at two rows.
# Rows with a missing text or label are dropped before the integer cast, and
# the cast is done in the same chain so no column is assigned on a derived frame.
df = (
    pd.concat([r_df, t_df], ignore_index=True)
    .dropna()
    .astype({'label': int})
)

# Cleaned shape first, then the raw shape of each source for comparison.
print(df.shape, r_df.shape, t_df.shape)

df.head()

(200118, 2) (37249, 2) (162980, 2)


Unnamed: 0,text,label
0,family mormon have never tried explain them t...,1
1,buddhism has very much lot compatible with chr...,1
2,seriously don say thing first all they won get...,-1
3,what you have learned yours and only yours wha...,0
4,for your own benefit you may want read living ...,1


## Create a pipeline that will be used for our sentiment model training

In [20]:
# Two-stage text classifier: raw strings are turned into TF-IDF vectors
# (vocabulary capped at 200000 terms), which feed a multinomial Naive Bayes model.
pred_pipeline = Pipeline(
    steps=[
        ('features', TfidfVectorizer(max_features=200000)),
        ('classifier', MultinomialNB()),
    ]
)

## Train the model with our data

In [23]:
# Fit every pipeline step on the full cleaned dataset: the 'text' column is the
# input and the 'label' column is the target. No hold-out split is made here.
model = pred_pipeline.fit(
    X=df['text'],
    y=df['label'],
)

## Predict sentiment from text

In [30]:
# predict() takes a collection of documents, so the single sentence is wrapped
# in a list; the result holds one predicted label per input document.
sample_texts = ['when the sun shine we shine together']
y_pred = model.predict(sample_texts)
print(y_pred)

[1]


## Save trained model

In [28]:
# Persist the fitted pipeline (vectorizer + classifier) to a single file in the
# working directory. The call is the cell's last expression, so its return value
# is displayed as the cell output.
# NOTE: joblib files are pickle-based -- only load them from trusted sources.
joblib.dump(value=model, filename='sentiment_model.save')

['sentiment_model.save']