In [9]:
# imports
import numpy as np
import pandas as pd
import pickle

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

In [3]:
# Read the cleaned posts, using the CSV's first column as the row index,
# then preview the first five rows.
df = pd.read_csv(
    'clean_posts_no_names-Copy1.csv',
    index_col=0,
)
df.head(n=5)

Unnamed: 0,subreddit,title,created_utc,selftext,title_selftext,author,media_only,permalink,text_length,clean_text_stem,clean_text_lem
0,tolkienfans,What happened to Tolkien’s Red Book?,1616977706,I was wondering if somebody could clear up my ...,what happened to tolkien s red book i was wond...,lukeskinwalker69epic,False,/r/tolkienfans/comments/mfdrms/what_happened_t...,322,happen red book wonder somebodi could clear co...,happened red book wondering somebody could cle...
1,tolkienfans,What Makes People Turn Invisible When the Put ...,1616971688,I guess this is a big question because I never...,what makes people turn invisible when the put ...,Jpmaniax26,False,/r/tolkienfans/comments/mfbzjh/what_makes_peop...,252,make peopl turn invis put one ring guess big q...,make people turn invisible put one ring guess ...
2,tolkienfans,"Theory: At some point not told in the Tale, Ar...",1616971077,This is the last dialogue between Queen Undómi...,theory at some point not told in the tale arag...,yew_eater,False,/r/tolkienfans/comments/mfbsks/theory_at_some_...,6045,theori point told tale aragorn arwen must seri...,theory point told tale aragorn arwen must seri...
3,tolkienfans,I’d like to purchase a matching set of books o...,1616969977,Can anyone help me out? Looking for matching b...,i d like to purchase a matching set of books o...,GoosePants72,False,/r/tolkienfans/comments/mfbfyj/id_like_to_purc...,302,like purchas match set book everyth silmarilli...,like purchase matching set book everything sil...
4,tolkienfans,"When Faramir ‘proposes’ to Eowyn, something we...",1616966528,After he confesses his love for her and talks ...,when faramir proposes to eowyn something weird...,PhendranaDrifter,False,/r/tolkienfans/comments/mfaae8/when_faramir_pr...,731,faramir propos eowyn someth weird happen confe...,faramir proposes eowyn something weird happens...


In [4]:
# Features: the pre-stemmed post text. Target: the subreddit each post came from.
X, y = df['clean_text_stem'], df['subreddit']

In [5]:
# Hold out a test set. Stratifying on y keeps each subreddit's share the same in
# both splits; the fixed random_state makes the split reproducible. No test_size
# is given, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

In [6]:
# Build the pipeline: bag-of-words counts feeding a multinomial Naive Bayes classifier.
pipe = Pipeline(steps=[
    (
        'cvec',
        CountVectorizer(
            ngram_range=(1, 2),   # unigrams and bigrams
            min_df=2,             # ignore terms found in fewer than 2 documents
            max_df=0.9,           # ignore terms found in more than 90% of documents
            max_features=5000,    # cap the vocabulary at the 5000 most frequent terms
        ),
    ),
    ('nb', MultinomialNB()),
])

In [7]:
# Train the vectorizer and the classifier together on the training split only.
pipe.fit(X=X_train, y=y_train)

Pipeline(steps=[('cvec',
                 CountVectorizer(max_df=0.9, max_features=5000, min_df=2,
                                 ngram_range=(1, 2))),
                ('nb', MultinomialNB())])

In [8]:
# Score the fitted pipeline on the training split, then on the held-out test split.
# The result is displayed as a (train_score, test_score) tuple.
tuple(
    pipe.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

(0.9523809523809523, 0.9506146672318779)

In [10]:
# Serialise the fitted pipeline to disk so it can be reloaded later without retraining.
with open('reddit_pipe.pkl', 'wb') as pickle_out:
    pickle.dump(pipe, pickle_out)

#### Reads the pickled file and checks the prediction

In [12]:
# Reload the pipeline from disk, rebinding `pipe` to the deserialised object.
# pickle.load can execute arbitrary code while loading, so only open pickle
# files from a trusted source (here, the file this notebook wrote itself).
with open('reddit_pipe.pkl', 'rb') as pickle_in:
    pipe = pickle.load(pickle_in)

In [13]:
# Sanity check: list the reloaded pipeline's steps by name to confirm it is the
# same CountVectorizer + MultinomialNB configuration that was pickled.
pipe.named_steps

{'cvec': CountVectorizer(max_df=0.9, max_features=5000, min_df=2, ngram_range=(1, 2)),
 'nb': MultinomialNB()}

#### Generates a prediction

In [14]:
# Classify one hand-written sentence with the reloaded pipeline.
# NOTE(review): the pipeline was fit on the pre-stemmed `clean_text_stem` column,
# but this text is raw. Consider applying the same cleaning/stemming first.
pipe.predict(
    ["I knew Frodo would get to Mount Doom and destroy the ring"]
)

array(['tolkienfans'], dtype='<U11')

#### Generates predictions interactively

In [16]:
# Prompt for free text and show the single predicted subreddit label.
# NOTE(review): the pipeline was fit on the pre-stemmed `clean_text_stem` column,
# but the typed text is passed in raw. Consider applying the same cleaning first.
# input() needs an interactive kernel, so this cell will not run unattended.
user_text = input('Please enter some text: ')
predicted_labels = pipe.predict([user_text])  # array with one label per input document
predicted_labels[0]

Please enter some text:  I knew Frodo could make it


'tolkienfans'