# SVM Simulation

## Data Preparation

In [1]:
# add required libraries
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score

In [7]:
!pip install -q nltk
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [2]:
# set random seed to reproduce the same result every time
np.random.seed(500)

In [14]:
# import the corpus
Corpus = pd.read_csv("500_Reddit_users_posts_labels.csv", encoding='latin-1')

## Data Preprocessing

### Tokenization & Word Stemming/Lemmatization

In [16]:
# Step - a : Remove blank rows if any.
Corpus['Post'].dropna(inplace=True)

# Step - b : Change all the text to lower case. This is required as python interprets 'dog' and 'DOG' differently
Corpus['Post'] = [entry.lower() for entry in Corpus['Post']]

# Step - c : Tokenization : In this each entry in the corpus will be broken into set of words
Corpus['Post']= [word_tokenize(entry) for entry in Corpus['Post']]

# Step - d : Remove Stop words, Non-Numeric and perfom Word Stemming/Lemmenting.
# WordNetLemmatizer requires Pos tags to understand if the word is noun or verb or adjective etc. By default it is set to Noun
tag_map = defaultdict(lambda : wn.NOUN)
tag_map['J'] = wn.ADJ
tag_map['V'] = wn.VERB
tag_map['R'] = wn.ADV
for index,entry in enumerate(Corpus['Post']):
    # Declaring Empty List to store the words that follow the rules for this step
    Final_words = []
    # Initializing WordNetLemmatizer()
    word_Lemmatized = WordNetLemmatizer()
    # pos_tag function below will provide the 'tag' i.e if the word is Noun(N) or Verb(V) or something else.
    for word, tag in pos_tag(entry):
        # Below condition is to check for Stop words and consider only alphabets
        if word not in stopwords.words('english') and word.isalpha():
            word_Final = word_Lemmatized.lemmatize(word,tag_map[tag[0]])
            Final_words.append(word_Final)
    # The final processed set of words for each iteration will be stored in 'text_final'
    Corpus.loc[index,'text_final'] = str(Final_words)

In [17]:
!pip install -q tabulate

import pandas as pd
from tabulate import tabulate

# display a few rows of the preprocessed text
print(tabulate(Corpus.head(), headers='keys', tablefmt='psql'))

+----+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

## Split dataset

In [37]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(Corpus['text_final'],Corpus['Label'],test_size=0.3, random_state=42)

### Encoding

In [38]:
Encoder = LabelEncoder()
y_train = Encoder.fit_transform(y_train)
y_test = Encoder.fit_transform(y_test)

### Word Vectorization

In [40]:
Tfidf_vect = TfidfVectorizer(max_features=5000)
Tfidf_vect.fit(Corpus['text_final'])
Train_X_Tfidf = Tfidf_vect.transform(X_train)
Test_X_Tfidf = Tfidf_vect.transform(X_test)

In [41]:
# see the vocabulary that it has learned from the corpus
print(Tfidf_vect.vocabulary_)



## Multi-class Classification (One vs Rest)

In [21]:
!pip install -q scikit-learn

In [24]:
# import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import ConfusionMatrixDisplay
from mlxtend.plotting import plot_decision_regions

In [32]:
# save the preprocessed text data into a new file for reusability
Corpus.to_csv('500_Reddit_users_posts_labels_preprocessed.csv', index=False)

In [33]:
# load the preprocessed text data
Corpus = pd.read_csv("500_Reddit_users_posts_labels_preprocessed.csv", encoding='latin-1')

In [34]:
# create an instance of the SVM model
svm = LinearSVC(random_state=42)

In [35]:
# make SVM an OvR classifier
ovr_classifier = OneVsRestClassifier(svm)

In [36]:
# fit the data to the OvR classifier
ovr_classifier = ovr_classifier.fit(X_train, y_train)

ValueError: could not convert string to float: "['dont', 'know', 'dbt', 'would', 'mind', 'explain', 'little', 'bit', 'really', 'sorry', 'hear', 'someone', 'suppose', 'help', 'actually', 'really', 'mean', 'thats', 'definitely', 'right', 'fair', 'people', 'person', 'talk', 'im', 'honestly', 'sure', 'shun', 'sound', 'like', 'dont', 'know', 'either', 'im', 'sure', 'would', 'anything', 'plan', 'sound', 'really', 'pain', 'make', 'sure', 'understand', 'ish', 'cut', 'arm', 'total', 'im', 'tire', 'concerned', 'safety', 'symptom', 'wake', 'life', 'bad', 'dream', 'sound', 'like', 'tire', 'scared', 'hit', 'rock', 'bottom', 'also', 'pleasant', 'thing', 'experience', 'dbt', 'lady', 'one', 'conduct', 'therapy', 'session', 'sound', 'like', 'feel', 'really', 'scar', 'much', 'want', 'kill', 'im', 'concern', 'safety', 'especially', 'since', 'say', 'youve', 'carry', 'plan', 'suicide', 'chance', 'could', 'talk', 'bet', 'would', 'make', 'really', 'tire', 'day', 'row', 'lot', 'day', 'work', 'hear', 'youre', 'feel', 'lot', 'well', 'ever', 'need', 'support', 'always', 'message', 'like', 'say', 'schedule', 'really', 'sporadic', 'get', 'time', 'share', 'thought', 'feeling', 'youve', 'share', 'therapist', 'care', 'coordinator', 'even', 'though', 'care', 'coordinator', 'mean', 'would', 'hope', 'would', 'willing', 'help', 'super', 'concern', 'safety', 'right', 'youve', 'say', 'youve', 'reopen', 'wound', 'bleed', 'want', 'keep', 'talk', 'im', 'concerned', 'wound', 'open', 'pick', 'scab', 'skin', 'knife', 'something', 'similar', 'hear', 'hard', 'almost', 'impossible', 'try', 'concern', 'want', 'keep', 'talk', 'long', 'possible', 'hate', 'manager', 'really', 'suck', 'c', 'use', 'manager', 'felt', 'tired', 'frustrate', 'best', 'could', 'sound', 'like', 'good', 'time', 'sound', 'like', 'youre', 'feeling', 'refresh', 'understand', 'would', 'hard', 'talk', 'one', 'listen', 'easy', 'feel', 'ignored', 'dont', 'really', 'know', 'youve', 'do', 'know', 'hard', 'meet', 'new', 'people', 'also', 'kind', 'feel', 'way', 'alone', 'world', 'think', 'thats', 'need', 'sometimes', 'hard', 'feel', 'alone', 'take', 'care', 'try', 'tell', 'feel', 'shes', 'mean', 'well', 'job', 'help', 'might', 'even', 'realize', 'shes', 'mean', 'make', 'sense', 'tell', 'sometimes', 'make', 'feel', 'like', 'vermin', 'maybe', 'realize', 'do', 'wrong', 'thing', 'knife', 'still', 'like', 'say', 'really', 'concern', 'safety', 'want', 'keep', 'talk', 'dont', 'want', 'talk', 'anymore', 'need', 'go', 'sleep', 'im', 'worry', 'knife', 'might', 'get', 'way', 'stay', 'safe', 'talk', 'really', 'want', 'listen', 'hear', 'feel', 'dont', 'deserve', 'feel', 'everyone', 'deserve', 'listening', 'ear', 'need', 'thank', 'honest', 'sorry', 'hear', 'bad', 'night', 'life', 'tired', 'frustrating', 'hope', 'youre', 'hold', 'glad', 'know', 'schedule', 'pretty', 'fuck', 'want', 'listen', 'thank', 'compliment', 'think', 'pretty', 'amazing', 'youre', 'willing', 'honest', 'open', 'internet', 'stranger', 'didnt', 'want', 'think', 'disappear', 'middle', 'conversation', 'really', 'like', 'name', 'hope', 'thats', 'really', 'cute', 'sound', 'like', 'shes', 'good', 'kitty', 'able', 'open', 'people', 'around', 'may', 'important', 'sound', 'tire', 'lonely', 'think', 'youre', 'strong', 'take', 'care', 'since', 'worry', 'safety', 'im', 'wonder', 'there', 'service', 'area', 'could', 'reach', 'help', 'burden', 'im', 'sure', 'see', 'therapist', 'go', 'doctor', 'would', 'option', 'im', 'worry', 'something', 'might', 'happen', 'hope', 'alive', 'wont', 'anyone', 'care', 'anymore', 'run', 'buy', 'cigarette', 'im', 'back', 'get', 'little', 'headbutt', 'kitty', 'thats', 'enough', 'know', 'feel', 'sound', 'like', 'spasm', 'tire', 'pain', 'cant', 'imagine', 'like', 'seem', 'tired', 'scary', 'people', 'talk', 'life', 'know', 'spasm', 'like', 'say', 'sound', 'tired', 'scary', 'like', 'something', 'would', 'tire', 'difficult', 'deal', 'im', 'concern', 'wound', 'bleed', 'little', 'lot', 'like', 'cat', 'already', 'king', 'isnt', 'really', 'noisy', 'he', 'definitely', 'asshole', 'live', 'name', 'haha', 'thing', 'love', 'animal', 'much', 'always', 'seem', 'know', 'im', 'sad', 'want', 'comfort', 'experience', 'youve', 'wonder', 'hope', 'would', 'miss', 'go', 'hear', 'hope', 'great', 'support', 'right', 'think', 'might', 'worth', 'find', 'support', 'shes', 'still', 'alive', 'sound', 'stressful', 'mom', 'look', 'new', 'job', 'know', 'kind', 'silly', 'favorite', 'app', 'game', 'like', 'play', 'mention', 'like', 'strategy', 'think', 'might', 'like', 'im', 'addict', 'game', 'find', 'distraction', 'help', 'know', 'beginning', 'conversation', 'hear', 'lot', 'pain', 'im', 'wonder', 'feel', 'sorry', 'take', 'long', 'reply', 'schedule', 'tendency', 'really', 'sporadic', 'wow', 'yeah', 'say', 'make', 'feel', 'like', 'youre', 'dying', 'sound', 'scary', 'dont', 'know', 'severe', 'sound', 'tire', 'severe', 'im', 'worried', 'something', 'might', 'happen', 'youre', 'spasm', 'cat', 'would', 'worry', 'know', 'say', 'didnt', 'really', 'work', 'try', 'talk', 'people', 'person', 'life', 'would', 'comfortable', 'reach', 'sound', 'like', 'youre', 'incredibly', 'brave', 'deal', 'something', 'scary', 'completely', 'feel', 'really', 'weird', 'call', 'cat', 'cat', 'name', 'cat', 'name', 'king', 'haha', 'sound', 'like', 'tire', 'beneficial', 'sound', 'like', 'fun', 'also', 'sound', 'like', 'tire', 'tough', 'time', 'ahead', 'sound', 'kind', 'intimidate', 'honestly', 'sure', 'amp', 'e', 'procedure', 'might', 'sound', 'really', 'frustrating', 'understand', 'feel', 'like', 'dont', 'want', 'help', 'sound', 'like', 'youre', 'exhaustion', 'get', 'runaround', 'amp', 'e', 'stuff', 'like', 'hear', 'youre', 'pain', 'exhaustion', 'much', 'want', 'kill', 'say', 'theres', 'part', 'still', 'want', 'help', 'definitely', 'see', 'part', 'part', 'lead', 'post', 'talk', 'people', 'thread', 'cant', 'guarantee', 'wont', 'notice', 'wound', 'think', 'youve', 'already', 'waited', 'month', 'get', 'appointment', 'show', 'would', 'give', 'part', 'want', 'live', 'chance', 'still', 'tire', 'concern', 'safety', 'especially', 'since', 'say', 'wound', 'infect', 'would', 'mind', 'talk', 'plan', 'bit', 'wound', 'rejection', 'tired', 'hard', 'thing', 'deal', 'understand', 'say', 'anything', 'really', 'make', 'feel', 'like', 'youre', 'put', 'limb', 'whatever', 'happen', 'long', 'time', 'ago', 'one', 'therapist', 'manage', 'talk', 'session', 'sound', 'really', 'unfair', 'would', 'still', 'hold', 'im', 'naturally', 'concern', 'thank', 'honest', 'youve', 'go', 'think', 'take', 'lot', 'courage', 'even', 'post', 'internet', 'talk', 'suicidal', 'thought', 'im', 'really', 'glad', 'willing', 'talk', 'really', 'dont', 'know', 'word', 'change', 'anything', 'either', 'want', 'offer', 'support', 'want', 'stop', 'talk', 'time', 'understand', 'also', 'want', 'know', 'stop', 'talk', 'still', 'pm', 'time', 'might', 'take', 'hot', 'minute', 'get', 'back', 'sound', 'tired', 'frustrating', 'especially', 'since', 'quit', 'job', 'ask', 'day', 'option', 'people', 'doesnt', 'sound', 'like', 'much', 'option', 'think', 'animal', 'understand', 'people', 'lot', 'sometimes', 'people', 'strange', 'might', 'sound', 'think', 'cat', 'really', 'great', 'support', 'understand', 'would', 'love', 'cat', 'much', 'think', 'suicide', 'time', 'sound', 'really', 'exhaust', 'mention', 'spasm', 'sound', 'really', 'stressful', 'something', 'life', 'cause', 'want', 'kill', 'think', 'pretty', 'fun', 'admit', 'everyone', 'yeah', 'thing', 'kind', 'spring', 'really', 'hard', 'deal', 'yeah', 'seem', 'like', 'make', 'feel', 'pain', 'would', 'make', 'feel', 'empty', 'work', 'sound', 'tired', 'strenuous', 'sorry', 'hear', 'sock', 'pass', 'away', 'know', 'animal', 'different', 'glad', 'hear', 'love', 'cat', 'love', 'cat', 'sound', 'like', 'youve', 'go', 'many', 'emotion', 'concern', 'safety', 'feel', 'lonely', 'thing', 'try', 'keep', 'mind', 'thing', 'sound', 'like', 'inability', 'meet', 'people', 'source', 'great', 'pain', 'frustration', 'cause', 'feel', 'tired', 'alone', 'thats', 'tire', 'understandable', 'im', 'glad', 'post', 'talk', 'even', 'though', 'pain', 'isolated', 'okay', 'didnt', 'mean', 'worry', 'tell', 'scar', 'sound', 'like', 'symptom', 'go', 'arent', 'good', 'im', 'paramedic', 'anything', 'dont', 'take', 'word', 'would', 'say', 'dbt', 'work', 'say', 'one', 'mean', 'available', 'someone', 'position', 'suppose', 'help', 'mean', 'pretty', 'disgust', 'imo', 'dont', 'deserve', 'someone', 'mean', 'vulnerable', 'sound', 'like', 'really', 'love', 'cat', 'totally', 'understand', 'love', 'love', 'cat', 'dont', 'think', 'youre', 'crazy', 'concern', 'cat', 'also', 'sound', 'like', 'youre', 'lot', 'pain', 'dont', 'mind', 'share', 'want', 'talk', 'im', 'really', 'curious', 'whats', 'mind', 'lately']"