## Therapy Chatbot

### Libraries and Utilities

In [7]:
# Standard library
import os
import re

# Third-party
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from wordcloud import WordCloud, STOPWORDS

# NLTK resources used further down in this notebook:
#   - "stopwords": stopwords.words("english")
#   - "punkt" / "punkt_tab": tokenizer models behind nltk.word_tokenize
#     (newer NLTK releases look up "punkt_tab" instead of "punkt")
#   - "wordnet": corpus behind WordNetLemmatizer
# Without the tokenizer and WordNet data a fresh environment raises LookupError
# in the tokenization / lemmatization cells.
for nltk_resource in ("stopwords", "punkt", "punkt_tab", "wordnet"):
    nltk.download(nltk_resource)

# List every file found under the local "datasets" directory.
for dirname, _, filenames in os.walk('datasets'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

datasets\Sheet_1.csv
datasets\Sheet_2.csv


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\abhil\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Loading Data

In [8]:
# Only these two columns are used by the rest of the notebook. Selecting them
# directly replaces the previous hard-coded drop of "Unnamed: 3" .. "Unnamed: 7",
# which was redundant (the next step kept just these two columns anyway) and
# raised KeyError whenever one of those columns was absent from the sheet.
KEEP_COLUMNS = ["class", "response_text"]

raw_data = pd.read_csv(r"datasets/Sheet_1.csv", encoding="latin1")

# Keep "class" first so positional access (column 0 = label) still works, and
# drop rows with a missing label or response. The index is intentionally not
# reset: later cells look responses up by their original row label.
data = raw_data[KEEP_COLUMNS].dropna(axis=0)
data.head(10)

Unnamed: 0,class,response_text
0,not_flagged,I try and avoid this sort of conflict
1,flagged,Had a friend open up to me about his mental ad...
2,flagged,I saved a girl from suicide once. She was goin...
3,not_flagged,i cant think of one really...i think i may hav...
4,not_flagged,Only really one friend who doesn't fit into th...
5,not_flagged,a couple of years ago my friends was going to ...
6,flagged,Roommate when he was going through death and l...
7,flagged,i've had a couple of friends (you could say mo...
8,not_flagged,Listened to someone talk about relationship tr...
9,flagged,I will always listen. I comforted my sister wh...


### 0 to Not Flagged and 1 to Flagged

In [9]:
# Binary-encode the label column: "flagged" -> 1, every other value -> 0.
is_flagged = data["class"] == "flagged"
data["class"] = is_flagged.astype("int64")
data.head()

Unnamed: 0,class,response_text
0,0,I try and avoid this sort of conflict
1,1,Had a friend open up to me about his mental ad...
2,1,I saved a girl from suicide once. She was goin...
3,0,i cant think of one really...i think i may hav...
4,0,Only really one friend who doesn't fit into th...


In [10]:
# Show the response stored under row label 16.
data.loc[16, "response_text"]

'I have helped advise friends who have faced circumstances similar to mine'

## Regular Expression
#### We can remove non-letter characters from the text using a regular expression.
#### The lower() method returns a lowercased copy of the given string: every uppercase character is converted to lowercase. If the string contains no uppercase characters, the result is equal to the original string.

In [13]:
# Take one sample response, replace every non-letter character with a space,
# then lowercase the result.
first_text = data["response_text"][16]
text = re.compile("[^a-zA-Z]").sub(" ", first_text).lower()
print(text)

i have helped advise friends who have faced circumstances similar to mine


### Irrelevant Words (Stopwords)

In [14]:
# Build the English stop-word set once. Previously it was rebuilt inside the
# comprehension, i.e. once for every token being tested.
english_stopwords = set(stopwords.words('english'))

# Split the cleaned sample into tokens and keep only the non-stop-words.
text = nltk.word_tokenize(text)
text = [word for word in text if word not in english_stopwords]
print(text)

['helped', 'advise', 'friends', 'faced', 'circumstances', 'similar', 'mine']


### Lemmatization

In [18]:
lemmatizer = WordNetLemmatizer()

# Lemmatize each token three times in sequence: as a noun, then the result as
# a verb, then that result as an adjective.
lemmas = []
for token in text:
    as_noun = lemmatizer.lemmatize(token, 'n')
    as_verb = lemmatizer.lemmatize(as_noun, pos='v')
    lemmas.append(lemmatizer.lemmatize(as_verb, pos='a'))
text = lemmas
print(text)

['help', 'advise', 'friend', 'face', 'circumstance', 'similar', 'mine']


### All Words

In [19]:
def preprocess_response(response, stop_words, lemmatizer):
    """Clean one free-text response and return it as a space-joined string.

    Steps, in order:
      1. replace every non-letter character with a space and lowercase;
      2. tokenize with ``nltk.word_tokenize``;
      3. drop tokens contained in ``stop_words``;
      4. lemmatize each remaining token as noun, then verb, then adjective.

    Parameters
    ----------
    response : str
        Raw response text.
    stop_words : collection of str
        Tokens to discard (membership is tested with ``in``).
    lemmatizer : object with ``lemmatize(word, pos)``
        Lemmatizer applied to each kept token.

    Returns
    -------
    str
        The processed tokens joined by single spaces.
    """
    letters_only = re.sub("[^a-zA-Z]", " ", response).lower()
    tokens = [
        word for word in nltk.word_tokenize(letters_only)
        if word not in stop_words
    ]
    lemmas = (
        lemmatizer.lemmatize(
            lemmatizer.lemmatize(lemmatizer.lemmatize(word, "n"), pos="v"),
            pos="a",
        )
        for word in tokens
    )
    return " ".join(lemmas)


# Loop-invariant resources are created once here; previously the stop-word set
# was rebuilt for every token and the lemmatizer re-created for every response.
english_stopwords = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

# One cleaned string per response, in the same order as data.response_text.
description_list = [
    preprocess_response(description, english_stopwords, lemmatizer)
    for description in data.response_text
]

In [20]:
description_list[16]

'help advise friend face circumstance similar mine'

### Bag of Words

In [23]:
# Size of the vocabulary kept by the vectorizer (the most frequent terms).
max_features = 100
count_vectorizer = CountVectorizer(max_features = max_features)

# Dense document-term count matrix: one row per cleaned response.
sparce_matrix = count_vectorizer.fit_transform(description_list).toarray()

# get_feature_names() was removed from scikit-learn (1.2+); prefer
# get_feature_names_out() and fall back only on versions that predate it.
if hasattr(count_vectorizer, "get_feature_names_out"):
    feature_names = list(count_vectorizer.get_feature_names_out())
else:
    feature_names = count_vectorizer.get_feature_names()
print("Top {} Most Used Words: {}".format(max_features, feature_names))

Top 100 Most Used Words: ['addiction', 'advice', 'alone', 'always', 'anxiety', 'anything', 'back', 'best', 'bring', 'call', 'care', 'come', 'comfort', 'could', 'deal', 'depression', 'describe', 'dont', 'end', 'even', 'everything', 'experience', 'face', 'feel', 'find', 'friend', 'get', 'gf', 'girl', 'girlfriend', 'give', 'go', 'good', 'grade', 'happen', 'help', 'helpful', 'issue', 'kid', 'kill', 'know', 'last', 'let', 'life', 'like', 'listen', 'little', 'look', 'lot', 'make', 'many', 'may', 'much', 'need', 'never', 'night', 'offer', 'often', 'one', 'open', 'others', 'people', 'person', 'personal', 'pretty', 'problem', 'really', 'relationship', 'say', 'school', 'see', 'self', 'severe', 'share', 'shit', 'similar', 'simply', 'situation', 'someone', 'sometimes', 'start', 'struggle', 'stuff', 'suicide', 'support', 'talk', 'tell', 'think', 'though', 'time', 'trouble', 'try', 'use', 'want', 'way', 'week', 'well', 'work', 'would', 'year']


### Naive Bayes

In [22]:
# Features: the bag-of-words count matrix. This cell must run after the
# Bag of Words cell, which is where sparce_matrix is created.
x = sparce_matrix
# Labels: the "class" column (first column of data) as a NumPy array.
y = data["class"].to_numpy()

NameError: name 'sparce_matrix' is not defined