# Creating a Stress Detection Tool using Data From Subreddits: Modeling

#### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

In [2]:
import warnings
warnings.filterwarnings("ignore")

#### Import dataframe from pickle

In [3]:
# Load the working DataFrame from 'df.pickle' in the current directory.
# NOTE(review): unpickling can execute arbitrary code — only load a file you produced
# yourself or otherwise trust. Presumably this was written by an earlier notebook; confirm.
df = pd.read_pickle('df.pickle')

#### Define x and y

In [4]:
# Features come from the 'text' column; the target comes from 'stress_label'.
x, y = df['text'], df['stress_label']

#### Define vectorizer, stopwords

In [5]:
# TF-IDF vectorizer that drops scikit-learn's built-in English stop-word list.
# Every other option is left at its default; per the scikit-learn docs the default
# ngram_range is (1, 1), i.e. features are single words only.
vect = TfidfVectorizer(stop_words="english")

#### Vectorize the text and train/test split

In [6]:
# Learn the vocabulary and IDF weights from every document in `x`, then build the TF-IDF matrix.
# NOTE(review): because the fit uses the full corpus, any rows later held out for testing
# have already influenced the vocabulary/IDF behind this matrix.
X_tfidf = vect.fit_transform(x)

In [7]:
# Split the raw text first, then fit the vectorizer on the training rows only.
# Fitting TF-IDF on the full corpus lets test-set documents shape the vocabulary and
# document frequencies, which leaks information and can inflate the held-out score.
# test_size and random_state are unchanged, so the same rows land in each partition.
x_train_text, x_test_text, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

# fit_transform refits `vect` from scratch on the training text, so `vect` now matches
# the columns of x_train / x_test (an earlier full-corpus matrix such as X_tfidf does not).
x_train = vect.fit_transform(x_train_text)
# The test text is only transformed, never fitted on.
x_test = vect.transform(x_test_text)

#### Logistic Regression

In [8]:
# Train a logistic-regression classifier with default settings on the training split.
m2 = LogisticRegression().fit(x_train, y_train)

# Evaluate on the held-out split: predicted labels, then the model's score on those rows.
y_pred = m2.predict(x_test)
score = m2.score(x_test, y_test)

#### Creating the function

In [9]:
def stress_test_demo(x):
    """Classify one piece of text with the fitted model and print the verdict.

    Parameters
    ----------
    x : str
        The sentence to classify.

    Returns
    -------
    None
        The result is reported via ``print`` only.

    Notes
    -----
    Relies on the module-level fitted vectorizer ``vect`` and classifier ``m2``.
    """
    # Use the argument that was passed in, not a notebook-level global, so the
    # function gives the right answer no matter what the caller named its variable.
    features = vect.transform([x])
    # predict returns one label per input row; we passed a single row.
    label = m2.predict(features)[0]

    print("Using the Logistic Regression model...")
    if label == 'stress':
        print("...This input indicated that the user is stressed\n")
    elif label == 'no stress':
        print("...This input indicated that the user is NOT stressed\n")

#### More testing!

In [10]:
# Read one sentence from the user and print the model's verdict for it.
user_input = input("Enter a sentence: ")
stress_test_demo(user_input)

Enter a sentence: Every day I feel like I'm having a panic attack
Using the Logistic Regression model...
...This input indicated that the user is stressed



In [11]:
# Prompt for another sentence and print the model's verdict for it.
user_input = input("Enter a sentence: ")
stress_test_demo(user_input)

Enter a sentence: Long story short, I don't want to give up
Using the Logistic Regression model...
...This input indicated that the user is NOT stressed



In [12]:
# Prompt for another sentence and print the model's verdict for it.
user_input = input("Enter a sentence: ")
stress_test_demo(user_input)

Enter a sentence: I don't really know how I feel, any advice would be greatly appreciated
Using the Logistic Regression model...
...This input indicated that the user is stressed



In [13]:
# Prompt for another sentence and print the model's verdict for it.
user_input = input("Enter a sentence: ")
stress_test_demo(user_input)

Enter a sentence: You're welcome, don't give up!
Using the Logistic Regression model...
...This input indicated that the user is NOT stressed



In [14]:
# Prompt for another sentence and print the model's verdict for it.
user_input = input("Enter a sentence: ")
stress_test_demo(user_input)

Enter a sentence: I don't know anymore
Using the Logistic Regression model...
...This input indicated that the user is stressed



#### Conclusion
* Overall, the model is fairly good at distinguishing stress from no stress in comments, especially when the input includes keywords from the bigrams and trigrams found in each category.