# Preprocessing & Flagging Tweets as either
1. Meditation
2. Therapy
3. Exercise

In [1]:
import tweepy as tw
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 

In [2]:
# pip install tweepy
# pip install textblob

In [3]:
df=pd.read_csv("data/mental_health_tweets2.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,text
0,0,A way for people to place problems in better p...
1,1,Are you ready to unlock your full potential an...
2,2,Setting yearly short term goals and meeting th...
3,3,Sometimes you have to ask yourself is it worth...
4,4,Having a rough day? #BeKind to your mind.. #Jo...


In [274]:
meditation_refs=["yoga","meditation","meditate","breathwork","yoga", "mindfulness", "mindful", "mantra", "wim hoff", "wimhoff" "pranayama", "vipassana", "samatha", "headspace", "insight timer"]
therapy_refs=["therapy", "counseling", "psychotherapy", "therapist", "dialectical behavioral", "cognitive behavioral"]
workout_refs =["workout","exercise","exercising","weight-training", "weight training", "running", "lifting", "cardio","fitness", "marathon"]

In [275]:
def identify_subject(tweet,refs):
    flag = 0
    for ref in refs:
        if tweet.find(ref) !=-1:
            flag = 1
    return flag
df["meditation"]= df["text"].apply(lambda x: identify_subject(x.lower(), meditation_refs))
df["therapy"]= df["text"].apply(lambda x: identify_subject(x.lower(), therapy_refs))
df["workout"]= df["text"].apply(lambda x: identify_subject(x.lower(), workout_refs))

In [276]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,meditation,therapy,workout,Processed_tweet
0,0,A way for people to place problems in better p...,0,0,0,A way people place problem better perspective ...
1,1,Are you ready to unlock your full potential an...,0,0,0,Are ready unlock full potential achieve goals?...
2,2,Setting yearly short term goals and meeting th...,0,0,0,Setting yearly short term goal meeting positiv...
3,3,Sometimes you have to ask yourself is it worth...,0,0,0,Sometimes ask worth it. #MentalHealthAwareness...
4,4,Having a rough day? #BeKind to your mind.. #Jo...,0,0,0,Having rough day? #BeKind mind.. #JoyTrain #Se...


In [277]:
print(f"Meditation Tweets: {len(df.loc[df['meditation'] == 1])}")
print(f"Therapy Tweets: {len(df.loc[df['therapy'] == 1])}")
print(f"Workout Tweets: {len(df.loc[df['workout'] == 1])}")

Meditation Tweets: 588
Therapy Tweets: 759
Workout Tweets: 255


In [278]:
import nltk
from nltk.corpus import stopwords
from textblob import Word, TextBlob

In [279]:
nltk.download("stopwords")
nltk.download("wordnet")
stop_words = stopwords.words("english")
custom_stopwords = ["RT","#mentalhealth"]
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/joezoll/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/joezoll/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/joezoll/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [280]:
def preprocess_tweets(tweet, custom_stopwords):
    preprocessed_tweet = tweet
    preprocessed_tweet.replace('[^\w\s]','')
    preprocessed_tweet = " ".join(word for word in preprocessed_tweet.split() if word not in stop_words)
    preprocessed_tweet = " ".join(word for word in preprocessed_tweet.split() if word not in custom_stopwords)
    preprocessed_tweet = " ".join(Word(word).lemmatize()for word in preprocessed_tweet.split())
    return(preprocessed_tweet)

df["Processed_tweet"] = df['text'].apply(lambda x: preprocess_tweets(x,custom_stopwords))

In [281]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,meditation,therapy,workout,Processed_tweet
0,0,A way for people to place problems in better p...,0,0,0,A way people place problem better perspective ...
1,1,Are you ready to unlock your full potential an...,0,0,0,Are ready unlock full potential achieve goals?...
2,2,Setting yearly short term goals and meeting th...,0,0,0,Setting yearly short term goal meeting positiv...
3,3,Sometimes you have to ask yourself is it worth...,0,0,0,Sometimes ask worth it. #MentalHealthAwareness...
4,4,Having a rough day? #BeKind to your mind.. #Jo...,0,0,0,Having rough day? #BeKind mind.. #JoyTrain #Se...


In [282]:
df.to_csv("data/processed_data2.csv", index=False)