# Habit Recommendation System - Team Pufferfish

## Import libraries

In [1]:
# Data Analysis Libraries
import pandas as pd
import numpy as np

# For text processing
from nltk.stem.wordnet import WordNetLemmatizer
import string
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

# For pretrained sentence processing model (based on HuggingFace)
from sentence_transformers import SentenceTransformer, util
import torch

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\jpnor\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\jpnor\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\jpnor\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


## Table of Habits
- Name: Name of Habit
- Description: Short Description shown to end user
- Keywords: List of keywords related to Habit (words related to someone who is struggling and would benefit from starting the habit + some benefits)
- Resources: Link to a resource for learning more about the habit
- Image: Image shown to user
- Order: Order of habits (to be used potentially for historical recommendations)
- Prompt (Description/Keywords): GPT-3 prompts used to generate the habit's Description and Keywords
- Keywords_filtered: The cleaned set of keywords - used in model. (done beforehand to save time)

In [5]:
# Read the habits table and discard the leftover "Unnamed: 0" column
# that the CSV carries alongside the real habit fields.
raw_habits = pd.read_csv("Habits_Cleaned_V1.csv")
habit_table = raw_habits.drop(columns=["Unnamed: 0"])
habit_table.head()

Unnamed: 0,Name,Description,Keywords,Resources,Image,Order,Prompt (Description),Prompt (Keywords),Keywords_filtered
0,Keep a Gratitude Journal,A gratitude journal can help you keep track an...,"Stressed, anxious, overwhelmed, negative, pess...",https://positivepsychology.com/gratitude-journ...,https://positive.b-cdn.net/wp-content/uploads/...,1,"Give a brief, 1-2 sentence explanation on Keep...",Give me about 40 keywords describing the feeli...,jealous grateful positive negative mindful def...
1,Drink Water Regularly,"Water helps regulate body temperature, transpo...","Dehydrated, thirsty, dry, fatigued, exhausted,...",https://www.health.harvard.edu/staying-healthy...,https://domf5oio6qrcr.cloudfront.net/medialibr...,2,"Give a brief, 1-2 sentence explanation on Drin...",Give me about 40 keywords describing the feeli...,skin tired bad forgetful unproductive healthy ...
2,Keep Track of your Goals,Keeping track of your goals can help you stay ...,"Unfocused, unmotivated, uncertain, directionle...",https://iulianionescu.com/blog/track-your-goals/,https://www.kathkyle.com/wp-content/uploads/20...,3,"Give a brief, 1-2 sentence explanation on Keep...",Give me about 40 keywords describing the feeli...,distracted scattered forgetful productivity de...
3,Consume Motivational Content,Following or consuming motivational content ca...,"Unmotivated, stagnant, directionless, discoura...",https://due.com/10-incredible-sources-for-moti...,https://media.cnn.com/api/v1/images/stellar/pr...,4,"Give a brief, 1-2 sentence explanation on Cons...",Give me about 40 keywords (NOT IN A LIST) desc...,medium distracted lack negative hopeful depres...
4,Excersise,Regular exercise has numerous physical and men...,"Lethargic, out of shape, tired, weak, low stam...",https://www.mayoclinic.org/healthy-lifestyle/f...,https://health.clevelandclinic.org/wp-content/...,5,"Give a brief, 1-2 sentence explanation on Exce...",Give me about 40 keywords (NOT IN A LIST) desc...,inflammation immune increased function tired m...


## Cleaning User prompt
The function below will clean the user prompt

In [6]:
def text_process(mess, stem = False, lem = True):
    """
    Clean a string of text and return its unique, normalized words.

    Performs the following, in order:
    1. Lowercase the text and replace every punctuation character with a space
    2. Remove all English stopwords
    3. Remove all purely numeric tokens
    4. Remove all words with character len <= 2
    5. If stem = True, stem the words (PorterStemmer)
    6. If lem = True, lemmatize the words (WordNetLemmatizer)
    7. Remove duplicate words, keeping the order of first occurrence

    Args:
        mess: The string to clean.
        stem: When True, apply Porter stemming to each kept word.
        lem: When True, apply WordNet lemmatization to each kept word
            (runs after stemming when both flags are set).

    Returns:
        A list of the unique processed words, in the order they first
        appear in ``mess``. An empty string yields an empty list.
    """
    # Build the stopword lookup once: calling stopwords.words() for every
    # token re-reads the corpus list and scans it linearly each time.
    stop_words = set(stopwords.words('english'))

    # Lowercase each character and turn punctuation into spaces so that
    # e.g. "day-to-day" splits into separate tokens.
    nopunc = ''.join(
        ' ' if char in string.punctuation else char.lower() for char in mess
    )

    # Remove any stopwords, numbers and strings with less than 3 characters.
    # Tokens are already lowercase at this point.
    words = [
        word for word in nopunc.split()
        if word not in stop_words and not word.isnumeric() and len(word) > 2
    ]

    # Stem the words
    if stem:
        stemmer = PorterStemmer()
        words = [stemmer.stem(word) for word in words]

    # Lemmatize the words
    if lem:
        lemmatizer = WordNetLemmatizer()
        words = [lemmatizer.lemmatize(word) for word in words]

    # De-duplicate while preserving first-occurrence order. A plain set()
    # would return the words in an order that can change between runs
    # (string hashing is randomized), making the joined sentence unstable.
    return list(dict.fromkeys(words))

## Loading and Testing the model
The model is loaded from the `Model` folder in this GitHub repository. Modify `query_sentence` to test different prompts; some example prompts are included below.

In [7]:
# The prompt that is actually processed below. To try another one, replace
# this value with one of the example prompts that follow.
query_sentence = "I think im fat and unhealthy"

# The bare string literals below are example prompts kept for reference;
# they are standalone expressions and are never assigned or used.

"""I am depressed with my current situation"""

"""I feel unhealthy, I feel tired and my body hurts"""


"""I cant focus"""


"""
I don't feel challenged in my life, and that is affecting my motivation significantly
"""

"""
I cant seem to focus at all, and its really frusturating. Whenever i sit down to work, I get distracted by something, whether 
its instagram on my phone or going on reddit. it makes me sad:(
"""


"""
I want to do something in my life to push me out of my comfort zone and challenge myself to improve

"""


"""
I feel unfufilled in my life, i dont see any challenge and my day-to-day activities are boring and unsatisfying.

"""

# Clean the prompt and show it next to the original for comparison
filtered_sentence = " ".join(text_process(query_sentence))
print(f"Original: {query_sentence}")
print(f"Processed: {filtered_sentence}")

Original: I think im fat and unhealthy
Processed: unhealthy fat think


In [10]:
# Load the sentence-embedding model from the local 'Model' folder.
model = SentenceTransformer('Model')

# Embed every habit's cleaned keyword string, then the user's query.
corpus_embeddings = model.encode(habit_table.Keywords_filtered.values, convert_to_tensor=True)
# NOTE(review): the raw query_sentence is embedded here, not the cleaned
# filtered_sentence, while the corpus side uses the cleaned keywords.
# Confirm this asymmetry is intentional.
query_embedding = model.encode(query_sentence, convert_to_tensor=True)

# We use cosine-similarity and torch.topk to find the highest 5 scores
cos_scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]

# Clamp k to the number of habits: torch.topk raises if k exceeds the
# number of scores. Tensor.tolist() is used instead of .numpy() because
# .numpy() raises for tensors that live on a GPU.
top_results = torch.topk(cos_scores, k=min(5, cos_scores.shape[0])).indices.tolist() # indices of top score in df
results = habit_table[['Name', 'Description', 'Resources', 'Image']].iloc[top_results] # dataframe of results

# cos_scores holds one similarity score per habit. It could be used to
# determine habits based on historical logs: take the average of the
# cos_scores across logs and suggest the top 5.

# Show results
print("Query:", query_sentence)
print("\nRecommendations:")
results.head()

Query: I think im fat and unhealthy

Recommendations:


Unnamed: 0,Name,Description,Resources,Image
21,Develop Healthy Eating Habits,"Developing healthy eating habits, like eating ...",PLACEHOLDER,PLACEHOLDER
8,Limit your Social Media Consumption,Limiting your social media consumption can hel...,https://www.helpguide.org/articles/mental-heal...,https://www.helpguide.org/wp-content/uploads/2...
4,Excersise,Regular exercise has numerous physical and men...,https://www.mayoclinic.org/healthy-lifestyle/f...,https://health.clevelandclinic.org/wp-content/...
17,Practice Good Hygiene,"Practicing good hygiene, such as washing your ...",PLACEHOLDER,PLACEHOLDER
12,Get Enough Sleep,Getting enough sleep is essential for optimal ...,PLACEHOLDER,PLACEHOLDER


In [11]:
# JSON Form
# Plain-dict view of the recommendations. The first four lists have one
# entry per row of `results`, in ranked order.
results_json = {
    "Habits": results.Name.tolist(), # Contains habit name
    "Description": results.Description.tolist(), # Contains habit description
    "Resources": results.Resources.tolist(), # Contains urls to habit resource
    "Images": results.Image.tolist(), # Contains image urls
    # A list containing the cosine similarity scores for each habit
    # (length 25 for the 25 total habits). Can be used to recommend habits
    # based on historical logs.
    # Tensor.tolist() is used directly because .numpy() raises when the
    # scores tensor lives on a GPU.
    "Rankings": cos_scores.tolist(),
}
print(results_json)

{'Habits': ['Develop Healthy Eating Habits', 'Limit your Social Media Consumption', 'Excersise', 'Practice Good Hygiene', 'Get Enough Sleep'], 'Description': ['Developing healthy eating habits, like eating a variety of fruits, vegetables, and whole grains, can give you more energy, make you feel better, and reduce the chance of getting sick.', 'Limiting your social media consumption can help reduce the negative effects of excessive screen time, such as anxiety, depression, and poor sleep, and free up time and mental space for more productive and fulfilling activities.', 'Regular exercise has numerous physical and mental health benefits, including improving cardiovascular health, building strength and endurance, reducing the risk of chronic diseases, enhancing mood and cognitive function, and promoting better sleep.', 'Practicing good hygiene, such as washing your hands regularly and showering daily, can prevent the spread of illness, promote physical health, and improve self-esteem and