# 1. Starting with Imports

In [None]:
# Importing pandas and fastai
from fastai.text.all import *
import pandas as pd

# Importing torch and verifying that we're using my GPU
import os
import torch

# Report the PyTorch release and, separately, the CUDA toolkit it was built
# against. torch.version.cuda is the CUDA version (None on CPU-only builds),
# not the PyTorch version.
print(f"torch version:\t{torch.__version__}")
print(f"CUDA version:\t{torch.version.cuda}")

# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

if torch.cuda.is_available():
    print(f"GPU Selected:\t{torch.cuda.get_device_name(0)}")
else:
    # Make the CPU fallback visible instead of printing nothing.
    print("GPU Selected:\tnone (CUDA is not available)")

# 2. Loading and inspecting our Data

In [None]:
# Location of the raw CSV file on GitHub (adjacent literals are joined into one string)
url = (
    'https://raw.githubusercontent.com/riverar9/cuny-msds/main/'
    'data607/projects/final-project/'
    'data-mental-health-corpus/mental_health.csv'
)

In [None]:
# Download the CSV at `url` into a pandas DataFrame, then preview the first rows
df = pd.read_csv(filepath_or_buffer=url)
df.head()

In [None]:
# Share of rows carrying each 'label' value, expressed as a percentage
label_share = df['label'].value_counts(normalize=True)
label_share * 100

From these percentages we can see that the two label classes each make up roughly half of the dataset, so the classes are approximately balanced.

# 3. Loading the data into a data loader

In [None]:
# Build text data loaders from the dataframe, holding out 20% of rows for validation
validation_fraction = 0.2
dls = TextDataLoaders.from_df(df, valid_pct=validation_fraction)

In [None]:
# Preview at most two samples from a batch
dls.show_batch(max_n=2)

In [None]:
# Create a text classifier learner on the AWD_LSTM architecture, reporting accuracy
learn = text_classifier_learner(
    dls,
    AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)

In [None]:
# Fine-tune the learner: 3 epochs at a base learning rate of 1e-2
learn.fine_tune(epochs=3, base_lr=1e-2)

In [None]:
# Display at most two samples alongside the model's predictions
learn.show_results(max_n=2)

# 4. Running some quick tests

In [None]:
# Creating a function to do pre-model processing
import re


def clean_text_input(input_text: str) -> str:
    """Normalise free text before it is passed to the model.

    Every character that is neither an ASCII letter nor whitespace is
    removed, the result is lower-cased, and runs of whitespace are collapsed
    to single spaces with leading/trailing whitespace trimmed.

    Whitespace is deliberately preserved: stripping it would fuse the whole
    input into one run-on token and destroy the word boundaries.

    Args:
        input_text: Raw text to clean.

    Returns:
        The cleaned, lower-cased text with words separated by single spaces.
    """
    letters_and_spaces = re.sub(r'[^a-zA-Z\s]', '', input_text)
    # split() with no arguments drops empty pieces, so repeated or edge
    # whitespace (including newlines and tabs) becomes one space.
    return ' '.join(letters_and_spaces.lower().split())

In [None]:
# Run a few test predictions on new data
quick_tests = [
    "life isn't worth living",
    "It's all looking good from here!",
    "I love myself",
    "Tetris Effect. Unlimited replay ability. Amazing music and flow states. Psychedelic and interesting visuals. Most satisfying gameplay.",
    "Oblivion. It was my introduction to the series and rpgs as a whole. There was so many things that blew my mind and I honestly think oblivion was one of the games for the time to push technological limits.",
]

for each_test in quick_tests:
    # Run the model on the text first: learn.predict returns a tuple of
    # (decoded label, label index, per-class probabilities), and the
    # probabilities are the third element.
    prediction = learn.predict(each_test)
    probabilities = prediction[2]
    # NOTE(review): class 0 is reported as "Positive" and class 1 as
    # "Negative"; confirm this order against dls.vocab.
    e_pred_positive = round(100 * probabilities[0].item(), 2)
    e_pred_negative = round(100 * probabilities[1].item(), 2)
    print(f'"{each_test}"\n\tPositive: {e_pred_positive} %\n\tNegative: {e_pred_negative} %')

## Exporting the model

In [None]:
# Save the trained learner to a pickle file
export_path = 'minima/mental-health-model.pkl'
learn.export(export_path)