In [48]:
# ! pip install -U sentence-transformers 

In [49]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input mount and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for path in (os.path.join(dirname, filename) for filename in filenames):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/reddit-mental-health-v2/train_roberta.npy
/kaggle/input/reddit-mental-health-v2/crawl_matrix_reddit_mental_health_cut_v2.npy
/kaggle/input/reddit-mental-health-v2/crawl_oov_reddit_mental_health_cut_v3.pickle
/kaggle/input/reddit-mental-health-v2/crawl_oov_reddit_mental_health_cut.pickle
/kaggle/input/reddit-mental-health-v2/crawl_matrix_reddit_mental_health_cut.npy
/kaggle/input/reddit-mental-health-v2/test_df_cut.csv
/kaggle/input/reddit-mental-health-v2/test_df_cut_processed_v2.csv
/kaggle/input/reddit-mental-health-v2/cleaned_reddit.csv
/kaggle/input/reddit-mental-health-v2/train_df_cut.csv
/kaggle/input/reddit-mental-health-v2/test_roberta.npy
/kaggle/input/reddit-mental-health-v2/train_df_cut_processed_v2.csv
/kaggle/input/reddit-mental-health-v2/crawl_matrix_reddit_mental_health_cut_v3.npy
/kaggle/input/reddit-mental-health-v2/cleaned_reddit_lemmatized.csv
/kaggle/input/reddit-mental-health-v2/test_df_cut_processed.csv
/kaggle/input/reddit-mental-health-v2/crawl_oov

In [50]:
import random
import unidecode
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import time
import torch.nn.functional as F
from keras.preprocessing import text # depreciated?
from torch.utils.data import Dataset, DataLoader,TensorDataset
from sklearn.model_selection import train_test_split

from keras.utils import pad_sequences # new
import gc
import re
import pickle
import csv
from tqdm import tqdm
tqdm.pandas()
from gensim.models import KeyedVectors
from flashtext import KeywordProcessor

# Locations of the pretrained word-vector text files.
# NOTE(review): the directory listing captured above shows only
# `reddit-mental-health-v2` under /kaggle/input, so these datasets were
# apparently not attached in that session -- confirm before calling build_matrix.
CRAWL_EMBEDDING_PATH = '../input/fasttext-crawl-300d-2m/crawl-300d-2M.vec'
PARAD_EMBEDDING_PATH = '../input/paragram-dandrocec/paragram_300_sl999.txt'
GLOVE_EMBEDDING_PATH = '../input/glove840b300dtxt/glove.840B.300d.txt'

# Run configuration. MAX_LEN is passed to Attention as its step_dim inside
# NeuralNet, and SEED is the default argument of seed_everything.
# BATCH_SIZE, EPOCHS and NUM_MODEL are not referenced in the visible part of
# the notebook -- presumably training-loop settings; verify against later cells.
BATCH_SIZE = 256
EPOCHS = 5
MAX_LEN = 220
NUM_MODEL = 3
SEED = 6089

In [51]:
def seed_everything(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU plus every CUDA device), exports ``PYTHONHASHSEED`` and
    switches cuDNN to deterministic, non-autotuned kernels.

    Args:
        seed: Integer seed; defaults to the notebook-level ``SEED``.
    """
    random.seed(seed)
    # Recorded for any interpreter started from this environment; the hash
    # seed of the already-running interpreter cannot be changed here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the currently selected one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode autotunes convolution algorithms and may choose a
    # different one on each run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False


seed_everything()

In [52]:
def get_coefs(word, *arr):
    """Pair a token with its embedding vector.

    Args:
        word: The token (first field of an embedding-file line).
        *arr: The remaining fields of the line, convertible to float.

    Returns:
        ``(word, vector)`` where ``vector`` is a 1-D ``float32`` NumPy array.
    """
    return word, np.asarray(arr, dtype='float32')


def load_embeddings(path):
    """Read a space-delimited text embedding file into a ``{token: vector}`` dict.

    Each usable line has the form ``token v1 v2 ... vn``. Lines carrying fewer
    than two numeric fields are skipped: this drops the ``"<count> <dim>"``
    header that fastText ``.vec`` files start with, as well as blank lines,
    which would otherwise be stored as bogus 1-element or empty "vectors".

    Args:
        path: Path of the embedding text file (read as UTF-8, undecodable
            bytes are ignored).

    Returns:
        Dict mapping each token to a 1-D ``float32`` NumPy array. When a token
        occurs more than once, the last occurrence wins.
    """
    embeddings = {}
    with open(path, encoding="utf8", errors='ignore') as f:
        for line in f:
            fields = line.strip().split(' ')
            # token + at least two coefficients; anything shorter is a
            # header or an empty/malformed line, not an embedding.
            if len(fields) < 3:
                continue
            word, vector = get_coefs(*fields)
            embeddings[word] = vector
    return embeddings

def build_matrix(word_index, path):
    """Build an embedding matrix for ``word_index`` from the vectors stored at ``path``.

    Adapted from https://www.kaggle.com/bminixhofer/simple-lstm-pytorch-version

    For every word the following spellings are tried in order and the first
    one present in the embedding file supplies the row: the word itself,
    upper-cased, capitalized, ASCII-transliterated (unidecode), title-cased,
    and finally the word with all digits removed.

    Args:
        word_index: Mapping of word -> integer row index.
        path: Embedding text file, loaded with ``load_embeddings``.

    Returns:
        ``(matrix, missing)``: ``matrix`` has shape ``(len(word_index) + 1, 300)``
        and keeps all-zero rows for words that were not found; ``missing``
        lists those words in their digit-stripped form.
    """
    vectors = load_embeddings(path)
    matrix = np.zeros((len(word_index) + 1, 300))

    # Ordered fallbacks; evaluated lazily so later ones only run on a miss.
    variants = (
        lambda w: w,
        lambda w: w.upper(),
        lambda w: w.capitalize(),
        lambda w: unidecode.unidecode(w),
        lambda w: w.title(),
        lambda w: re.sub('[0-9]', '', w),
    )

    missing = []
    for word, row in word_index.items():
        candidate = word
        for make_variant in variants:
            candidate = make_variant(word)
            if candidate in vectors:
                matrix[row] = vectors[candidate]
                break
        else:
            # No spelling matched: the row stays zero and the last candidate
            # (the digit-stripped word) is what gets reported.
            missing.append(candidate)

    return matrix, missing

In [53]:
def custom_loss(data, targets):
    """Per-sample weighted binary cross-entropy on the first output column.

    Args:
        data: Model output; column 0 holds the predicted probabilities.
        targets: Column 0 holds the binary labels, column 1 the weight applied
            to each sample's loss term.

    Returns:
        Scalar tensor with the mean weighted BCE.
    """
    predictions = data[:, 0]
    labels = targets[:, 0]
    sample_weights = targets[:, 1]
    criterion = nn.BCELoss(weight=sample_weights)
    return criterion(predictions, labels)

In [54]:
# SWEAR_WORDS_PATH = '../input/entxt1/en.txt'

# swear_words = []
# with open(SWEAR_WORDS_PATH, 'r') as f:
#     for token in f:
#         swear_words.append(re.sub('\n', '', token))
# swear_words.extend(['<q>', '<a>', '<s>', '<x>', '<c>', '<b>','<n>', 'trump'])


# Raw string of punctuation/symbol characters meant to be dropped into a regex
# character class. Its only visible use is the commented-out `re.sub` inside
# content_preprocessing, so it is currently unused in this part of the notebook.
punc_sign = r"\ə\ᴵ\'∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&\\…\/\{\}\''\[\]\_\/\@\$\%\^\&\*\(\)\+\#\:\!\-\;\!\"\\(\),\.?'+`~$=|•！？。＂＃＄％＆＇（）＊＋，－／：；<>＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟‧﹏"


# Replacement table used for text normalization: maps misspellings, British
# spellings, English contractions, masked profanity, small-caps unicode words
# and assorted nicknames/compound words to the text that should replace them.
# It is expanded into case variants and loaded into the flashtext
# KeywordProcessor `kp` further down in this cell.
# NOTE: several keys occur more than once in this literal (e.g. 'colour',
# 'howdo', 'd***', 'ᴇɴᴅ', 'ғɪʀsᴛ', 'tumpkin'). Python keeps the last value for a
# repeated key; the repeats checked map to the same value, so they are redundant.
mispell_dict = {'colour': 'color', 'centre': 'center', 'favourite': 'favorite', 'travelling': 'traveling', 
                'counselling': 'counseling', 'theatre': 'theater', 'cancelled': 'canceled', 'labour': 'labor', 
                'organisation': 'organization', 'wwii': 'world war 2', 'citicise': 'criticize', 'youtu ': 'youtube ', 
                'qoura': 'quora', 'sallary': 'salary', 'Whta': 'What', 'narcisist': 'narcissist', 'howdo': 'how do', 
                'whatare': 'what are', 'howcan': 'how can', 'howmuch': 'how much', 'howmany': 'how many', 'em': 'them',
                'whydo': 'why do', 'doI': 'do I', 'theBest': 'the best', 'howdoes': 'how does', 'mastrubation': 'masturbation', 
                'mastrubate': 'masturbate', "mastrubating": 'masturbating', 'pennis': 'penis', 'etherium': 'ethereum', 
                'narcissit': 'narcissist', 'bigdata': 'big data', '2k17': '2017', '2k18': '2018', '2k19':'2019', 'qouta': 'quota', 
                'exboyfriend': 'ex boyfriend', 'airhostess': 'air hostess', "whst": 'what', 'watsapp': 'whatsapp', 
                'demonitisation': 'demonetization', 'demonitization': 'demonetization', 
                'demonetisation': 'demonetization', 'pokémon': 'pokemon', 'n*gga':'nigga', 'p*':'pussy', 
                'b***h':'bitch', 'a***h****':'asshole', 'a****le-ish':'asshole', 'b*ll-s***':'bullshit', 'd*g':'dog', 
                'st*up*id':'stupid','d***':'dick','di**':'dick',"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have",
                "couldn't": "could not", "didn't": "did not",  "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", 
                "haven't": "have not", "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", 
                "how's": "how is",  "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would", 
                "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", "it'd've": "it would have", 
                "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have","mightn't": "might not",
                "mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",
                "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have",
                "she'll": "she will", "she'll've": "she will have", "she's": "she is", "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",
                "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would", "there'd've": "there would have", "there's": "there is", "here's": "here is",
                "they'd": "they would", "they'd've": "they would have", "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have", "wasn't": "was not",
                "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have",
                "what're": "what are",  "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will",
                "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have",
                "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have","you'd": "you would",
                "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have", 'colour': 'color', 'centre': 'center', 'favourite': 'favorite', 'travelling': 'traveling',
                'counselling': 'counseling', 'theatre': 'theater', 'cancelled': 'canceled', 'labour': 'labor', 'organisation': 'organization', 'wwii': 'world war 2', 'citicise': 'criticize', 'youtu ': 'youtube ', 'Qoura': 'Quora',
                'sallary': 'salary', 'Whta': 'What', 'narcisist': 'narcissist', 'howdo': 'how do', 'whatare': 'what are', 'howcan': 'how can', 'howmuch': 'how much', 'howmany': 'how many', 'whydo': 'why do', 'doI': 'do I', 'theBest': 'the best',
                'howdoes': 'how does', 'mastrubation': 'masturbation', 'mastrubate': 'masturbate', "mastrubating": 'masturbating', 'pennis': 'penis', 'Etherium': 'Ethereum', 'narcissit': 'narcissist', 'bigdata': 'big data',
                '2k17': '2017', '2k18': '2018', 'qouta': 'quota', 'exboyfriend': 'ex boyfriend', 'airhostess': 'air hostess', "whst": 'what', 'watsapp': 'whatsapp', 'demonitisation': 'demonetization',
                'demonitization': 'demonetization', 'demonetisation': 'demonetization','\u200b': ' ', '\ufeff': '', 'करना': '', 'है': '',
                'sh*tty': 'shitty','s**t':'shit',
                'nigg*r':'nigger','bulls**t':'bullshit','n*****':'nigger',
                'p*ssy':'pussy','p***y':'pussy',
                'f***':'fuck','f*^k':'fuck','f*cked':'fucked','f*ck':'fuck','f***ing':'fucking',
                'sh*t':'shit', 'su*k':'suck', 'a**holes':'assholes','a**hole':'asshole',
                'di*k':'dick', 'd*ck': 'dick', 'd**k':'dick', 'd***':'dick',
                'bull**it':'bullshit', 'c**t':'cunt', 'cu*t':'cunt', 'c*nt':'cunt','troʊl':'trool',
                'trumpian':'bombast','realdonaldtrump':'trump','drumpf':'trump','trumpist':'trump',
                "i'ma": "i am","is'nt": "is not","‘I":'I',
                'ᴀɴᴅ':'and','ᴛʜᴇ':'the','ʜᴏᴍᴇ':'home','ᴜᴘ':'up','ʙʏ':'by','ᴀᴛ':'at','…and':'and','civilbeat':'civil beat',\
                'TrumpCare':'Trump care','Trumpcare':'Trump care', 'OBAMAcare':'Obama care','ᴄʜᴇᴄᴋ':'check','ғᴏʀ':'for','ᴛʜɪs':'this','ᴄᴏᴍᴘᴜᴛᴇʀ':'computer',\
                'ᴍᴏɴᴛʜ':'month','ᴡᴏʀᴋɪɴɢ':'working','ᴊᴏʙ':'job','ғʀᴏᴍ':'from','Sᴛᴀʀᴛ':'start','gubmit':'submit','CO₂':'carbon dioxide','ғɪʀsᴛ':'first',\
                'ᴇɴᴅ':'end','ᴄᴀɴ':'can','ʜᴀᴠᴇ':'have','ᴛᴏ':'to','ʟɪɴᴋ':'link','ᴏғ':'of','ʜᴏᴜʀʟʏ':'hourly','ᴡᴇᴇᴋ':'week','ᴇɴᴅ':'end','ᴇxᴛʀᴀ':'extra',\
                'Gʀᴇᴀᴛ':'great','sᴛᴜᴅᴇɴᴛs':'student','sᴛᴀʏ':'stay','ᴍᴏᴍs':'mother','ᴏʀ':'or','ᴀɴʏᴏɴᴇ':'anyone','ɴᴇᴇᴅɪɴɢ':'needing','ᴀɴ':'an','ɪɴᴄᴏᴍᴇ':'income',\
                'ʀᴇʟɪᴀʙʟᴇ':'reliable','ғɪʀsᴛ':'first','ʏᴏᴜʀ':'your','sɪɢɴɪɴɢ':'signing','ʙᴏᴛᴛᴏᴍ':'bottom','ғᴏʟʟᴏᴡɪɴɢ':'following','Mᴀᴋᴇ':'make',\
                'ᴄᴏɴɴᴇᴄᴛɪᴏɴ':'connection','ɪɴᴛᴇʀɴᴇᴛ':'internet','financialpost':'financial post', 'ʜaᴠᴇ':' have ', 'ᴄaɴ':' can ', 'Maᴋᴇ':' make ', 'ʀᴇʟɪaʙʟᴇ':' reliable ', 'ɴᴇᴇᴅ':' need ',
                'ᴏɴʟʏ':' only ', 'ᴇxᴛʀa':' extra ', 'aɴ':' an ', 'aɴʏᴏɴᴇ':' anyone ', 'sᴛaʏ':' stay ', 'Sᴛaʀᴛ':' start', 'SHOPO':'shop','ᴀ':'A',
                'theguardian':'the guardian','deplorables':'deplorable', 'theglobeandmail':'the globe and mail', 'justiciaries': 'justiciary','creditdation': 'Accreditation',
                'doctrne':'doctrine','fentayal': 'fentanyl','designation-': 'designation','CONartist' : 'con-artist','Mutilitated' : 'Mutilated','Obumblers': 'bumblers',
                'negotiatiations': 'negotiations','dood-': 'dood','irakis' : 'iraki','cooerate': 'cooperate','COx':'cox','racistcomments':'racist comments','envirnmetalists': 'environmentalists',
                'SB91':'senate bill','tRump':'trump','utmterm':'utm term','FakeNews':'fake news','Gʀᴇat':'great','ʙᴏᴛtoᴍ':'bottom','washingtontimes':'washington times','garycrum':'gary crum','htmlutmterm':'html utm term',
                'RangerMC':'car','TFWs':'tuition fee waiver','SJWs':'social justice warrior','Koncerned':'concerned','Vinis':'vinys','Yᴏᴜ':'you',
                'trumpists': 'trump', 'trumpkins': 'trump','trumpism': 'trump','trumpsters':'trump','thedonald':'trump',
                'trumpty': 'trump', 'trumpettes': 'trump','trumpland': 'trump','trumpies':'trump','trumpo':'trump',
                'drump': 'trump', 'dtrumpview': 'trump','drumph': 'trump','trumpanzee':'trump','trumpite':'trump',
                'chumpsters': 'trump', 'trumptanic': 'trump', 'itʻs': 'it is', 'donʻt': 'do not','pussyhats':'pussy hats',
                'trumpdon': 'trump', 'trumpisms': 'trump','trumperatti':'trump', 'legalizefreedom': 'legalize freedom',
                'trumpish': 'trump', 'ur': 'you are','twitler':'twitter','trumplethinskin':'trump','trumpnuts':'trump','trumpanzees':'trump',
                'justmaybe':'just maybe','trumpie':'trump','trumpistan':'trump','trumphobic':'trump','piano2':'piano','trumplandia':'trump',
                'globalresearch':'global research','trumptydumpty':'trump','frank1':'frank','trumpski':'trump','trumptards':'trump',
                'alwaysthere':'always there','clickbait':'click bait','antifas':'antifa','dtrump':'trump','trumpflakes':'trump flakes',
                'trumputin':'trump putin','fakesarge':'fake sarge','civilbot':'civil bot','tumpkin':'trump','trumpians':'trump',
                'drumpfs':'trump','dtrumpo':'trump','trumpistas':'trump','trumpity':'trump','trump nut':'trump','tumpkin':'trump',
                'russiagate':'russia gate','trumpsucker':'trump sucker','trumpbart':'trump bart', 'trumplicrat':'trump','dtrump0':'trump',
                'tfixstupid':'stupid','brexit':'<a>','Brexit':'<a>',
               }
               

# Second replacement table, loaded into the KeywordProcessor `kp2` below. Most
# values are short angle-bracket tags; 'thisisurl' (the URL placeholder that
# content_preprocessing inserts) is rewritten to 'url'.
mispell_dict2 = {'americanophobia': '<q>', 'klastri':'<s>','thisisurl':'url','magaphants':'<x>','cheetolini':'<c>','daesh':'<b>',
                'trumpelthinskin':'<n>'}
# Single character class built from adjacent string literals. The first range
# (U+10000-U+10FFFF) already contains the four U+1Fxxx ranges listed after it,
# so those are redundant; the last range adds code points U+2122-U+2B55.
emoji_re = re.compile(u'['
                        u'\U00010000-\U0010ffff' 
                        u'\U0001F600-\U0001F64F'
                        u'\U0001F300-\U0001F5FF'
                        u'\U0001F30D-\U0001F567'
                        u'\U0001F680-\U0001F6FF'
                        u'\u2122-\u2B55]', re.UNICODE)

# Case-sensitive replacer for `mispell_dict`. Every entry is registered in five
# casings (as written, lower, UPPER, Capitalized, Title Case), each mapped to
# the replacement in the same casing; a later entry overwrites an earlier one
# that produces the same key.
kp = KeywordProcessor(case_sensitive=True)

mix_mispell_dict = {}
for k, v in mispell_dict.items():
    for recase in (str, str.lower, str.upper, str.capitalize, str.title):
        mix_mispell_dict[recase(k)] = recase(v)

for k, v in mix_mispell_dict.items():
    kp.add_keyword(k, v)


# Separate case-sensitive replacer for `mispell_dict2`, registered as written.
kp2 = KeywordProcessor(case_sensitive=True)
for k, v in mispell_dict2.items():
    kp2.add_keyword(k, v)
    

def statistics_upper_words(text):
    """Count whitespace-separated tokens containing at least one ASCII capital (A-Z)."""
    return sum(1 for token in text.split() if re.search(r'[A-Z]', token))

def statistics_unique_words(text):
    """Return the number of distinct whitespace-separated tokens in ``text`` (case-sensitive)."""
    distinct_tokens = set(text.split())
    return len(distinct_tokens)

def statistics_characters_nums(text):
    """Return the number of distinct characters in ``text`` (whitespace included)."""
    distinct_chars = set(text)
    return len(distinct_chars)

def statistics_swear_words(text, swear_list=None):
    """Count how many entries of a swear-word list occur in ``text``.

    Matching is plain substring containment (``word in text``): an entry also
    counts when it appears inside a longer token, and each entry contributes
    at most 1 no matter how often it occurs.

    Args:
        text: String to scan.
        swear_list: Iterable of words to look for. When omitted, the
            notebook-level ``swear_words`` list is used.

    Returns:
        Number of list entries found in ``text``.

    Raises:
        NameError: If ``swear_list`` is omitted and no global ``swear_words``
            exists (the code that builds it is currently commented out).
    """
    if swear_list is None:
        try:
            swear_list = swear_words
        except NameError:
            raise NameError(
                "swear_words is not defined: pass swear_list explicitly or "
                "re-enable the cell code that loads the swear-word file"
            ) from None
    return sum(1 for swear_word in swear_list if swear_word in text)

# Symbols that clean_text surrounds with spaces. Order and repetition are kept
# exactly as-is: a symbol listed twice is padded twice. A few accented letters
# (e.g. 'Â', 'à', 'é') are in the list as well and therefore get split out too.
puncts = [',', '.', '"', ':', ')', '(', '!', '?', '|', ';', "'", '$', '&',
    '/', '[', ']', '>', '%', '=', '#', '+', '\\', '•',  '~', '@', '£',
    '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',
    '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', '“', '★', '”',
    '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾',
    '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', '▒', '：', '¼', '⊕', '▼',
    '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲',
    'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '∙', '）', '↓', '、', '│', '（', '»',
    '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø',
    '¹', '≤', '‡', '√', '«', '»', '´', 'º', '¾', '¡', '§', '£', '₤', 'ə', '√',
    'ᴵ', '∞', 'θ', '÷', 'α', '•', 'à', '−', 'β', '∅', '³', 'π', '‘', '₹', '´', '£', '€',
    '×','™', '√', '²', '—', '…', ':', ';', '•', '！', '?', '$', '＄', '％', '＆', '（', '）']


def clean_text(x):
    """Return ``str(x)`` with every symbol from ``puncts`` surrounded by spaces.

    The list is applied entry by entry, so extra spaces accumulate for symbols
    that appear in it more than once; whitespace is not collapsed here.
    """
    padded = str(x)
    for symbol in puncts:
        padded = padded.replace(symbol, ' ' + symbol + ' ')
    return padded
    
    
def content_preprocessing(text):
    """Normalize one raw post into a lowercase, single-space-separated string.

    Steps, in order: lowercase; replace http(s) URLs with the placeholder
    ``thisisurl``; apply the ``kp`` keyword replacements; pad punctuation with
    spaces (``clean_text``); blank out everything matched by ``emoji_re``;
    apply the ``kp2`` keyword replacements (which also turns ``thisisurl``
    into ``url``); normalize whitespace.

    The optional statistics this function once returned next to the text
    (swear/upper-case/unique-word/character counts, emoji count) are disabled,
    so only the cleaned text is produced.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string. A single leading or trailing space may remain.
    """
    text = text.lower()
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ' thisisurl ', text)
    text = kp.replace_keywords(text)
    # text = re.sub("[%s]+" %punc_sign , ' ' ,text)
    text = clean_text(text)
    text = emoji_re.sub(' ', text)
    text = kp2.replace_keywords(text)
    # Newlines/tabs become spaces *before* runs are collapsed. Deleting them
    # afterwards (as was done previously) glued together words that were
    # separated by a lone newline or tab, e.g. "foo\nbar" -> "foobar".
    text = re.sub(r'\n|\t', ' ', text)
    text = re.sub(r'\s{2,}', ' ', text)

    return text
    

In [55]:
# class Attention(nn.Module):
#     def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
#         super(Attention, self).__init__(**kwargs)

#         self.supports_masking = True
#         self.bias = bias
#         self.feature_dim = feature_dim
#         self.step_dim = step_dim
#         self.features_dim = 0

#         weight = torch.zeros(feature_dim, 1)
#         nn.init.xavier_uniform_(weight)
#         self.weight = nn.Parameter(weight)

#         if bias:
#             self.b = nn.Parameter(torch.zeros(1))

#     def forward(self, x, mask=None):

#         feature_dim = self.feature_dim
#         step_dim = self.step_dim
        
#         eij = torch.mm(
#                 x.contiguous().view(-1, feature_dim), 
#                 self.weight
#         ).view(-1, step_dim)

#         if self.bias:
#                 eij = eij + self.b

#         eij = torch.tanh(eij)
#         a = torch.exp(eij)

#         if mask is not None:
#             a = a * mask

#         a = a / torch.sum(a, 1, keepdim=True) + 1e-10
#         weighted_input = x * torch.unsqueeze(a, -1)
#         return torch.sum(weighted_input, 1)

# class SpatialDropout(nn.Module):

#     def __init__(self,p):
#         super(SpatialDropout, self).__init__()
#         self.dropout = nn.Dropout2d(p)

#     def forward(self, x):

#             x = x.permute(0, 2, 1)   # convert to [batch, feature, timestep]
#             x = self.dropout(x)
#             x = x.permute(0, 2, 1)   # back to [batch, timestep, feature]
#             return x

# class NeuralNet(nn.Module):

#     def __init__(self,embedding_matrix, num_unit, num_heads):
#         super(NeuralNet, self).__init__()
#         self.max_feature = embedding_matrix.shape[0]
#         self.embedding_size = embedding_matrix.shape[1]
#         self.embedding = nn.Embedding(self.max_feature, self.embedding_size)
#         self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
#         self.embedding.weight.requires_grad = False
#         self.embedding_dropout = SpatialDropout(0.1)
#         self.lstm1 = nn.LSTM(self.embedding_size, num_unit, bidirectional=True, batch_first=True)
#         self.lstm2 = nn.LSTM(num_unit*2, int(num_unit/2), bidirectional=True, batch_first=True)
#         self.attention = Attention(num_unit, MAX_LEN)
#         self.linear1 = nn.Linear(num_unit*3, num_unit)
#         self.linear_out = nn.Linear(num_unit, 1)
#         self.cat_linear = nn.Linear(1024, num_unit*3)
#         self.multihead_attn = nn.MultiheadAttention(num_unit*3, num_heads, batch_first=True)
        
#     def forward(self, x, cat_embedding):

#         h_embedding = self.embedding(x)
#         h_embedding = self.embedding_dropout(h_embedding)
#         h_lstm1, _ = self.lstm1(h_embedding)
#         h_lstm2, _ = self.lstm2(h_lstm1) # 512,300,2*num_unit

#         # attention
#         att = self.attention(h_lstm2)

#         # global average pooling
#         avg_pool = torch.mean(h_lstm2, 1)

#         # global max pooling
#         max_pool, _ = torch.max(h_lstm2, 1)
        
#         # concatenation
#         h = torch.cat((max_pool, avg_pool, att), 1) ### concat or h equal Q/V   # orignal: num_unit*3
#         h_flat = torch.unsqueeze(h, -1).permute(0, 2, 1)
        
#         category_embedding = self.cat_linear(cat_embedding)
        
#         attn_output, attn_output_weights = self.multihead_attn(category_embedding, h_flat, h_flat)
#         # attn_output = torch.squeeze(attn_output)

#         avg_pool_attn = torch.mean(attn_output, 1)

#         # the post could also skip going through the sentence model
#         # k -> sentence model to obtain the contextual embedding
#         # attn_output, attn_weights are kept for verification
        
#         h_linear1 = F.relu(self.linear1(avg_pool_attn))
#         # h_linear1 = F.relu(self.linear1(torch.cat((h, avg_pool_attn), 1)))

#         out1 = torch.sigmoid(self.linear_out(h_linear1))

#         return out1

In [56]:
# class Attention(nn.Module):
#     def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
#         super(Attention, self).__init__(**kwargs)

#         self.supports_masking = True
#         self.bias = bias
#         self.feature_dim = feature_dim
#         self.step_dim = step_dim
#         self.features_dim = 0

#         weight = torch.zeros(feature_dim, 1)
#         nn.init.xavier_uniform_(weight)
#         self.weight = nn.Parameter(weight)

#         if bias:
#             self.b = nn.Parameter(torch.zeros(1))

#     def forward(self, x, mask=None):

#         feature_dim = self.feature_dim
#         step_dim = self.step_dim
        
#         eij = torch.mm(
#                 x.contiguous().view(-1, feature_dim), 
#                 self.weight
#         ).view(-1, step_dim)

#         if self.bias:
#                 eij = eij + self.b

#         eij = torch.tanh(eij)
#         a = torch.exp(eij)

#         if mask is not None:
#             a = a * mask

#         a = a / torch.sum(a, 1, keepdim=True) + 1e-10
#         weighted_input = x * torch.unsqueeze(a, -1)
#         return torch.sum(weighted_input, 1)

# class SpatialDropout(nn.Module):

#     def __init__(self,p):
#         super(SpatialDropout, self).__init__()
#         self.dropout = nn.Dropout2d(p)

#     def forward(self, x):

#             x = x.permute(0, 2, 1)   # convert to [batch, feature, timestep]
#             x = self.dropout(x)
#             x = x.permute(0, 2, 1)   # back to [batch, timestep, feature]
#             return x

# class NeuralNet(nn.Module):

#     def __init__(self,embedding_matrix, num_unit, num_heads):
#         super(NeuralNet, self).__init__()
#         self.max_feature = embedding_matrix.shape[0]
#         self.embedding_size = embedding_matrix.shape[1]
#         self.embedding = nn.Embedding(self.max_feature, self.embedding_size)
#         self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
#         self.embedding.weight.requires_grad = False
#         self.embedding_dropout = SpatialDropout(0.1)
#         self.lstm1 = nn.LSTM(self.embedding_size, num_unit, bidirectional=True, batch_first=True)
#         self.lstm2 = nn.LSTM(num_unit*2, int(num_unit/2), bidirectional=True, batch_first=True)
#         self.attention = Attention(num_unit, MAX_LEN)
#         self.linear1 = nn.Linear(num_unit*6, num_unit)
#         self.linear_out = nn.Linear(num_unit, 1)
#         self.cat_linear = nn.Linear(1024, num_unit*3)
#         self.multihead_attn = nn.MultiheadAttention(num_unit*3, num_heads, batch_first=True)
        
#     def forward(self, x, cat_embedding):

#         h_embedding = self.embedding(x)
#         h_embedding = self.embedding_dropout(h_embedding)
#         h_lstm1, _ = self.lstm1(h_embedding)
#         h_lstm2, _ = self.lstm2(h_lstm1) # 512,300,2*num_unit

#         # attention
#         att = self.attention(h_lstm2)

#         # global average pooling
#         avg_pool = torch.mean(h_lstm2, 1)

#         # global max pooling
#         max_pool, _ = torch.max(h_lstm2, 1)
        
#         # concatenation
#         h = torch.cat((max_pool, avg_pool, att), 1) ### concat or h equal Q/V   # orignal: num_unit*3
#         h_flat = torch.unsqueeze(h, -1).permute(0, 2, 1)
        
#         category_embedding = self.cat_linear(cat_embedding)
        
#         attn_output, attn_output_weights = self.multihead_attn(category_embedding, h_flat, h_flat)
#         # attn_output = torch.squeeze(attn_output)

#         avg_pool_attn = torch.mean(attn_output, 1)

#         # the post could also skip going through the sentence model
#         # k -> sentence model to obtain the contextual embedding
#         # attn_output, attn_weights are kept for verification
        
#         # h_linear1 = F.relu(self.linear1(avg_pool_attn))
#         h_linear1 = F.relu(self.linear1(torch.cat((h, avg_pool_attn), 1)))

#         out1 = torch.sigmoid(self.linear_out(h_linear1))

#         return out1

In [57]:
class Attention(nn.Module):
    """Attention pooling over the time axis of a ``(batch, step_dim, feature_dim)`` tensor.

    Each timestep is scored with a learned ``(feature_dim, 1)`` vector plus an
    optional scalar bias; scores go through ``tanh`` and ``exp``, are
    normalized across timesteps, and the weighted sum of the timestep
    features is returned.

    Args:
        feature_dim: Size of the last dimension of the input.
        step_dim: Number of timesteps; the input must have exactly this many.
        bias: Whether to add a learned scalar bias to the scores.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)

        # Attributes kept for interface compatibility; not read in this class.
        self.supports_masking = True
        self.features_dim = 0

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim

        weight = torch.zeros(feature_dim, 1)
        nn.init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)

        if bias:
            self.b = nn.Parameter(torch.zeros(1))

    def forward(self, x, mask=None):
        """Pool ``x`` over its time axis.

        Args:
            x: Tensor of shape ``(batch, step_dim, feature_dim)``.
            mask: Optional tensor multiplied into the un-normalized weights,
                broadcastable to ``(batch, step_dim)``; zeros exclude steps.

        Returns:
            Tensor of shape ``(batch, feature_dim)``.
        """
        feature_dim = self.feature_dim
        step_dim = self.step_dim

        # One score per (sample, timestep).
        eij = torch.mm(
            x.contiguous().view(-1, feature_dim),
            self.weight
        ).view(-1, step_dim)

        if self.bias:
            eij = eij + self.b

        eij = torch.tanh(eij)
        a = torch.exp(eij)

        if mask is not None:
            a = a * mask

        # The epsilon belongs in the denominator: it guards against division
        # by zero when a mask removes every timestep. Without the parentheses
        # it was added to the quotient and a fully masked row became NaN.
        a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)
        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)

class SpatialDropout(nn.Module):
    """Dropout that zeroes entire feature channels of a ``[batch, timestep, feature]`` tensor.

    A dropped feature is zeroed for every timestep of that sample (and the
    surviving features are rescaled by ``1 / (1 - p)``), instead of dropping
    individual elements independently.

    Args:
        p: Probability of dropping a feature channel.
    """

    def __init__(self,p):
        super(SpatialDropout, self).__init__()
        self.dropout = nn.Dropout2d(p)

    def forward(self, x):
        """Apply channel-wise dropout; the output has the same shape as ``x``."""
        # Dropout2d is defined for (N, C, H, W) input. Passing the 3-D
        # (N, C, L) tensor directly relies on deprecated handling that PyTorch
        # warns about, so add a dummy trailing axis to make the call 4-D.
        x = x.permute(0, 2, 1).unsqueeze(-1)   # [batch, feature, timestep, 1]
        x = self.dropout(x)
        x = x.squeeze(-1).permute(0, 2, 1)     # back to [batch, timestep, feature]
        return x

class NeuralNet(nn.Module):
    """Two-layer BiLSTM text classifier fused with a sentence-embedding branch.

    Token branch: frozen pre-trained embeddings -> spatial dropout -> two
    bidirectional LSTMs -> attention / mean / max pooling over time.
    Context branch: multi-head attention with ``cat_embedding`` as the query
    and the post-level embedding as the single key/value, mean- and
    max-pooled over the query axis.
    Both branches are concatenated and passed through a ReLU layer and a
    sigmoid output unit.

    Args:
        embedding_matrix: 2-D array of pre-trained word vectors; row count is
            the vocabulary size, column count the embedding size.
        num_unit: hidden size of the first LSTM (per direction) and width of
            the dense layer. Must be even because the second LSTM uses
            ``num_unit // 2`` units per direction.
        num_heads: number of heads of the multi-head attention layer.
        context_dim: width of the sentence / category embeddings fed to the
            multi-head attention. Defaults to 1024, the value that used to be
            hard-coded.

    Raises:
        ValueError: if ``num_unit`` is odd.
    """

    def __init__(self, embedding_matrix, num_unit, num_heads, context_dim=1024):
        super(NeuralNet, self).__init__()
        if num_unit % 2 != 0:
            # 2 * (num_unit // 2) would no longer equal num_unit, which the
            # attention layer and linear1 below rely on.
            raise ValueError(f"num_unit must be even, got {num_unit}")

        self.max_feature = embedding_matrix.shape[0]
        self.embedding_size = embedding_matrix.shape[1]
        self.context_dim = context_dim

        # Pre-trained, frozen word embeddings.
        self.embedding = nn.Embedding(self.max_feature, self.embedding_size)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False
        self.embedding_dropout = SpatialDropout(0.1)

        self.lstm1 = nn.LSTM(self.embedding_size, num_unit, bidirectional=True, batch_first=True)
        self.lstm2 = nn.LSTM(num_unit*2, num_unit // 2, bidirectional=True, batch_first=True)
        # lstm2 is bidirectional, so its output width is 2 * (num_unit // 2) == num_unit.
        self.attention = Attention(num_unit, MAX_LEN)
        # Input: three pooled LSTM vectors + two pooled attention vectors.
        self.linear1 = nn.Linear(num_unit*3 + context_dim*2, num_unit)
        self.linear_out = nn.Linear(num_unit, 1)
        self.multihead_attn = nn.MultiheadAttention(context_dim, num_heads, batch_first=True)

    def forward(self, x, x_context_embedding, cat_embedding):
        """Return the sigmoid output of the classifier.

        Args:
            x: token-id tensor fed to the embedding layer.
                # assumes shape (batch, MAX_LEN) - the attention layer
                # reshapes with MAX_LEN; TODO confirm against the caller.
            x_context_embedding: one embedding per sample; a length-1
                sequence axis is inserted here before it is used as key and
                value.
            cat_embedding: query of the multi-head attention.
                # presumably (batch, n_categories, context_dim) - verify.

        Returns:
            Tensor of sigmoid activations with a trailing axis of size 1.
        """
        # ---- token branch -------------------------------------------------
        h_embedding = self.embedding_dropout(self.embedding(x))
        h_lstm1, _ = self.lstm1(h_embedding)
        h_lstm2, _ = self.lstm2(h_lstm1)  # last axis has size num_unit

        # ---- context branch -----------------------------------------------
        # Design notes kept from the original author: the post itself does
        # not necessarily have to go through the sentence model; the key
        # could instead be a contextual embedding from the sentence model;
        # attn_output / attention weights are meant for inspection.
        x_context_embedding = torch.unsqueeze(x_context_embedding, 1)
        # NOTE(review): key/value have sequence length 1, so the softmax over
        # keys is always 1 and attn_output looks independent of the query's
        # values (every query row gets the same vector, making the mean and
        # max pools below equal). Confirm this is intended.
        attn_output, _ = self.multihead_attn(cat_embedding, x_context_embedding, x_context_embedding)
        avg_pool_attn = torch.mean(attn_output, 1)
        max_pool_attn, _ = torch.max(attn_output, 1)

        # ---- pooling of the LSTM states over time ---------------------------
        att = self.attention(h_lstm2)
        avg_pool = torch.mean(h_lstm2, 1)
        max_pool, _ = torch.max(h_lstm2, 1)

        # ---- fusion and classification --------------------------------------
        h = torch.cat((max_pool, avg_pool, att, avg_pool_attn, max_pool_attn), 1)
        h_linear1 = F.relu(self.linear1(h))
        return torch.sigmoid(self.linear_out(h_linear1))

In [58]:
# Subreddit names of the mental-health communities (the commented-out
# dataset-preparation cell further down labels their posts 1.0).
mental_health_groups = [
    'EDAnonymous', 'addiction', 'alcoholism', 'adhd', 'anxiety',
    'autism', 'bipolarreddit', 'bpd', 'depression', 'healthanxiety',
    'lonely', 'ptsd', 'schizophrenia', 'socialanxiety', 'suicidewatch',
]

# Subreddit names of the control communities (labelled 0.0 by the same
# commented-out cell).
non_mental_health = [
    'conspiracy', 'divorce', 'fitness', 'guns', 'jokes', 'legaladvice',
    'meditation', 'parenting', 'personalfinance', 'relationships', 'teaching',
]

In [59]:
# # prepare simplified version of the reddit mental health dataset
# mh_fnames = []
# for g in mental_health_groups:
#     mh_fnames.append(f'{g}_2018_features_tfidf_256.csv')
#     mh_fnames.append(f'{g}_2019_features_tfidf_256.csv')
#     mh_fnames.append(f'{g}_pre_features_tfidf_256.csv')
#     mh_fnames.append(f'{g}_post_features_tfidf_256.csv')
# mh_fnames.remove('EDAnonymous_2018_features_tfidf_256.csv')

# path = '/kaggle/input/reddit-mental-health/'
# all_fname = os.listdir(path)

# df_mh = pd.DataFrame()
# for f in mh_fnames:
#     df_mh = df_mh.append(pd.read_csv(path + f))
# df_mh = df_mh.reset_index(drop = True)

# non_mh_fnames = []
# for g in non_mental_health:
#     non_mh_fnames.append(f'{g}_2018_features_tfidf_256.csv')
#     non_mh_fnames.append(f'{g}_2019_features_tfidf_256.csv')
#     non_mh_fnames.append(f'{g}_pre_features_tfidf_256.csv')
#     non_mh_fnames.append(f'{g}_post_features_tfidf_256.csv')
    
# df_non_mh = pd.DataFrame()
# for f in non_mh_fnames:
#     df_non_mh = df_non_mh.append(pd.read_csv(path + f))
# df_non_mh = df_non_mh.reset_index(drop = True)

# df_mh['label'] = 1.0
# df_non_mh['label'] = 0.0

# extracted_col = []
# for c in df_mh.columns:
#     if not c.startswith('liwc') and not c.startswith('tfidf'):
#         extracted_col.append(c)     
# df_mh = df_mh.loc[:, extracted_col]

# extracted_col = []
# for c in df_non_mh.columns:
#     if not c.startswith('liwc') and not c.startswith('tfidf'):
#         extracted_col.append(c)     
# df_non_mh = df_non_mh.loc[:, extracted_col]

# df_train = df_mh.append(df_non_mh)
# df_train = df_train.reset_index(drop = True)

# df_train.to_csv('df_train.csv', index = False)

In [60]:
# # Generate cut version (first 500 tokens) of training and testing data 
# df_train = pd.read_csv('/kaggle/input/reddit-mental-health-v2/df_train.csv')

# df_train['post'] = df_train['post'].map(lambda x: ' '.join(x.split(' ')[:500]))

# # shuffle the data
# df_train = df_train.sample(len(df_train))

# df_train = df_train.reset_index(drop = True)

# df_train.to_csv('df_train_cut.csv', index = False)

# train_df, test_df = train_test_split(df_train, test_size=0.20, random_state=5246)
# train_df = train_df.reset_index(drop=True)
# test_df = test_df.reset_index(drop=True)

# train_df.to_csv('train_df_cut.csv', index = False)
# test_df.to_csv('test_df_cut.csv', index = False)

In [61]:
# Start from here

In [62]:
# train_df = pd.read_csv('/kaggle/input/reddit-mental-health-v2/train_df_cut.csv')
# test_df = pd.read_csv('/kaggle/input/reddit-mental-health-v2/test_df_cut.csv')

In [None]:
# Load cleaned_reddit_lemmatized.csv from the attached Kaggle dataset.
# Later cells read its 'subreddit', 'text_processed', 'date', 'date_year',
# 'post' and 'label' columns.
df = pd.read_csv('/kaggle/input/reddit-mental-health-v2/cleaned_reddit_lemmatized.csv')

In [None]:
# Seed for the per-subreddit subsample. Without it every run draws different
# rows, so nothing cached per row (e.g. the *_roberta.npy embeddings loaded
# later) can stay aligned with train_df / test_df across runs.
SAMPLE_SEED = 42

# Drop the two subreddits that are excluded from this experiment.
df = df[~df["subreddit"].isin(["mentalhealth", "COVID19_support"])]

# Keep a random 60% of the posts of every subreddit. GroupBy.sample keeps all
# columns (including 'subreddit') and the original row labels.
df = df.groupby('subreddit').sample(frac=0.6, random_state=SAMPLE_SEED)

# Truncate the processed text to its first 300 space-separated tokens.
df['text_processed_trim'] = df['text_processed'].apply(lambda x: " ".join(str(x).split(" ")[:300]))

# Chronological order; reset_index() without drop=True keeps the previous row
# labels in a new 'index' column.
df = df.sort_values(by='date', ascending=True)
df = df.reset_index()

In [None]:
# Temporal split on 'date_year': 2018-2019 posts train the model, 2020 posts
# are held out. Rows from any other year end up in neither frame. Both frames
# get a fresh 0..n-1 index.
train_df = df[df["date_year"].isin([2018, 2019])].reset_index(drop = True)
test_df = df[df["date_year"]==2020].reset_index(drop = True)

In [None]:
# The full frame is no longer needed once train_df / test_df exist; drop the
# name and ask the garbage collector to run.
del df
gc.collect()

In [None]:
# # using a subset first to test
# train_df = train_df.sample(2000)
# test_df = test_df.sample(600)

# train_df = train_df.reset_index(drop = True)
# test_df = test_df.reset_index(drop = True)

In [None]:
# Show (rows, columns) of the train and test frames, one per line.
print(train_df.shape, test_df.shape, sep='\n')

In [None]:
### preprocessing
x_train = train_df["post"].apply(lambda x: content_preprocessing(x))
x_test = test_df["post"].apply(lambda x: content_preprocessing(x))

In [None]:
# ### preprocessing
# x_train = train_df["text_processed_trim"]
# x_test = test_df["text_processed_trim"]

In [None]:
# Tokenizer with no character filtering and no lower-casing; its vocabulary
# is fitted on the train AND test texts together.
tokenizer = text.Tokenizer(filters='', lower=False)
tokenizer.fit_on_texts([*x_train, *x_test])

# Texts -> integer id sequences -> arrays of width MAX_LEN (padding is added
# at the end of each sequence).
x_train, x_test = [
    pad_sequences(tokenizer.texts_to_sequences(split_texts), maxlen=MAX_LEN,padding='post')
    for split_texts in (x_train, x_test)
]

In [None]:
# The sequences are already built, so the tokenizer is released here.
# Note: the commented-out build_matrix(...) call further down needs
# tokenizer.word_index and therefore cannot run after this cell.
del tokenizer
gc.collect()

In [None]:
# from sentence_transformers import SentenceTransformer
# # sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
# sentence_model = SentenceTransformer('all-roberta-large-v1', device='cuda')

# sentence_model.max_seq_length = 154

In [None]:
# Cached sentence embeddings; the commented-out cell below shows they were
# produced with sentence_model.encode(...) on the 'post' column and saved
# with np.save.
# NOTE(review): row i is later paired with row i of x_train / x_test, so the
# caches must come from exactly the same train_df / test_df rows - confirm,
# given that the frames are built from a random sample.
train_roberta = np.load('/kaggle/input/reddit-mental-health-v2/train_roberta.npy')
test_roberta = np.load('/kaggle/input/reddit-mental-health-v2/test_roberta.npy')

In [None]:
# train_roberta = sentence_model.encode(list(train_df['post']))
# test_roberta = sentence_model.encode(list(test_df['post']))

# np.save('train_roberta.npy', train_roberta)
# np.save('test_roberta.npy', test_roberta) # save

In [None]:
# mental_health_description = {
#     "EDAnonymous": "An eating disorder is a mental disorder defined by abnormal eating behaviors that negatively affect a person's physical or mental health. Types of eating disorders include binge eating disorder, where the patient eats a large amount in a short period of time; anorexia nervosa, where the person has an intense fear of gaining weight and restricts food or overexercises to manage this fear; bulimia nervosa, where individuals eat a large quantity (binging) then try to rid themselves of the food (purging); pica, where the patient eats non-food items; rumination syndrome, where the patient regurgitates undigested or minimally digested food; avoidant/restrictive food intake disorder (ARFID), where people have a reduced or selective food intake due to some psychological reasons; and a group of other specified feeding or eating disorders. Anxiety disorders, depression and substance abuse are common among people with eating disorders. These disorders do not include obesity. People often experience comorbidity between an eating disorder and OCD. It is estimated 20-60% of patients with an ED have a history of OCD.",
#     "addiction": "Addiction is generally a neuropsychological symptom defining pervasive and intense urge to engage in maladaptive behaviors providing immediate sensory rewards (e.g. consuming drugs, excessively gambling), despite their harmful consequences. Dependence is generally an addiction that can involve withdrawal issues. Addictive disorder is a category of mental disorders defining important intensities of addictions or dependences, which induce functional disabilities.",
#     "adhd": "Attention deficit hyperactivity disorder (ADHD) is a neurodevelopmental disorder characterised by excessive amounts of inattention, hyperactivity, and impulsivity that are pervasive, impairing in multiple contexts, and otherwise age-inappropriate.",
#     "alcoholism": "Alcoholism is, broadly, any drinking of alcohol that results in significant mental or physical health problems. Because there is disagreement on the definition of the word alcoholism, it is not a recognized diagnostic entity, and the use of alcoholism terminology is discouraged due to its heavily stigmatized connotations. Predominant diagnostic classifications are alcohol use disorder (DSM-5) or alcohol dependence (ICD-11); these are defined in their respective sources.",
#     "anxiety": "Anxiety is an emotion which is characterized by an unpleasant state of inner turmoil and includes feelings of dread over anticipated events. Anxiety is different than fear in that the former is defined as the anticipation of a future threat whereas the latter is defined as the emotional response to a real threat. It is often accompanied by nervous behavior such as pacing back and forth, somatic complaints, and rumination.",
#     "autism": "The autism spectrum, often referred to as just autism, autism spectrum disorder (ASD) or sometimes autism spectrum condition (ASC), identifies a loosely defined cluster of neurodevelopmental disorders characterized by challenges in social interaction, verbal and nonverbal communication, and often repetitive behaviors and restricted interests. Other common features include unusual responses to sensory stimuli and a preference for sameness or unusual adherence to routines.",
#     "bipolarreddit": "Bipolar disorder, previously known as manic depression, is a mental disorder characterized by periods of depression and periods of abnormally elevated mood that each last from days to weeks. If the elevated mood is severe or associated with psychosis, it is called mania; if it is less severe, it is called hypomania. During mania, an individual behaves or feels abnormally energetic, happy or irritable, and they often make impulsive decisions with little regard for the consequences. There is usually also a reduced need for sleep during manic phases. During periods of depression, the individual may experience crying and have a negative outlook on life and poor eye contact with others. The risk of suicide is high; over a period of 20 years, 6% of those with bipolar disorder died by suicide, while 30–40% engaged in self-harm. Other mental health issues, such as anxiety disorders and substance use disorders, are commonly associated with bipolar disorder.",
#     "bpd": "Borderline personality disorder (BPD), also known as emotionally unstable personality disorder (EUPD), is a personality disorder characterized by a long-term pattern of intense and unstable interpersonal relationships, distorted sense of self, and strong emotional reactions. Those affected often engage in self-harm and other dangerous behaviors, often due to their difficulty with returning their emotional level to a healthy or normal baseline. They may also struggle with a feeling of emptiness, fear of abandonment, and detachment from reality.",
#     "depression": "Depression is a mental state of low mood and aversion to activity. It affects more than 280 million people of all ages (about 3.5% of the global population). Depression affects a person's thoughts, behavior, feelings, and sense of well-being. Depressed people often experience loss of motivation or interest in, or reduced pleasure or joy from, experiences that would normally bring them pleasure or joy. Depressed mood is a symptom of some mood disorders such as major depressive disorder and dysthymia; it is a normal temporary reaction to life events, such as the loss of a loved one; and it is also a symptom of some physical diseases and a side effect of some drugs and medical treatments. It may feature sadness, difficulty in thinking and concentration and a significant increase or decrease in appetite and time spent sleeping. People experiencing depression may have feelings of dejection or hopelessness and may experience suicidal thoughts. It can either be short term or long term.",
#     "healthanxiety": "Hypochondriasis or hypochondria is a condition in which a person is excessively and unduly worried about having a serious illness. Hypochondria is an old concept whose meaning has repeatedly changed over its lifespan. It has been claimed that this debilitating condition results from an inaccurate perception of the condition of body or mind despite the absence of an actual medical diagnosis. An individual with hypochondriasis is known as a hypochondriac. Hypochondriacs become unduly alarmed about any physical or psychological symptoms they detect, no matter how minor the symptom may be, and are convinced that they have, or are about to be diagnosed with, a serious illness.",
#     "lonely": "Loneliness is an unpleasant emotional response to perceived isolation. Loneliness is also described as social pain – a psychological mechanism which motivates individuals to seek social connections. It is often associated with a perceived lack of connection and intimacy. Loneliness overlaps and yet is distinct from solitude. Solitude is simply the state of being apart from others; not everyone who experiences solitude feels lonely. As a subjective emotion, loneliness can be felt even when a person is surrounded by other people. Hence, there is a distinction between being alone and feeling lonely. Loneliness can be short term (state loneliness) or long term (chronic loneliness). In either case, it can be intense and painful.",
#     "ptsd": "Post-traumatic stress disorder (PTSD) is a mental and behavioral disorder that can develop because of exposure to a traumatic event, such as sexual assault, warfare, traffic collisions, child abuse, domestic violence, or other threats on a person's life. Symptoms may include disturbing thoughts, feelings, or dreams related to the events, mental or physical distress to trauma-related cues, attempts to avoid trauma-related cues, alterations in the way a person thinks and feels, and an increase in the fight-or-flight response. These symptoms last for more than a month after the event. Young children are less likely to show distress but instead may express their memories through play. A person with PTSD is at a higher risk of suicide and intentional self-harm.",
#     "schizophrenia": "Schizophrenia is a mental disorder characterized by continuous or relapsing episodes of psychosis. Major symptoms include hallucinations (typically hearing voices), delusions, and disorganized thinking. Other symptoms include social withdrawal, decreased emotional expression, and apathy. Symptoms typically develop gradually, begin during young adulthood, and in many cases never become resolved. There is no objective diagnostic test; diagnosis is based on observed behavior, a psychiatric history that includes the person's reported experiences, and reports of others familiar with the person. To be diagnosed with schizophrenia, symptoms and functional impairment need to be present for six months (DSM-5) or one month (ICD-11). Many people with schizophrenia have other mental disorders, especially substance use disorders, depressive disorders, anxiety disorders, and obsessive–compulsive disorder.",
#     "socialanxiety": "Social anxiety is the anxiety and fear specifically linked to being in social settings (i.e., interacting with others). Some categories of disorders associated with social anxiety include anxiety disorders, mood disorders, autism spectrum disorders, eating disorders, and substance use disorders. Individuals with higher levels of social anxiety often avert their gazes, show fewer facial expressions, and show difficulty with initiating and maintaining a conversation. Social anxiety commonly manifests itself in the teenage years and can be persistent throughout life, however, people who experience problems in their daily functioning for an extended period of time can develop social anxiety disorder. Trait social anxiety, the stable tendency to experience this anxiety, can be distinguished from state anxiety, the momentary response to a particular social stimulus. Half of the individuals with any social fears meet the criteria for social anxiety disorder. Age, culture, and gender impact the severity of this disorder. The function of social anxiety is to increase arousal and attention to social interactions, inhibit unwanted social behavior, and motivate preparation for future social situations.",
#     "suicidewatch": "Suicide is the act of intentionally causing one's own death. Mental disorders (including depression, bipolar disorder, schizophrenia, personality disorders, anxiety disorders), physical disorders (such as chronic fatigue syndrome), and substance abuse (including alcoholism and the use of and withdrawal from benzodiazepines) are risk factors. Some suicides are impulsive acts due to stress (such as from financial or academic difficulties), relationship problems (such as breakups or divorces), or harassment and bullying. Those who have previously attempted suicide are at a higher risk for future attempts. Effective suicide prevention efforts include limiting access to methods of suicide such as firearms, drugs, and poisons; treating mental disorders and substance abuse; careful media reporting about suicide; and improving economic conditions. Although crisis hotlines are common resources, their effectiveness has not been well studied.",
# }

In [None]:
# Free-text description of each mental-health subreddit, keyed by subreddit
# name. These are shortened variants of the texts in the commented-out dict
# in the previous cell (presumably trimmed to fit the sentence model's input
# limit - confirm). The commented-out cell below encodes every description
# with sentence_model.encode to build mental_health_sentence_embedding.
mental_health_description = {
    "EDAnonymous": "An eating disorder is a mental disorder defined by abnormal eating behaviors that negatively affect a person's physical or mental health. Types of eating disorders include binge eating disorder, where the patient eats a large amount in a short period of time; anorexia nervosa, where the person has an intense fear of gaining weight and restricts food or overexercises to manage this fear; bulimia nervosa, where individuals eat a large quantity (binging) then try to rid themselves of the food (purging); pica, where the patient eats non-food items; rumination syndrome, where the patient regurgitates undigested or minimally digested food; avoidant/restrictive food intake disorder (ARFID), where people have a reduced or selective food intake due to some psychological reasons; and a group of other specified feeding or eating disorders. Anxiety disorders, depression and substance abuse are common among people with eating disorders. These disorders do not include obesity.",
    "addiction": "Addiction is generally a neuropsychological symptom defining pervasive and intense urge to engage in maladaptive behaviors providing immediate sensory rewards (e.g. consuming drugs, excessively gambling), despite their harmful consequences. Dependence is generally an addiction that can involve withdrawal issues. Addictive disorder is a category of mental disorders defining important intensities of addictions or dependences, which induce functional disabilities.",
    "adhd": "Attention deficit hyperactivity disorder (ADHD) is a neurodevelopmental disorder characterised by excessive amounts of inattention, hyperactivity, and impulsivity that are pervasive, impairing in multiple contexts, and otherwise age-inappropriate.",
    "alcoholism": "Alcoholism is, broadly, any drinking of alcohol that results in significant mental or physical health problems. Because there is disagreement on the definition of the word alcoholism, it is not a recognized diagnostic entity, and the use of alcoholism terminology is discouraged due to its heavily stigmatized connotations. Predominant diagnostic classifications are alcohol use disorder (DSM-5) or alcohol dependence (ICD-11).",
    "anxiety": "Anxiety is an emotion which is characterized by an unpleasant state of inner turmoil and includes feelings of dread over anticipated events. Anxiety is different than fear in that the former is defined as the anticipation of a future threat whereas the latter is defined as the emotional response to a real threat. It is often accompanied by nervous behavior such as pacing back and forth, somatic complaints, and rumination.",
    "autism": "The autism spectrum, often referred to as just autism, autism spectrum disorder (ASD) or sometimes autism spectrum condition (ASC), identifies a loosely defined cluster of neurodevelopmental disorders characterized by challenges in social interaction, verbal and nonverbal communication, and often repetitive behaviors and restricted interests. Other common features include unusual responses to sensory stimuli and a preference for sameness or unusual adherence to routines.",
    "bipolarreddit": "Bipolar disorder, previously known as manic depression, is a mental disorder characterized by periods of depression and periods of abnormally elevated mood that each last from days to weeks. If the elevated mood is severe or associated with psychosis, it is called mania; if it is less severe, it is called hypomania. During mania, an individual behaves or feels abnormally energetic, happy or irritable, and they often make impulsive decisions with little regard for the consequences. There is usually also a reduced need for sleep during manic phases. During periods of depression, the individual may experience crying and have a negative outlook on life and poor eye contact with others. The risk of suicide is high. Other mental health issues, such as anxiety disorders and substance use disorders, are commonly associated with bipolar disorder.",
    "bpd": "Borderline personality disorder (BPD), also known as emotionally unstable personality disorder (EUPD), is a personality disorder characterized by a long-term pattern of intense and unstable interpersonal relationships, distorted sense of self, and strong emotional reactions. Those affected often engage in self-harm and other dangerous behaviors, often due to their difficulty with returning their emotional level to a healthy or normal baseline. They may also struggle with a feeling of emptiness, fear of abandonment, and detachment from reality.",
    "depression": "Depression is a mental state of low mood and aversion to activity. It affects more than 280 million people of all ages (about 3.5% of the global population). Depression affects a person's thoughts, behavior, feelings, and sense of well-being. Depressed people often experience loss of motivation or interest in, or reduced pleasure or joy from, experiences that would normally bring them pleasure or joy. Depressed mood is a symptom of some mood disorders such as major depressive disorder and dysthymia; it is a normal temporary reaction to life events, such as the loss of a loved one; and it is also a symptom of some physical diseases and a side effect of some drugs and medical treatments. It may feature sadness, difficulty in thinking and concentration and a significant increase or decrease in appetite and time spent sleeping. People experiencing depression may have feelings of dejection or hopelessness and may experience suicidal thoughts.",
    "healthanxiety": "Hypochondriasis or hypochondria is a condition in which a person is excessively and unduly worried about having a serious illness. Hypochondria is an old concept whose meaning has repeatedly changed over its lifespan. It has been claimed that this debilitating condition results from an inaccurate perception of the condition of body or mind despite the absence of an actual medical diagnosis. An individual with hypochondriasis is known as a hypochondriac. Hypochondriacs become unduly alarmed about any physical or psychological symptoms they detect, no matter how minor the symptom may be, and are convinced that they have, or are about to be diagnosed with, a serious illness.",
    "lonely": "Loneliness is an unpleasant emotional response to perceived isolation. Loneliness is also described as social pain – a psychological mechanism which motivates individuals to seek social connections. It is often associated with a perceived lack of connection and intimacy. Loneliness overlaps and yet is distinct from solitude. Solitude is simply the state of being apart from others; not everyone who experiences solitude feels lonely. As a subjective emotion, loneliness can be felt even when a person is surrounded by other people. Hence, there is a distinction between being alone and feeling lonely. Loneliness can be short term (state loneliness) or long term (chronic loneliness). In either case, it can be intense and painful.",
    "ptsd": "Post-traumatic stress disorder (PTSD) is a mental and behavioral disorder that can develop because of exposure to a traumatic event, such as sexual assault, warfare, traffic collisions, child abuse, domestic violence, or other threats on a person's life. Symptoms may include disturbing thoughts, feelings, or dreams related to the events, mental or physical distress to trauma-related cues, attempts to avoid trauma-related cues, alterations in the way a person thinks and feels, and an increase in the fight-or-flight response. These symptoms last for more than a month after the event. Young children are less likely to show distress but instead may express their memories through play. A person with PTSD is at a higher risk of suicide and intentional self-harm.",
    "schizophrenia": "Schizophrenia is a mental disorder characterized by continuous or relapsing episodes of psychosis. Major symptoms include hallucinations (typically hearing voices), delusions, and disorganized thinking. Other symptoms include social withdrawal, decreased emotional expression, and apathy. Symptoms typically develop gradually, begin during young adulthood, and in many cases never become resolved. There is no objective diagnostic test; diagnosis is based on observed behavior, a psychiatric history that includes the person's reported experiences, and reports of others familiar with the person. To be diagnosed with schizophrenia, symptoms and functional impairment need to be present for six months (DSM-5) or one month (ICD-11). Many people with schizophrenia have other mental disorders, especially substance use disorders, depressive disorders, anxiety disorders, and obsessive–compulsive disorder.",
    "socialanxiety": "Social anxiety is the anxiety and fear specifically linked to being in social settings (i.e., interacting with others). Some categories of disorders associated with social anxiety include anxiety disorders, mood disorders, autism spectrum disorders, eating disorders, and substance use disorders. Individuals with higher levels of social anxiety often avert their gazes, show fewer facial expressions, and show difficulty with initiating and maintaining a conversation. Social anxiety commonly manifests itself in the teenage years and can be persistent throughout life, however, people who experience problems in their daily functioning for an extended period of time can develop social anxiety disorder. Trait social anxiety, the stable tendency to experience this anxiety, can be distinguished from state anxiety, the momentary response to a particular social stimulus. Half of the individuals with any social fears meet the criteria for social anxiety disorder. Age, culture, and gender impact the severity of this disorder.",
    "suicidewatch": "Suicide is the act of intentionally causing one's own death. Mental disorders (including depression, bipolar disorder, schizophrenia, personality disorders, anxiety disorders), physical disorders (such as chronic fatigue syndrome), and substance abuse (including alcoholism and the use of and withdrawal from benzodiazepines) are risk factors. Some suicides are impulsive acts due to stress (such as from financial or academic difficulties), relationship problems (such as breakups or divorces), or harassment and bullying. Those who have previously attempted suicide are at a higher risk for future attempts. Effective suicide prevention efforts include limiting access to methods of suicide such as firearms, drugs, and poisons; treating mental disorders and substance abuse; careful media reporting about suicide; and improving economic conditions.",
}

In [None]:
# for k, v in mental_health_description.items():
#     print(k, len(v.split(' ')))

In [None]:
# mental_health_sentence_embedding = {k: sentence_model.encode(s) for k, s in mental_health_description.items()}

In [None]:
# Load the cached dict of description embeddings (the commented-out cell above
# builds it as {subreddit: sentence_model.encode(description)}).
# NOTE(review): pickle.load executes arbitrary code from the file - only keep
# this if the dataset file is trusted.
with open('/kaggle/input/reddit-mental-health-v2/mental_health_sentence_embedding.pickle', 'rb') as handle:
    mental_health_sentence_embedding = pickle.load(handle)

In [None]:
# ### preprocessing
# x_train, x_train_swear_words, x_train_token_num, x_train_emoji_num, x_train_upper_count, x_train_unique_words_num, x_train_characters_num = zip(*train_df["post"].apply(lambda x: content_preprocessing(x)))
# # x_train_swear_words = np.array(x_train_swear_words, dtype=np.long).reshape(-1, 1)
# # x_train_token_num = np.array(x_train_token_num, dtype=np.long).reshape(-1, 1)
# # x_train_emoji_num = np.array(x_train_emoji_num, dtype=np.long).reshape(-1, 1)
# # x_train_upper_count = np.array(x_train_upper_count, dtype=np.long).reshape(-1, 1)
# # x_train_unique_words_num = np.array(x_train_unique_words_num, dtype=np.long).reshape(-1, 1)
# # x_train_characters_num = np.array(x_train_characters_num, dtype=np.long).reshape(-1, 1)

In [None]:
# x_test, x_test_swear_words, x_test_token_num, x_test_emoji_num, x_test_upper_count, x_test_unique_words_num, x_test_characters_num  = zip(*test_df["Text"].apply(lambda x: content_preprocessing(x)))
# x_test_swear_words = np.array(x_test_swear_words).reshape(-1, 1)
# x_test_token_num = np.array(x_test_token_num, dtype=np.long).reshape(-1, 1)
# x_test_emoji_num = np.array(x_test_emoji_num, dtype=np.long).reshape(-1, 1)
# x_test_upper_count = np.array(x_test_upper_count, dtype=np.long).reshape(-1, 1)
# x_test_unique_words_num = np.array(x_test_unique_words_num, dtype=np.long).reshape(-1, 1)
# x_test_characters_num = np.array(x_test_characters_num, dtype=np.long).reshape(-1, 1)

In [None]:
# Training targets as a plain array, taken from the 'label' column.
# presumably 1 = mental-health subreddit, 0 = control - verify against the CSV.
y_train = train_df['label'].values

In [None]:
# x_train and y_train have been extracted, so the training frame is released.
del train_df
gc.collect()

In [None]:
# # generate word2vec embedding
# crawl_matrix, oov = build_matrix(tokenizer.word_index, CRAWL_EMBEDDING_PATH)
# # glove_matrix , oov2 = build_matrix(tokenizer.word_index, GLOVE_EMBEDDING_PATH)
# # embedding_matrix = (crawl_matrix+glove_matrix)/2
# # embedding_matrix = np.concatenate((crawl_matrix, glove_matrix), axis=-1)
# Cached embedding matrix. Loaded through the same absolute dataset path as
# every other input in this notebook, so it does not depend on the kernel's
# working directory (the previous "../input/..." form did).
# NOTE(review): the commented-out build_matrix call above derives the matrix
# from tokenizer.word_index, so its rows must match the ids of the tokenizer
# fitted in this run - confirm the cached file still corresponds to it.
crawl_matrix = np.load("/kaggle/input/reddit-mental-health-v2/crawl_matrix_reddit_mental_health_cut.npy")

# with open('crawl_matrix_reddit_mental_health_cut_v4.npy', 'wb') as f:
#     np.save(f, crawl_matrix)

In [None]:
# # shuffle the data
# permu_idx = np.random.permutation(len(x_train))
# x_train = x_train[permu_idx]
# y_train = y_train[permu_idx]

In [None]:
# Test-set loader: pairs every padded token-id row (int64) with its sentence
# embedding (float32). Tensors stay on the CPU and batches keep the original
# row order.
test_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(
        torch.tensor(x_test, dtype=torch.long),
        torch.tensor(test_roberta, dtype=torch.float),
    ),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# No intermediate names are left behind; the tensors themselves live on
# inside test_loader's dataset.
gc.collect()

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import f1_score

In [None]:
# def loss_fn(outputs, targets):
#     return torch.nn.BCEWithLogitsLoss()(outputs, targets)

In [None]:
# def stratified_group_k_fold(X, y, groups, k, seed=None):
#     labels_num = np.max(y) + 1
#     y_counts_per_group = defaultdict(lambda: np.zeros(labels_num))
#     y_distr = Counter()
#     for label, g in zip(y, groups):
#         y_counts_per_group[g][label] += 1
#         y_distr[label] += 1

#     y_counts_per_fold = defaultdict(lambda: np.zeros(labels_num))
#     groups_per_fold = defaultdict(set)

#     def eval_y_counts_per_fold(y_counts, fold):
#         y_counts_per_fold[fold] += y_counts
#         std_per_label = []
#         for label in range(labels_num):
#             label_std = np.std([y_counts_per_fold[i][label] / y_distr[label] for i in range(k)])
#             std_per_label.append(label_std)
#         y_counts_per_fold[fold] -= y_counts
#         return np.mean(std_per_label)
    
#     groups_and_y_counts = list(y_counts_per_group.items())
#     random.Random(seed).shuffle(groups_and_y_counts)

#     for g, y_counts in sorted(groups_and_y_counts, key=lambda x: -np.std(x[1])):
#         best_fold = None
#         min_eval = None
#         for i in range(k):
#             fold_eval = eval_y_counts_per_fold(y_counts, i)
#             if min_eval is None or fold_eval < min_eval:
#                 min_eval = fold_eval
#                 best_fold = i
#         y_counts_per_fold[best_fold] += y_counts
#         groups_per_fold[best_fold].add(g)

#     all_groups = set(groups)
#     for i in range(k):
#         train_groups = all_groups - groups_per_fold[i]
#         test_groups = groups_per_fold[i]

#         train_indices = [i for i, g in enumerate(groups) if g in train_groups]
#         test_indices = [i for i, g in enumerate(groups) if g in test_groups]

#         yield train_indices, test_indices

In [None]:
# Look up the stored sentence embedding for each of these three topics and
# reshape it to a single-row 2-D array, so it can be tiled once per batch row
# in the training / validation / inference loops.
mhe_depression_np, mhe_autism_np, mhe_ptsd_np = (
    mental_health_sentence_embedding[topic].reshape(1, -1)
    for topic in ('depression', 'autism', 'ptsd')
)

In [None]:
# Same treatment for the remaining twelve topics: fetch each stored sentence
# embedding by its key and reshape it to one row (1, -1). The order of the
# target names must stay aligned with the order of the keys below.
(
    mhe_EDAnonymous_np,
    mhe_addiction_np,
    mhe_adhd_np,
    mhe_alcoholism_np,
    mhe_anxiety_np,
    mhe_bipolarreddit_np,
    mhe_bpd_np,
    mhe_healthanxiety_np,
    mhe_lonely_np,
    mhe_schizophrenia_np,
    mhe_socialanxiety_np,
    mhe_suicidewatch_np,
) = (
    mental_health_sentence_embedding[topic].reshape(1, -1)
    for topic in (
        'EDAnonymous',
        'addiction',
        'adhd',
        'alcoholism',
        'anxiety',
        'bipolarreddit',
        'bpd',
        'healthanxiety',
        'lonely',
        'schizophrenia',
        'socialanxiety',
        'suicidewatch',
    )
)

In [None]:
# # oof = np.zeros(len(x_train)*NUM_MODEL)
# final_test = list()
# val_f1_score = list()

# for index in range(NUM_MODEL):
    
#     print("model: {}".format(index))
    
#     x_train_fold, x_val, y_train_fold, y_val = train_test_split(x_train, y_train, test_size=0.2)

#     x_train_fold = torch.tensor(x_train_fold, dtype=torch.long).cuda()
#     x_val = torch.tensor(x_val, dtype=torch.long).cuda()
#     y_train_fold = torch.tensor(y_train_fold, dtype=torch.float).cuda()
#     y_val = torch.tensor(y_val, dtype=torch.float).cuda()

#     train_data = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
#     val_data = torch.utils.data.TensorDataset(x_val, y_val)

#     train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
#     val_loader = torch.utils.data.DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)
   
#     del x_train_fold, x_val, y_train_fold, train_data, val_data
#     gc.collect()

#     net = NeuralNet(crawl_matrix, 256, 1)
#     net.cuda()
#     loss_fn = torch.nn.BCELoss(reduction='mean')
#     # loss_fn = nn.CrossEntropyLoss()
#     # optimizer = torch.optim.Adam(net.parameters(), lr=0.002)
#     optimizer = torch.optim.AdamW(params =  net.parameters(), lr=0.002, weight_decay=1e-7)

#     test_checkpoint = list()
#     loss_checkpoint = list()
#     val_f1_epoch = list()
    
#     for epoch in range(EPOCHS): 
        
#         start_time = time.time()

#         avg_loss = 0.0
        
#         net.train()
#         for i, data in enumerate(train_loader):
            
#             # get the inputs
#             inputs, labels = data
            
#             mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
#             mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
#             mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(inputs.shape[0],1)) # 512,1024
#             mhe_addiction = np.tile(mhe_addiction_np,(inputs.shape[0],1)) # 512,1024
#             mhe_adhd = np.tile(mhe_adhd_np,(inputs.shape[0],1)) # 512,1024             
#             mhe_alcoholism = np.tile(mhe_alcoholism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_anxiety = np.tile(mhe_anxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(inputs.shape[0],1)) # 512,1024  
#             mhe_bpd = np.tile(mhe_bpd_np,(inputs.shape[0],1)) # 512,1024
#             mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_lonely = np.tile(mhe_lonely_np,(inputs.shape[0],1)) # 512,1024             
#             mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(inputs.shape[0],1)) # 512,1024
#             mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(inputs.shape[0],1)) # 512,1024            
            
#             mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#             mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#             mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
#             mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
#             mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
#             mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
#             mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
#             mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
#             mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
#             mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
#             mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
#             mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
#             mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
#             mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
#             mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()
  
#             mhe_total = torch.stack([mhe_depression, 
#                                      mhe_autism, 
#                                      mhe_ptsd,
#                                      mhe_EDAnonymous,
#                                      mhe_addiction,
#                                      mhe_adhd,
#                                      mhe_alcoholism,
#                                      mhe_anxiety,
#                                      mhe_bipolarreddit,
#                                      mhe_bpd,
#                                      mhe_healthanxiety,
#                                      mhe_lonely,
#                                      mhe_schizophrenia,
#                                      mhe_socialanxiety,
#                                      mhe_suicidewatch
#                                     ], dim=1)
#             # mhe_depression = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
            
#             ## forward + backward + optimize
#             pred1 = net(inputs, mhe_total)
            
#             loss1 = loss_fn(pred1, labels.unsqueeze(1))
#             # loss2 = loss_fn(pred2,label2)
#             # loss = loss1*loss_weight+loss2
#             # loss = loss1
           
#             # zero the parameter gradients
#             optimizer.zero_grad()

#             loss1.backward()
#             optimizer.step()

#             avg_loss += loss1.item()

#         net.eval()
        
#         valid_preds = np.zeros((len(y_val),))
#         true_label = np.zeros((len(y_val),))

#         avg_val_loss = 0.0

#         for j, data in enumerate(val_loader):
            
#             # get the inputs
#             inputs, labels = data
            
#             mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
#             mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
#             mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(inputs.shape[0],1)) # 512,1024
#             mhe_addiction = np.tile(mhe_addiction_np,(inputs.shape[0],1)) # 512,1024
#             mhe_adhd = np.tile(mhe_adhd_np,(inputs.shape[0],1)) # 512,1024             
#             mhe_alcoholism = np.tile(mhe_alcoholism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_anxiety = np.tile(mhe_anxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(inputs.shape[0],1)) # 512,1024  
#             mhe_bpd = np.tile(mhe_bpd_np,(inputs.shape[0],1)) # 512,1024
#             mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_lonely = np.tile(mhe_lonely_np,(inputs.shape[0],1)) # 512,1024             
#             mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(inputs.shape[0],1)) # 512,1024
#             mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(inputs.shape[0],1)) # 512,1024
#             mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(inputs.shape[0],1)) # 512,1024            
            
#             mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#             mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#             mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
#             mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
#             mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
#             mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
#             mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
#             mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
#             mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
#             mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
#             mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
#             mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
#             mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
#             mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
#             mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()
  
#             mhe_total = torch.stack([mhe_depression, 
#                                      mhe_autism, 
#                                      mhe_ptsd,
#                                      mhe_EDAnonymous,
#                                      mhe_addiction,
#                                      mhe_adhd,
#                                      mhe_alcoholism,
#                                      mhe_anxiety,
#                                      mhe_bipolarreddit,
#                                      mhe_bpd,
#                                      mhe_healthanxiety,
#                                      mhe_lonely,
#                                      mhe_schizophrenia,
#                                      mhe_socialanxiety,
#                                      mhe_suicidewatch
#                                     ], dim=1)
#             # mhe_depression = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
            
#             ## forward + backward + optimize
#             pred1 = net(inputs, mhe_total)
            
#             loss1_val = loss_fn(pred1, labels.unsqueeze(1))

#             avg_val_loss += loss1_val.item()
#             # (torch.argmax(y_pred, 1) == torch.argmax(y_test, 1)).float().mean()
            
#             valid_preds[j * BATCH_SIZE:(j+1) * BATCH_SIZE] = (pred1.squeeze().cpu().detach().numpy()>=0.5).astype(float)
#             # true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = torch.argmax(labels, 1).cpu().detach().numpy()
#             true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = labels.cpu().detach().numpy()
            
#         elapsed_time = time.time() - start_time 

#         print('Epoch {}/{} \t loss={:.4f}\t val_loss={:.4f} \t val_f1_score={:.4f} \t time={:.2f}s'.format(
#                         epoch+1, EPOCHS, avg_loss/len(train_loader),avg_val_loss/len(val_loader), f1_score(true_label, valid_preds, average='micro'), elapsed_time))
#         val_f1_epoch.append(f1_score(true_label, valid_preds, average='micro'))
        
#         ## inference
#         result = list()
#         with torch.no_grad():
#             for (x_batch,) in test_loader:
                
#                 mhe_depression = np.tile(mhe_depression_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_autism = np.tile(mhe_autism_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_ptsd = np.tile(mhe_ptsd_np,(x_batch.shape[0],1)) # 512,1024 
#                 mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_addiction = np.tile(mhe_addiction_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_adhd = np.tile(mhe_adhd_np,(x_batch.shape[0],1)) # 512,1024             
#                 mhe_alcoholism = np.tile(mhe_alcoholism_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_anxiety = np.tile(mhe_anxiety_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(x_batch.shape[0],1)) # 512,1024  
#                 mhe_bpd = np.tile(mhe_bpd_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_lonely = np.tile(mhe_lonely_np,(x_batch.shape[0],1)) # 512,1024             
#                 mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(x_batch.shape[0],1)) # 512,1024            

#                 mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#                 mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#                 mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
#                 mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
#                 mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
#                 mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
#                 mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
#                 mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
#                 mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
#                 mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
#                 mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
#                 mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
#                 mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
#                 mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
#                 mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()

#                 mhe_total = torch.stack([mhe_depression, 
#                                          mhe_autism, 
#                                          mhe_ptsd,
#                                          mhe_EDAnonymous,
#                                          mhe_addiction,
#                                          mhe_adhd,
#                                          mhe_alcoholism,
#                                          mhe_anxiety,
#                                          mhe_bipolarreddit,
#                                          mhe_bpd,
#                                          mhe_healthanxiety,
#                                          mhe_lonely,
#                                          mhe_schizophrenia,
#                                          mhe_socialanxiety,
#                                          mhe_suicidewatch
#                                         ], dim=1)
#                 y_pred = net(x_batch, mhe_total)
#                 y_pred = y_pred.cpu().detach().numpy()
#                 result.extend(y_pred)

#         test_checkpoint.append(result)
#         loss_checkpoint.append(avg_val_loss)
        
        
#     final_test.append(test_checkpoint[np.argmin(loss_checkpoint)])
#     val_f1_score.append(val_f1_epoch[np.argmin(loss_checkpoint)])
#     with open("final_test_{}".format(index), "wb") as fp:
#         pickle.dump(final_test, fp)



In [None]:
# oof = np.zeros(len(x_train)*NUM_MODEL)
final_test = list()
val_f1_score = list()

NFOLDS = 5
# folds = KFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)

# for fold_, (trn_idx, val_idx) in enumerate(folds.split(x_train)):

skf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)

for fold_, (trn_idx, val_idx) in enumerate(skf.split(x_train, y_train)):
        
    print("Fold: {}/{}".format(fold_ + 1, NFOLDS))
     
#     x_train_fold = x_train[trn_idx]
#     x_val = x_train[val_idx]
#     y_train_fold = y_train[trn_idx]
#     y_val = y_train[val_idx]
#     x_train_roberta = train_roberta[trn_idx]
#     x_val_roberta = train_roberta[val_idx]
    
#     x_train_fold = torch.tensor(x_train_fold, dtype=torch.long)#.cuda()
#     x_val = torch.tensor(x_val, dtype=torch.long)#.cuda()
#     y_train_fold = torch.tensor(y_train_fold, dtype=torch.float)#.cuda()
#     y_val = torch.tensor(y_val, dtype=torch.float)#.cuda()
#     x_train_roberta = torch.tensor(x_train_roberta, dtype=torch.float)#.cuda()
#     x_val_roberta = torch.tensor(x_val_roberta, dtype=torch.float)#.cuda()
#     x_train_fold = torch.tensor(x_train[trn_idx], dtype=torch.long)#.cuda()
#     x_val = torch.tensor(x_train[val_idx], dtype=torch.long)#.cuda()
#     y_train_fold = torch.tensor(y_train[trn_idx], dtype=torch.float)#.cuda()
#     y_val = torch.tensor(y_train[val_idx], dtype=torch.float)#.cuda()
#     x_train_roberta = torch.tensor(train_roberta[trn_idx], dtype=torch.float)#.cuda()
#     x_val_roberta = torch.tensor(train_roberta[val_idx], dtype=torch.float)#.cuda()
    
#     train_data = torch.utils.data.TensorDataset(x_train_fold, x_train_roberta, y_train_fold)
#     val_data = torch.utils.data.TensorDataset(x_val, x_val_roberta, y_val)
    train_data = torch.utils.data.TensorDataset(torch.tensor(x_train[trn_idx], dtype=torch.long), torch.tensor(train_roberta[trn_idx], dtype=torch.float), torch.tensor(y_train[trn_idx], dtype=torch.float))
    val_data = torch.utils.data.TensorDataset(torch.tensor(x_train[val_idx], dtype=torch.long), torch.tensor(train_roberta[val_idx], dtype=torch.float), torch.tensor(y_train[val_idx], dtype=torch.float))

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)

#     del x_train_fold, x_val, y_train_fold, train_data, val_data, x_train_roberta, x_val_roberta
#     gc.collect()


    net = NeuralNet(crawl_matrix, 256, 1)
    net.cuda()
    loss_fn = torch.nn.BCELoss(reduction='mean')
    # loss_fn = nn.CrossEntropyLoss()
    # optimizer = torch.optim.Adam(net.parameters(), lr=0.002)
    optimizer = torch.optim.AdamW(params =  net.parameters(), lr=0.002, weight_decay=1e-7)

    test_checkpoint = list()
    loss_checkpoint = list()
    val_f1_epoch = list()
    
    for epoch in range(EPOCHS): 
        
        start_time = time.time()

        avg_loss = 0.0
        
        net.train()
        for i, data in enumerate(train_loader):
            
            # get the inputs
            inputs, inputs_roberta, labels = data
            inputs, inputs_roberta, labels = inputs.cuda(), inputs_roberta.cuda(), labels.cuda()
            
            mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
            mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
            mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
            mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(inputs.shape[0],1)) # 512,1024
            mhe_addiction = np.tile(mhe_addiction_np,(inputs.shape[0],1)) # 512,1024
            mhe_adhd = np.tile(mhe_adhd_np,(inputs.shape[0],1)) # 512,1024             
            mhe_alcoholism = np.tile(mhe_alcoholism_np,(inputs.shape[0],1)) # 512,1024
            mhe_anxiety = np.tile(mhe_anxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(inputs.shape[0],1)) # 512,1024  
            mhe_bpd = np.tile(mhe_bpd_np,(inputs.shape[0],1)) # 512,1024
            mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_lonely = np.tile(mhe_lonely_np,(inputs.shape[0],1)) # 512,1024             
            mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(inputs.shape[0],1)) # 512,1024
            mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(inputs.shape[0],1)) # 512,1024            
            
            mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
            mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
            mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
            mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
            mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
            mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
            mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
            mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
            mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
            mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
            mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
            mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
            mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
            mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
            mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()
  
            mhe_total = torch.stack([mhe_depression, 
                                     mhe_autism, 
                                     mhe_ptsd,
                                     mhe_EDAnonymous,
                                     mhe_addiction,
                                     mhe_adhd,
                                     mhe_alcoholism,
                                     mhe_anxiety,
                                     mhe_bipolarreddit,
                                     mhe_bpd,
                                     mhe_healthanxiety,
                                     mhe_lonely,
                                     mhe_schizophrenia,
                                     mhe_socialanxiety,
                                     mhe_suicidewatch
                                    ], dim=1)
#             mhe_total = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
#             del mhe_depression, mhe_autism, mhe_ptsd, mhe_EDAnonymous, mhe_addiction, mhe_adhd, mhe_alcoholism, mhe_anxiety, mhe_bipolarreddit, mhe_bpd, mhe_healthanxiety, mhe_lonely, mhe_schizophrenia, mhe_socialanxiety, mhe_suicidewatch
#             gc.collect()
        
            ## forward + backward + optimize
            pred1 = net(inputs, inputs_roberta, mhe_total)
            
            loss1 = loss_fn(pred1, labels.unsqueeze(1))
            # loss2 = loss_fn(pred2,label2)
            # loss = loss1*loss_weight+loss2
            # loss = loss1
           
            # zero the parameter gradients
            optimizer.zero_grad()

            loss1.backward()
            optimizer.step()

            avg_loss += loss1.item()

        net.eval()
        
        valid_preds = np.zeros((len(val_idx),))
        true_label = np.zeros((len(val_idx),))

        avg_val_loss = 0.0

        for j, data in enumerate(val_loader):
            
            # get the inputs
            inputs, inputs_roberta, labels = data
            inputs, inputs_roberta, labels = inputs.cuda(), inputs_roberta.cuda(), labels.cuda()
            
            mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
            mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
            mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
            mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(inputs.shape[0],1)) # 512,1024
            mhe_addiction = np.tile(mhe_addiction_np,(inputs.shape[0],1)) # 512,1024
            mhe_adhd = np.tile(mhe_adhd_np,(inputs.shape[0],1)) # 512,1024             
            mhe_alcoholism = np.tile(mhe_alcoholism_np,(inputs.shape[0],1)) # 512,1024
            mhe_anxiety = np.tile(mhe_anxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(inputs.shape[0],1)) # 512,1024  
            mhe_bpd = np.tile(mhe_bpd_np,(inputs.shape[0],1)) # 512,1024
            mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_lonely = np.tile(mhe_lonely_np,(inputs.shape[0],1)) # 512,1024             
            mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(inputs.shape[0],1)) # 512,1024
            mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(inputs.shape[0],1)) # 512,1024
            mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(inputs.shape[0],1)) # 512,1024            
            
            mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
            mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
            mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
            mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
            mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
            mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
            mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
            mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
            mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
            mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
            mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
            mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
            mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
            mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
            mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()
  
            mhe_total = torch.stack([mhe_depression, 
                                     mhe_autism, 
                                     mhe_ptsd,
                                     mhe_EDAnonymous,
                                     mhe_addiction,
                                     mhe_adhd,
                                     mhe_alcoholism,
                                     mhe_anxiety,
                                     mhe_bipolarreddit,
                                     mhe_bpd,
                                     mhe_healthanxiety,
                                     mhe_lonely,
                                     mhe_schizophrenia,
                                     mhe_socialanxiety,
                                     mhe_suicidewatch
                                    ], dim=1)
#             mhe_total = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
#             del mhe_depression, mhe_autism, mhe_ptsd, mhe_EDAnonymous, mhe_addiction, mhe_adhd, mhe_alcoholism, mhe_anxiety, mhe_bipolarreddit, mhe_bpd, mhe_healthanxiety, mhe_lonely, mhe_schizophrenia, mhe_socialanxiety, mhe_suicidewatch
#             gc.collect()
    
            ## forward + backward + optimize
            pred1 = net(inputs, inputs_roberta, mhe_total)
    
            loss1_val = loss_fn(pred1, labels.unsqueeze(1))

            avg_val_loss += loss1_val.item()
            # (torch.argmax(y_pred, 1) == torch.argmax(y_test, 1)).float().mean()
            
            valid_preds[j * BATCH_SIZE:(j+1) * BATCH_SIZE] = (pred1.squeeze().cpu().detach().numpy()>=0.5).astype(float)
            # true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = torch.argmax(labels, 1).cpu().detach().numpy()
            true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = labels.cpu().detach().numpy()
            
        elapsed_time = time.time() - start_time 

        print('Epoch {}/{} \t loss={:.4f}\t val_loss={:.4f} \t val_f1_score={:.4f} \t time={:.2f}s'.format(
                        epoch+1, EPOCHS, avg_loss/len(train_loader),avg_val_loss/len(val_loader), f1_score(true_label, valid_preds, average='micro'), elapsed_time))
        val_f1_epoch.append(f1_score(true_label, valid_preds, average='micro'))
        
        ## inference
        result = list()
        with torch.no_grad():
            for (x_batch, inputs_roberta, ) in test_loader:
                x_batch, inputs_roberta = x_batch.cuda(), inputs_roberta.cuda()
                
                mhe_depression = np.tile(mhe_depression_np,(x_batch.shape[0],1)) # 512,1024
                mhe_autism = np.tile(mhe_autism_np,(x_batch.shape[0],1)) # 512,1024
                mhe_ptsd = np.tile(mhe_ptsd_np,(x_batch.shape[0],1)) # 512,1024 
                mhe_EDAnonymous = np.tile(mhe_EDAnonymous_np,(x_batch.shape[0],1)) # 512,1024
                mhe_addiction = np.tile(mhe_addiction_np,(x_batch.shape[0],1)) # 512,1024
                mhe_adhd = np.tile(mhe_adhd_np,(x_batch.shape[0],1)) # 512,1024             
                mhe_alcoholism = np.tile(mhe_alcoholism_np,(x_batch.shape[0],1)) # 512,1024
                mhe_anxiety = np.tile(mhe_anxiety_np,(x_batch.shape[0],1)) # 512,1024
                mhe_bipolarreddit = np.tile(mhe_bipolarreddit_np,(x_batch.shape[0],1)) # 512,1024  
                mhe_bpd = np.tile(mhe_bpd_np,(x_batch.shape[0],1)) # 512,1024
                mhe_healthanxiety = np.tile(mhe_healthanxiety_np,(x_batch.shape[0],1)) # 512,1024
                mhe_lonely = np.tile(mhe_lonely_np,(x_batch.shape[0],1)) # 512,1024             
                mhe_schizophrenia = np.tile(mhe_schizophrenia_np,(x_batch.shape[0],1)) # 512,1024
                mhe_socialanxiety = np.tile(mhe_socialanxiety_np,(x_batch.shape[0],1)) # 512,1024
                mhe_suicidewatch = np.tile(mhe_suicidewatch_np,(x_batch.shape[0],1)) # 512,1024            

                mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
                mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
                mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
                mhe_EDAnonymous = torch.tensor(mhe_EDAnonymous, dtype=torch.float).cuda()
                mhe_addiction = torch.tensor(mhe_addiction, dtype=torch.float).cuda()
                mhe_adhd = torch.tensor(mhe_adhd, dtype=torch.float).cuda()
                mhe_alcoholism = torch.tensor(mhe_alcoholism, dtype=torch.float).cuda()
                mhe_anxiety = torch.tensor(mhe_anxiety, dtype=torch.float).cuda()
                mhe_bipolarreddit = torch.tensor(mhe_bipolarreddit, dtype=torch.float).cuda()   
                mhe_bpd = torch.tensor(mhe_bpd, dtype=torch.float).cuda()
                mhe_healthanxiety = torch.tensor(mhe_healthanxiety, dtype=torch.float).cuda()
                mhe_lonely = torch.tensor(mhe_lonely, dtype=torch.float).cuda()
                mhe_schizophrenia = torch.tensor(mhe_schizophrenia, dtype=torch.float).cuda()
                mhe_socialanxiety = torch.tensor(mhe_socialanxiety, dtype=torch.float).cuda()
                mhe_suicidewatch = torch.tensor(mhe_suicidewatch, dtype=torch.float).cuda()

                mhe_total = torch.stack([mhe_depression, 
                                         mhe_autism, 
                                         mhe_ptsd,
                                         mhe_EDAnonymous,
                                         mhe_addiction,
                                         mhe_adhd,
                                         mhe_alcoholism,
                                         mhe_anxiety,
                                         mhe_bipolarreddit,
                                         mhe_bpd,
                                         mhe_healthanxiety,
                                         mhe_lonely,
                                         mhe_schizophrenia,
                                         mhe_socialanxiety,
                                         mhe_suicidewatch
                                        ], dim=1)
#                 mhe_total = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
#                 del mhe_depression, mhe_autism, mhe_ptsd, mhe_EDAnonymous, mhe_addiction, mhe_adhd, mhe_alcoholism, mhe_anxiety, mhe_bipolarreddit, mhe_bpd, mhe_healthanxiety, mhe_lonely, mhe_schizophrenia, mhe_socialanxiety, mhe_suicidewatch
#                 gc.collect()
        
                y_pred = net(x_batch, inputs_roberta, mhe_total)
                
                y_pred = y_pred.cpu().detach().numpy()
                result.extend(y_pred)

        test_checkpoint.append(result)
        loss_checkpoint.append(avg_val_loss)
        
        
    final_test.append(test_checkpoint[np.argmin(loss_checkpoint)])
    val_f1_score.append(val_f1_epoch[np.argmin(loss_checkpoint)])
    with open("final_test_{}".format(fold_), "wb") as fp: 
        pickle.dump(final_test, fp)



In [None]:
# # oof = np.zeros(len(x_train)*NUM_MODEL)
# final_test = list()
# val_f1_score = list()

# NFOLDS = 5
# # folds = KFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)

# # for fold_, (trn_idx, val_idx) in enumerate(folds.split(x_train)):

# skf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)

# for fold_, (trn_idx, val_idx) in enumerate(skf.split(x_train, y_train)):
        
#     print("Fold: {}/{}".format(fold_ + 1, NFOLDS))
     
#     x_train_fold = x_train[trn_idx]
#     x_val = x_train[val_idx]
#     y_train_fold = y_train[trn_idx]
#     y_val = y_train[val_idx]
    
#     x_train_fold = torch.tensor(x_train_fold, dtype=torch.long).cuda()
#     x_val = torch.tensor(x_val, dtype=torch.long).cuda()
#     y_train_fold = torch.tensor(y_train_fold, dtype=torch.float).cuda()
#     y_val = torch.tensor(y_val, dtype=torch.float).cuda()
    
#     train_data = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
#     val_data = torch.utils.data.TensorDataset(x_val, y_val)

#     train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
#     val_loader = torch.utils.data.DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)

#     del x_train_fold, x_val, y_train_fold, train_data, val_data
#     gc.collect()


#     net = NeuralNet(crawl_matrix, 256, 1)
#     net.cuda()
#     loss_fn = torch.nn.BCELoss(reduction='mean')
#     # loss_fn = nn.CrossEntropyLoss()
#     # optimizer = torch.optim.Adam(net.parameters(), lr=0.002)
#     optimizer = torch.optim.AdamW(params =  net.parameters(), lr=0.002, weight_decay=1e-7)

#     test_checkpoint = list()
#     loss_checkpoint = list()
#     val_f1_epoch = list()
    
#     for epoch in range(EPOCHS): 
        
#         start_time = time.time()

#         avg_loss = 0.0
        
#         net.train()
#         for i, data in enumerate(train_loader):
            
#             # get the inputs
#             inputs, labels = data
            
#             mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
#             mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
            
#             mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#             mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#             mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
            
#             mhe_total = torch.stack([mhe_depression, mhe_autism, mhe_ptsd], dim=1)
#             # mhe_depression = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
            
#             ## forward + backward + optimize
#             pred1 = net(inputs, mhe_total)
            
#             loss1 = loss_fn(pred1, labels.unsqueeze(1))
#             # loss2 = loss_fn(pred2,label2)
#             # loss = loss1*loss_weight+loss2
#             # loss = loss1
           
#             # zero the parameter gradients
#             optimizer.zero_grad()

#             loss1.backward()
#             optimizer.step()

#             avg_loss += loss1.item()

#         net.eval()
        
#         valid_preds = np.zeros((len(y_val),))
#         true_label = np.zeros((len(y_val),))

#         avg_val_loss = 0.0

#         for j, data in enumerate(val_loader):
            
#             # get the inputs
#             inputs, labels = data
            
#             mhe_depression = np.tile(mhe_depression_np,(inputs.shape[0],1)) # 512,1024
#             mhe_autism = np.tile(mhe_autism_np,(inputs.shape[0],1)) # 512,1024
#             mhe_ptsd = np.tile(mhe_ptsd_np,(inputs.shape[0],1)) # 512,1024 
            
#             mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#             mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#             mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()
            
#             mhe_total = torch.stack([mhe_depression, mhe_autism, mhe_ptsd], dim=1)
#             # mhe_depression = torch.unsqueeze(mhe_depression, 1) # 512,1,1024
            
#             ## forward + backward + optimize
#             pred1 = net(inputs, mhe_total)
            
#             loss1_val = loss_fn(pred1, labels.unsqueeze(1))

#             avg_val_loss += loss1_val.item()
#             # (torch.argmax(y_pred, 1) == torch.argmax(y_test, 1)).float().mean()
            
#             valid_preds[j * BATCH_SIZE:(j+1) * BATCH_SIZE] = (pred1.squeeze().cpu().detach().numpy()>=0.5).astype(float)
#             # true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = torch.argmax(labels, 1).cpu().detach().numpy()
#             true_label[j * BATCH_SIZE:(j+1) * BATCH_SIZE]  = labels.cpu().detach().numpy()
            
#         elapsed_time = time.time() - start_time 

#         print('Epoch {}/{} \t loss={:.4f}\t val_loss={:.4f} \t val_f1_score={:.4f} \t time={:.2f}s'.format(
#                         epoch+1, EPOCHS, avg_loss/len(train_loader),avg_val_loss/len(val_loader), f1_score(true_label, valid_preds, average='micro'), elapsed_time))
#         val_f1_epoch.append(f1_score(true_label, valid_preds, average='micro'))
        
#         ## inference
#         result = list()
#         with torch.no_grad():
#             for (x_batch,) in test_loader:
                
#                 mhe_depression = np.tile(mhe_depression_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_autism = np.tile(mhe_autism_np,(x_batch.shape[0],1)) # 512,1024
#                 mhe_ptsd = np.tile(mhe_ptsd_np,(x_batch.shape[0],1)) # 512,1024 

#                 mhe_depression = torch.tensor(mhe_depression, dtype=torch.float).cuda()
#                 mhe_autism = torch.tensor(mhe_autism, dtype=torch.float).cuda()
#                 mhe_ptsd = torch.tensor(mhe_ptsd, dtype=torch.float).cuda()

#                 mhe_total = torch.stack([mhe_depression, mhe_autism, mhe_ptsd], dim=1)
            
#                 y_pred = net(x_batch, mhe_total)
#                 y_pred = y_pred.cpu().detach().numpy()
#                 result.extend(y_pred)

#         test_checkpoint.append(result)
#         loss_checkpoint.append(avg_val_loss)
        
        
#     final_test.append(test_checkpoint[np.argmin(loss_checkpoint)])
#     val_f1_score.append(val_f1_epoch[np.argmin(loss_checkpoint)])
#     with open("final_test_{}".format(fold_), "wb") as fp: 
#         pickle.dump(final_test, fp)



In [None]:
# Report the average of the validation F1 values collected in `val_f1_score`
# (each entry is the F1 recorded at the epoch with the lowest validation loss).
print('mean val f1 score:', np.mean(np.asarray(val_f1_score)))

In [None]:
# class callback:
#     def __init__(self):
#         self.score = list()
#         self.model = list()
#         self.data = list()
    
#     def put(self, model,data, score):
#         self.score.append(score)
#         self.model.append(model)
#         self.data.append(data)

#     def get_model(self):
#         ind = np.argmin(self.score)
#         return self.model[ind]
#     def get_data(self):
#         ind = np.argmin(self.score)
#         return self.data[ind]

In [None]:
from sklearn.metrics import f1_score, recall_score, precision_score

def threshold_search_fold(y_true, y_proba):
    """Grid-search the decision threshold that maximises micro-averaged F1.

    Candidate thresholds are ``i * 0.01`` for ``i`` in ``0..99``. A sample is
    predicted as 1 when its score is ``>= threshold`` and as 0 otherwise.
    A later candidate only replaces the current best on a strict improvement,
    so the smallest threshold reaching the top score is the one returned.

    Micro-averaged recall and precision at the selected threshold are printed
    as a side effect.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels; forwarded unchanged to the scikit-learn metrics.
    y_proba : array-like
        Predicted scores to compare against each threshold. Plain Python
        sequences are accepted (converted once with ``np.asarray``).

    Returns
    -------
    dict
        ``{'f1_binary_threshold': <best threshold>, 'f1_binary': <best score>}``.
        When no candidate scores above 0 both values stay at 0.
    """
    # Convert once so lists/tuples support the element-wise comparison below.
    y_proba = np.asarray(y_proba)

    def binarize(threshold):
        # Hard 0/1 predictions for a given cut-off.
        return np.where(y_proba >= threshold, 1, 0)

    # NOTE(review): with average='micro' on single-label targets, precision,
    # recall and F1 all reduce to plain accuracy -- confirm 'micro' (rather
    # than 'binary') is what is intended here.
    binary_best_threshold = 0
    binary_best_score = 0

    for threshold in (i * 0.01 for i in range(100)):
        binary_score = f1_score(y_true, binarize(threshold), average='micro')
        if binary_score > binary_best_score:
            binary_best_threshold = threshold
            binary_best_score = binary_score

    # Build the winning prediction once and reuse it for both reports.
    best_prediction = binarize(binary_best_threshold)
    recall = recall_score(y_true, best_prediction, average='micro')
    precision = precision_score(y_true, best_prediction, average='micro')
    print('best_threshold_recall:', recall)
    print('best_threshold_precision:', precision)

    search_result = {'f1_binary_threshold': binary_best_threshold, 'f1_binary': binary_best_score,}
    return search_result

In [None]:
# Ensemble step: element-wise average over the first axis of `final_test`
# (one stored set of test predictions per entry).
predicted_prob = np.mean(np.asarray(final_test), axis=0)

In [None]:
# Search for the cut-off that maximises micro-F1 of the averaged predictions.
# NOTE(review): the threshold is selected against the test labels themselves,
# so the score reported here is optimistic.
search_resutls = threshold_search_fold(
    y_true=test_df.label.values,
    y_proba=predicted_prob,
)
search_resutls

In [None]:
# Turn the averaged scores into hard labels at a fixed 0.5 cut-off.
# From here on `predicted_prob` holds 0.0/1.0 labels, not probabilities:
# re-running the threshold-search cell after this one would search on labels.
predicted_prob = np.where(predicted_prob >= 0.5, 1.0, 0.0)

In [None]:
# Micro-averaged F1 of the thresholded ensemble predictions on the test set.
f1_score(
    y_true=test_df.label.values,
    y_pred=predicted_prob,
    average='micro',
)