In [4]:
import re
import numpy as np
import pandas as pd
import preprocess_kgptalkie as ps

In [5]:
df = pd.read_csv("suicide_dataset.csv")

In [6]:
print(df.head())

                                               tweet  intention
0  my life is meaningless i just want to end my l...          1
1  muttering i wanna die to myself daily for a fe...          1
2  work slave i really feel like my only purpose ...          1
3  i did something on the 2 of october i overdose...          1
4  i feel like no one cares i just want to die ma...          1


In [7]:
def get_clean(x):
    x = str(x).lower().replace('\\', '').replace('_', ' ')
    x = ps.cont_exp(x)
    x = ps.remove_emails(x)
    x = ps.remove_urls(x)
    x = ps.remove_html_tags(x)
    x = ps.remove_rt(x)
    x = ps.remove_accented_chars(x)
    x = ps.remove_special_chars(x)
    x = re.sub("(.)\\1{2,}", "\\1", x)
    return x

In [8]:
df['tweet'] = df['tweet'].apply(lambda x: get_clean(x))

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC

In [10]:
tfidf = TfidfVectorizer(max_features=20000, ngram_range=(1,3), analyzer='char')

In [11]:
X = tfidf.fit_transform(df['tweet'])
y = df['intention']

In [12]:
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

In [13]:
model = LinearSVC()
model.fit(x_train,y_train)

LinearSVC()

In [14]:
y_pred = model.predict(x_test)

In [15]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.94      0.93      0.93      1060
           1       0.91      0.91      0.91       764

    accuracy                           0.92      1824
   macro avg       0.92      0.92      0.92      1824
weighted avg       0.92      0.92      0.92      1824



In [16]:
t = "My whole life has just been mistakes and failings. When I look back on my past it’s all I see. When I look toward the future it’s all I see. I truly am an idiot and a fuckup. I am often called a loser or other such things. No use in disagreeing with the truth. Things could have been different if I had used my brain, but I’m clearly too stupid for that. I wish I could be saved, to be happy, and for others to be happy too."
u = "I had an amazing day. I got a 100$ note on the way to subway."

In [17]:
def predict_tendency(x):
    x = get_clean(x)
    vec = tfidf.transform([x])
    if model.predict(vec)[0] == 1:
        print("suicidal")
    else:
        print("fine")

In [18]:
predict_tendency(t)
predict_tendency(u)

suicidal
fine


In [19]:
#Saving Model
import joblib
pkl_file = open("SuicideModel.pkl","wb")
joblib.dump(model,pkl_file)
pkl_file.close()

In [20]:
data = joblib.load('SuicideModel.pkl')