In [1]:
import pandas as pd
import numpy as np
import re
import string

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer

from textblob import TextBlob
from sklearn.model_selection import train_test_split

In [2]:
df=pd.read_csv(r"C:\Users\Shuhaib\Downloads\drugsComTest_raw.csv")
df

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4
...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20


In [3]:
df.isnull().sum()

uniqueID         0
drugName         0
condition      295
review           0
rating           0
date             0
usefulCount      0
dtype: int64

In [4]:
for i in df:
    print(i)
    print(df[i].unique())
    print(df[i].nunique())

uniqueID
[163740 206473 159672 ... 130945  47656 113712]
53766
drugName
['Mirtazapine' 'Mesalamine' 'Bactrim' ... 'Guarana' 'Maprotiline'
 'FluMist']
2637
condition
['Depression' "Crohn's Disease, Maintenance" 'Urinary Tract Infection'
 'Weight Loss' 'Birth Control' 'Keratosis' 'Migraine Prevention'
 'Opiate Withdrawal' 'Hot Flashes' 'Vaginal Yeast Infection'
 'Schizoaffective Disorde' 'Insomnia' 'Narcolepsy' 'Smoking Cessation'
 'Acne' 'Bipolar Disorde' 'Irritable Bowel Syndrome'
 'Keratoconjunctivitis Sicca' 'Hyperhidrosis' 'Panic Disorde'
 'Onychomycosis, Fingernail' 'Rosacea' 'Bowel Preparation'
 'Constipation, Drug Induced' nan 'Diabetes, Type 2' 'Pain'
 'Lipodystrophy' 'Alcohol Dependence' 'Emergency Contraception'
 'Major Depressive Disorde' 'Anxiety' 'Bladder Infection'
 'Benign Prostatic Hyperplasia' 'Rheumatoid Arthritis' 'Endometriosis'
 'ADHD' 'Restless Legs Syndrome' 'Conjunctivitis, Allergic'
 "Raynaud's Syndrome" 'Schizophrenia' 'Diabetes, Type 1'
 'Kidney Infections' 'G

In [5]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shuhaib\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Shuhaib\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Shuhaib\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [6]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation))  # punctuation
    text = text.strip()
    return text

df['clean_review'] = df['review'].astype(str).apply(clean_text)


In [7]:
df

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,clean_review
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22,ive tried a few antidepressants over the years...
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17,my son has crohns disease and has done very we...
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3,quick reduction of symptoms
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35,contrave combines drugs that were used for alc...
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4,i have been on this birth control for one cycl...
...,...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43,i have taken tamoxifen for years side effects...
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11,ive been taking lexapro escitaploprgram since ...
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7,im married years old and i have no kids takin...
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20,i was prescribed nucynta for severe neckshould...


In [8]:
df['tokens'] = df['clean_review'].apply(word_tokenize)
df


Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,clean_review,tokens
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22,ive tried a few antidepressants over the years...,"[ive, tried, a, few, antidepressants, over, th..."
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17,my son has crohns disease and has done very we...,"[my, son, has, crohns, disease, and, has, done..."
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3,quick reduction of symptoms,"[quick, reduction, of, symptoms]"
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35,contrave combines drugs that were used for alc...,"[contrave, combines, drugs, that, were, used, ..."
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4,i have been on this birth control for one cycl...,"[i, have, been, on, this, birth, control, for,..."
...,...,...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43,i have taken tamoxifen for years side effects...,"[i, have, taken, tamoxifen, for, years, side, ..."
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11,ive been taking lexapro escitaploprgram since ...,"[ive, been, taking, lexapro, escitaploprgram, ..."
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7,im married years old and i have no kids takin...,"[im, married, years, old, and, i, have, no, ki..."
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20,i was prescribed nucynta for severe neckshould...,"[i, was, prescribed, nucynta, for, severe, nec..."


In [9]:
stop_words = set(stopwords.words('english'))

df['tokens_no_stopwords'] = df['tokens'].apply(
    lambda x: [word for word in x if word not in stop_words]
)


In [10]:
lemmatizer = WordNetLemmatizer()

df['lemmatized'] = df['tokens_no_stopwords'].apply(
    lambda x: [lemmatizer.lemmatize(word) for word in x]
)


In [11]:
stemmer = PorterStemmer()

df['stemmed'] = df['lemmatized'].apply(
    lambda x: [stemmer.stem(word) for word in x]
)


In [12]:
df['final_clean_review'] = df['stemmed'].apply(lambda x: ' '.join(x))


In [13]:
def get_sentiment(text):
    return TextBlob(text).sentiment.polarity

df['sentiment_score'] = df['final_clean_review'].apply(get_sentiment)


In [14]:
def sentiment_label(score):
    if score > 0:
        return 'Positive'
    elif score < 0:
        return 'Negative'
    else:
        return 'Neutral'

df['sentiment'] = df['sentiment_score'].apply(sentiment_label)
df

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,clean_review,tokens,tokens_no_stopwords,lemmatized,stemmed,final_clean_review,sentiment_score,sentiment
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22,ive tried a few antidepressants over the years...,"[ive, tried, a, few, antidepressants, over, th...","[ive, tried, antidepressants, years, citalopra...","[ive, tried, antidepressant, year, citalopram,...","[ive, tri, antidepress, year, citalopram, fluo...",ive tri antidepress year citalopram fluoxetin ...,-0.122222,Negative
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17,my son has crohns disease and has done very we...,"[my, son, has, crohns, disease, and, has, done...","[son, crohns, disease, done, well, asacol, com...","[son, crohn, disease, done, well, asacol, comp...","[son, crohn, diseas, done, well, asacol, compl...",son crohn diseas done well asacol complaint sh...,-0.300000,Negative
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3,quick reduction of symptoms,"[quick, reduction, of, symptoms]","[quick, reduction, symptoms]","[quick, reduction, symptom]","[quick, reduct, symptom]",quick reduct symptom,0.333333,Positive
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35,contrave combines drugs that were used for alc...,"[contrave, combines, drugs, that, were, used, ...","[contrave, combines, drugs, used, alcohol, smo...","[contrave, combine, drug, used, alcohol, smoki...","[contrav, combin, drug, use, alcohol, smoke, o...",contrav combin drug use alcohol smoke opioid c...,0.700000,Positive
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4,i have been on this birth control for one cycl...,"[i, have, been, on, this, birth, control, for,...","[birth, control, one, cycle, reading, reviews,...","[birth, control, one, cycle, reading, review, ...","[birth, control, one, cycl, read, review, type...",birth control one cycl read review type simila...,0.208974,Positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43,i have taken tamoxifen for years side effects...,"[i, have, taken, tamoxifen, for, years, side, ...","[taken, tamoxifen, years, side, effects, sever...","[taken, tamoxifen, year, side, effect, severe,...","[taken, tamoxifen, year, side, effect, sever, ...",taken tamoxifen year side effect sever sweat d...,-0.078571,Negative
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11,ive been taking lexapro escitaploprgram since ...,"[ive, been, taking, lexapro, escitaploprgram, ...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, take, lexapro, escitaploprgram, sinc, fe...",ive take lexapro escitaploprgram sinc februari...,0.069048,Positive
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7,im married years old and i have no kids takin...,"[im, married, years, old, and, i, have, no, ki...","[im, married, years, old, kids, taking, pill, ...","[im, married, year, old, kid, taking, pill, ha...","[im, marri, year, old, kid, take, pill, hassl,...",im marri year old kid take pill hassl decid ge...,0.069949,Positive
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20,i was prescribed nucynta for severe neckshould...,"[i, was, prescribed, nucynta, for, severe, nec...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescrib, nucynta, sever, neckshould, pain, t...",prescrib nucynta sever neckshould pain take mg...,0.000000,Neutral


In [15]:
 df['sentiment_score'] = df['sentiment'].map({
     'Positive': 1,
    'Neutral': 0,
    'Negative': -1
})
 df

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,clean_review,tokens,tokens_no_stopwords,lemmatized,stemmed,final_clean_review,sentiment_score,sentiment
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22,ive tried a few antidepressants over the years...,"[ive, tried, a, few, antidepressants, over, th...","[ive, tried, antidepressants, years, citalopra...","[ive, tried, antidepressant, year, citalopram,...","[ive, tri, antidepress, year, citalopram, fluo...",ive tri antidepress year citalopram fluoxetin ...,-1,Negative
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17,my son has crohns disease and has done very we...,"[my, son, has, crohns, disease, and, has, done...","[son, crohns, disease, done, well, asacol, com...","[son, crohn, disease, done, well, asacol, comp...","[son, crohn, diseas, done, well, asacol, compl...",son crohn diseas done well asacol complaint sh...,-1,Negative
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3,quick reduction of symptoms,"[quick, reduction, of, symptoms]","[quick, reduction, symptoms]","[quick, reduction, symptom]","[quick, reduct, symptom]",quick reduct symptom,1,Positive
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35,contrave combines drugs that were used for alc...,"[contrave, combines, drugs, that, were, used, ...","[contrave, combines, drugs, used, alcohol, smo...","[contrave, combine, drug, used, alcohol, smoki...","[contrav, combin, drug, use, alcohol, smoke, o...",contrav combin drug use alcohol smoke opioid c...,1,Positive
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4,i have been on this birth control for one cycl...,"[i, have, been, on, this, birth, control, for,...","[birth, control, one, cycle, reading, reviews,...","[birth, control, one, cycle, reading, review, ...","[birth, control, one, cycl, read, review, type...",birth control one cycl read review type simila...,1,Positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43,i have taken tamoxifen for years side effects...,"[i, have, taken, tamoxifen, for, years, side, ...","[taken, tamoxifen, years, side, effects, sever...","[taken, tamoxifen, year, side, effect, severe,...","[taken, tamoxifen, year, side, effect, sever, ...",taken tamoxifen year side effect sever sweat d...,-1,Negative
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11,ive been taking lexapro escitaploprgram since ...,"[ive, been, taking, lexapro, escitaploprgram, ...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, take, lexapro, escitaploprgram, sinc, fe...",ive take lexapro escitaploprgram sinc februari...,1,Positive
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7,im married years old and i have no kids takin...,"[im, married, years, old, and, i, have, no, ki...","[im, married, years, old, kids, taking, pill, ...","[im, married, year, old, kid, taking, pill, ha...","[im, marri, year, old, kid, take, pill, hassl,...",im marri year old kid take pill hassl decid ge...,1,Positive
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20,i was prescribed nucynta for severe neckshould...,"[i, was, prescribed, nucynta, for, severe, nec...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescrib, nucynta, sever, neckshould, pain, t...",prescrib nucynta sever neckshould pain take mg...,0,Neutral


In [16]:
df[['review', 'final_clean_review', 'sentiment_score', 'sentiment']].head()


Unnamed: 0,review,final_clean_review,sentiment_score,sentiment
0,"""I&#039;ve tried a few antidepressants over th...",ive tri antidepress year citalopram fluoxetin ...,-1,Negative
1,"""My son has Crohn&#039;s disease and has done ...",son crohn diseas done well asacol complaint sh...,-1,Negative
2,"""Quick reduction of symptoms""",quick reduct symptom,1,Positive
3,"""Contrave combines drugs that were used for al...",contrav combin drug use alcohol smoke opioid c...,1,Positive
4,"""I have been on this birth control for one cyc...",birth control one cycl read review type simila...,1,Positive


In [17]:
df

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount,clean_review,tokens,tokens_no_stopwords,lemmatized,stemmed,final_clean_review,sentiment_score,sentiment
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,28-Feb-12,22,ive tried a few antidepressants over the years...,"[ive, tried, a, few, antidepressants, over, th...","[ive, tried, antidepressants, years, citalopra...","[ive, tried, antidepressant, year, citalopram,...","[ive, tri, antidepress, year, citalopram, fluo...",ive tri antidepress year citalopram fluoxetin ...,-1,Negative
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,17-May-09,17,my son has crohns disease and has done very we...,"[my, son, has, crohns, disease, and, has, done...","[son, crohns, disease, done, well, asacol, com...","[son, crohn, disease, done, well, asacol, comp...","[son, crohn, diseas, done, well, asacol, compl...",son crohn diseas done well asacol complaint sh...,-1,Negative
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,29-Sep-17,3,quick reduction of symptoms,"[quick, reduction, of, symptoms]","[quick, reduction, symptoms]","[quick, reduction, symptom]","[quick, reduct, symptom]",quick reduct symptom,1,Positive
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,5-Mar-17,35,contrave combines drugs that were used for alc...,"[contrave, combines, drugs, that, were, used, ...","[contrave, combines, drugs, used, alcohol, smo...","[contrave, combine, drug, used, alcohol, smoki...","[contrav, combin, drug, use, alcohol, smoke, o...",contrav combin drug use alcohol smoke opioid c...,1,Positive
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,22-Oct-15,4,i have been on this birth control for one cycl...,"[i, have, been, on, this, birth, control, for,...","[birth, control, one, cycle, reading, reviews,...","[birth, control, one, cycle, reading, review, ...","[birth, control, one, cycl, read, review, type...",birth control one cycl read review type simila...,1,Positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53761,159999,Tamoxifen,"Breast Cancer, Prevention","""I have taken Tamoxifen for 5 years. Side effe...",10,13-Sep-14,43,i have taken tamoxifen for years side effects...,"[i, have, taken, tamoxifen, for, years, side, ...","[taken, tamoxifen, years, side, effects, sever...","[taken, tamoxifen, year, side, effect, severe,...","[taken, tamoxifen, year, side, effect, sever, ...",taken tamoxifen year side effect sever sweat d...,-1,Negative
53762,140714,Escitalopram,Anxiety,"""I&#039;ve been taking Lexapro (escitaploprgra...",9,8-Oct-16,11,ive been taking lexapro escitaploprgram since ...,"[ive, been, taking, lexapro, escitaploprgram, ...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, taking, lexapro, escitaploprgram, since,...","[ive, take, lexapro, escitaploprgram, sinc, fe...",ive take lexapro escitaploprgram sinc februari...,1,Positive
53763,130945,Levonorgestrel,Birth Control,"""I&#039;m married, 34 years old and I have no ...",8,15-Nov-10,7,im married years old and i have no kids takin...,"[im, married, years, old, and, i, have, no, ki...","[im, married, years, old, kids, taking, pill, ...","[im, married, year, old, kid, taking, pill, ha...","[im, marri, year, old, kid, take, pill, hassl,...",im marri year old kid take pill hassl decid ge...,1,Positive
53764,47656,Tapentadol,Pain,"""I was prescribed Nucynta for severe neck/shou...",1,28-Nov-11,20,i was prescribed nucynta for severe neckshould...,"[i, was, prescribed, nucynta, for, severe, nec...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescribed, nucynta, severe, neckshoulder, pa...","[prescrib, nucynta, sever, neckshould, pain, t...",prescrib nucynta sever neckshould pain take mg...,0,Neutral


In [18]:
X = df['clean_review']
y = df['sentiment']

# split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

# check sizes
print("X_train size:", X_train.shape)
print("X_test size:", X_test.shape)
print("y_train size:", y_train.shape)
print("y_test size:", y_test.shape)


X_train size: (43012,)
X_test size: (10754,)
y_train size: (43012,)
y_test size: (10754,)


In [22]:
from sklearn.feature_extraction.text import CountVectorizer

# Sample text data
text_data = [
    "Machine learning is amazing",
    "Learning machines are powerful",
    "I love machine learning"
]

# Create CountVectorizer
vectorizer = CountVectorizer()

# Fit and transform text
X = vectorizer.fit_transform(text_data)

# Show vocabulary
print("Vocabulary:")
print(vectorizer.get_feature_names_out())

# Show vectorized matrix
print("\nVectorized Matrix:")
print(X.toarray())

Vocabulary:
['amazing' 'are' 'is' 'learning' 'love' 'machine' 'machines' 'powerful']

Vectorized Matrix:
[[1 0 1 1 0 1 0 0]
 [0 1 0 1 0 0 1 1]
 [0 0 0 1 1 1 0 0]]


In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer


tfidf = TfidfVectorizer(max_features=5000)


X_train_tfidf = tfidf.fit_transform(X_train)


X_test_tfidf = tfidf.transform(X_test)


print("X_train_tfidf shape:", X_train_tfidf.shape)
print("X_test_tfidf shape:", X_test_tfidf.shape)


X_train_tfidf shape: (43012, 5000)
X_test_tfidf shape: (10754, 5000)


In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


model = LogisticRegression(max_iter=1000)


model.fit(X_train_tfidf, y_train)


y_pred = model.predict(X_test_tfidf)


print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9000371954621537


In [21]:
new_review = "I039;ve tried a few antidepressants over the years (citalopram, fluoxetine, amitriptyline), but none of those helped with my depression, insomnia &amp; anxiety. My doctor suggested and changed me onto 45mg mirtazapine and this medicine has saved my life. Thankfully I have had no side effects especially the most common - weight gain, I&#039;ve actually lost alot of weight. I still have suicidal thoughts but mirtazapine has saved me."

# clean the review (use same function)
clean_new_review = clean_text(new_review)

# convert to TF-IDF
new_review_tfidf = tfidf.transform([clean_new_review])

# predict sentiment
prediction = model.predict(new_review_tfidf)

# show result
if prediction[0] == 1:
    print("Sentiment: Positive 😊")
else:
    print("Sentiment: Negative 😞")


Sentiment: Negative 😞
