In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.proportion import proportions_chisquare
from scipy.stats import chisquare
import pickle
from bs4 import BeautifulSoup
from collections import defaultdict
import requests
%matplotlib inline

import nltk
from nltk.tokenize import word_tokenize
import string
from nltk.stem.snowball import SnowballStemmer
import re
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from wordcloud import WordCloud

In [4]:
## Text preprocessing only needs two steps: strip punctuation and stem the tokens.
# Shared English Snowball stemmer; tokenize() below reads this module-level name.
stemmer = SnowballStemmer('english')

def stem_tokens(tokens, stemmer):
    """Stem every token with the given stemmer.

    Parameters
    ----------
    tokens : iterable
        Items to stem; each one is passed to ``stemmer.stem``.
    stemmer : object
        Any object exposing a ``stem(item)`` method.

    Returns
    -------
    list
        The stemmed items, in the same order as ``tokens``.
    """
    return [stemmer.stem(token) for token in tokens]

def tokenize(text):
    """Turn a raw string into a list of stemmed word tokens.

    Steps: delete every character found in ``string.punctuation``, split the
    remainder with ``nltk.word_tokenize``, then stem each token with the
    module-level ``stemmer`` via ``stem_tokens``.

    Parameters
    ----------
    text : str
        The text to tokenize.

    Returns
    -------
    list
        Stemmed tokens in their original order.
    """
    # Punctuation is deleted, not replaced by a space, so "can't" -> "cant".
    drop_punctuation = str.maketrans('', '', string.punctuation)
    words = nltk.word_tokenize(text.translate(drop_punctuation))
    return stem_tokens(words, stemmer)

In [16]:
## remove special symbols from the review text and add a rating category column
def rm_sym(df):
    """Clean the review text and add a coarse rating category.

    The input frame is NOT modified; a cleaned copy is returned, so the cell
    can be re-run safely and the raw frame stays available to the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column ``'review'`` and a numeric column
        ``'rating'``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` where

        * ``'review'`` has the HTML entity ``&#039;`` turned into an
          apostrophe and every double-quote character removed;
        * ``'rating_cate'`` is ``'high'`` for ratings >= 7, ``'low'`` for
          ratings <= 4, ``'medium'`` for ratings strictly between 4 and 7,
          and ``''`` when the rating is missing.
    """
    cleaned = df.copy()

    # regex=False: both patterns are literal text, so do not depend on the
    # pandas-version-specific default for the `regex` argument.
    cleaned['review'] = (
        cleaned['review']
        .str.replace("&#039;", "'", regex=False)
        .str.replace("\"", "", regex=False)
    )

    rating = cleaned['rating']
    # Start from '' so rows with a missing rating keep an empty category
    # (every comparison against NaN below is False).
    cleaned['rating_cate'] = ''
    cleaned.loc[rating >= 7, 'rating_cate'] = 'high'
    cleaned.loc[rating <= 4, 'rating_cate'] = 'low'
    cleaned.loc[(rating > 4) & (rating < 7), 'rating_cate'] = 'medium'
    return cleaned

# Read the tab-separated train and test splits (first column becomes the index)
# and pass each one straight through rm_sym.
df = rm_sym(
    pd.read_csv('drugsCom_raw/drugsComTrain_raw.tsv', sep='\t', index_col=0)
)
test = rm_sym(
    pd.read_csv("drugsCom_raw/drugsComTest_raw.tsv", sep='\t', index_col=0)
)
                                                                      

In [38]:
# Eyeball one randomly chosen review (unseeded, so the pick changes on every run).
df['review'].sample(1).iloc[0]

'Dr. decided to try Latuda. The side effects completely outweighed the benefits for me. For a while I was able to tolerate the headaches and nausea, and enjoyed the mood improvement. I was actually able to feel true happiness in happy situations. But it also affected my menstrual cycle to where I was experiencing a full week of cramps (not normal for me) and was 2 weeks late (also not normal for me). Then the nausea just became vomiting every time I took the pill. So I stopped it. I really wished it would work because I liked the happiness I experienced and no mania.'

In [72]:
# Preview only the first rows instead of rendering the whole training frame.
df.head(10)

Unnamed: 0,drugName,condition,review,rating,date,usefulCount,rating_cate
206461,Valsartan,Left Ventricular Dysfunction,"It has no side effect, I take it in combinatio...",9.0,"May 20, 2012",27,high
95260,Guanfacine,ADHD,My son is halfway through his fourth week of I...,8.0,"April 27, 2010",192,high
92703,Lybrel,Birth Control,"I used to take another oral contraceptive, whi...",5.0,"December 14, 2009",17,medium
138000,Ortho Evra,Birth Control,This is my first time using any form of birth ...,8.0,"November 3, 2015",10,high
35696,Buprenorphine / naloxone,Opiate Dependence,Suboxone has completely turned my life around....,9.0,"November 27, 2016",37,high
155963,Cialis,Benign Prostatic Hyperplasia,2nd day on 5mg started to work with rock hard ...,2.0,"November 28, 2015",43,low
165907,Levonorgestrel,Emergency Contraception,"He pulled out, but he cummed a bit in me. I to...",1.0,"March 7, 2017",5,low
102654,Aripiprazole,Bipolar Disorde,Abilify changed my life. There is hope. I was ...,10.0,"March 14, 2015",32,high
74811,Keppra,Epilepsy,I Ve had nothing but problems with the Keppe...,1.0,"August 9, 2016",11,low
48928,Ethinyl estradiol / levonorgestrel,Birth Control,I had been on the pill for many years. When my...,8.0,"December 8, 2016",1,high


In [42]:
# (rows, columns) of the training frame.
df.shape

(161297, 7)

In [43]:
# (rows, columns) of the test frame.
test.shape

(53766, 7)

In [69]:
# Pick one random review whose text contains the phrase "side effects".
df.loc[df['review'].str.contains("side effects"), 'review'].sample(1).iloc[0]

'Bad side effects for 2 weeks when starting this medicine. Tolerated it fairly well for a couple months. I was on 50mg, then 100 mg. It did raise my blood pressure. No libido issues like I had when on Efexxor.  It did nothing for my fibromyalgia and it took me a few weeks to stop it. Coming off the Pristiq was was worse than starting it. Very nasty headaches everyday. It also made my hair fall out at an alarming rate. Handfuls. My hair stopped falling out about a month after I stopped the Pristiq.'

In [15]:
# NOTE(review): the output recorded for this cell is a single review string, and
# its execution count (15) precedes the cell that assigns `test` as a DataFrame
# (16) — so `test` was presumably a plain str when this ran. On a fresh
# top-to-bottom run `test` is a DataFrame, and DataFrame.replace would only swap
# cells whose entire value is a double quote. TODO confirm the intended input.
test.replace("\"","")

"This medication worked fantastically for me! When I started taking it I was at the bottom of my class, struggling at school, and within 2 years I'd moved to the top. It totally transformed how I was able to concentrate. \r\n\r\nI always felt like this allowed me to 'be the real me' because it gave me the focus and patience to do the things I wanted to do. It helped me socially because I was able think and communicate better (more tactfully!)\r\n\r\nIn terms of side effects I can't take it too late in the day or else I wouldn't sleep at night, but nothing else. I know some people have described many more side effects but I didn't get anything like that.\r\n\r\nThe only downside is that after 19 years on the drug, I don't find it as effective anymore."

In [None]:
# Number of distinct conditions that have more than 100 rows in the training frame.
(df.groupby('condition').size() > 100).sum()

In [20]:
# Another unseeded spot check of a single review's text.
df['review'].sample(1).iloc[0]

'"I was a pretty heavy smoker-about a pack a day (even on days I can\'t smoke at work which just meant I\'d chain smoke when I left to catch up!) and I always made excuses to go have a smoke.  I also made many excuses not to quit.  I finally decided that I wanted to stop and I couldn\'t believe how well the Chantix worked for me.  I had tried the nicotine patch, Wellbutrin, cold turkey....Chantix was kind of my last resort.  The only side effect I noticed was a couple minutes of nausea right after I took the pill but it wasn\'t enough to make me stop.  I quit smoking on day 5.  I quit before my quit date.  I\'m planning to take the full 6 months but I\'m on week 6 and feel fantastic!  I highly recommend Chantix to anyone who has struggled to quit!"'

In [71]:
# Load the scraped review table. NOTE(review): the recorded output shows two
# leftover index columns ('Unnamed: 0.1', 'Unnamed: 0') and purely numeric
# column names 0-10 — presumably the file was saved with its index more than
# once and without headers; verify against the scraping step.
pd.read_csv("web_scrap.csv")

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0,Abilify review by 26 year old female patient,3,Marginally Effective,Severe Side Effects,bipolar disorder,15mg taken daily for the period of 12 weeks,none,"Cymbalta, 90mg/daily",I didn't notice any benefit at all. Supposedl...,A very uncomfortable inner restlessness was t...,I was prescribed Abilify (15mg/daily) to assi...
1,1,Abilify review by 26 year old female patient,3,Highly Effective,Moderate Side Effects,bipolar,10mg taken 1/day for the period of 8 mos,"bipolar mood disorder, acne, asthma","zoloft, wellbutrin, clonazepam, tretinoin, cl...",I had severe depression with agitation and mi...,"I became drowsy, however, with adequate sleep...",the abilify decreased the need for daily klon...
2,2,Abilify review by 26 year old female patient,3,Highly Effective,No Side Effects,depression/anxiety,2 mg taken daily for the period of 3 months,anxiety,Lexapro,Within 1 week of taking the cocktail of Abili...,no side effects have been noticed,I take one pill of each 1st thing in the am.....
3,3,Abilify review by 26 year old female patient,3,Considerably Effective,Extremely Severe Side Effects,depression not resolved with antidepressant d...,started out at 5mg the 10 & last 15mg taken ...,"Add, depression, poss. bipolar type 2, PTSD, ...","vyvance, ativan lisinopril, zantac, viville d...",While on abilify I can honestly say the depre...,but it caused memory loss and again an incid...,I am only taking ativan & getting psychologic...
4,4,Abilify review by 26 year old female patient,3,Ineffective,Severe Side Effects,bipolar,2mg to start taken once daily for the period...,anxiety,klonopin,None due to the short time taking drug.,Headache first morning at 4AM that was reliev...,Not much to tell. I was just starting treatm...
5,5,Abilify review by 26 year old female patient,3,Considerably Effective,No Side Effects,Depression,2mg taken once daily for the period of 1week,"High blood pressure, MS","Micardis, Copaxon, vidicon, alleges, cymbalta...",I've only been on it for a week but I've noti...,None so far.,My doctor added Abilify to my 60 mg of Cymbal...
6,6,Abilify review by 26 year old female patient,3,Considerably Effective,No Side Effects,Depression,2mg taken once daily for the period of 1week,"High blood pressure, MS","Micardis, Copaxon, vidicon, alleges, cymbalta...",I've only been on it for a week but I've noti...,None so far.,My doctor added Abilify to my 60 mg of Cymbal...
7,7,Abilify review by 26 year old female patient,3,Considerably Effective,No Side Effects,Depression,2mg taken once daily for the period of 1week,"High blood pressure, MS","Micardis, Copaxon, vidicon, alleges, cymbalta...",I've only been on it for a week but I've noti...,None so far.,My doctor added Abilify to my 60 mg of Cymbal...
8,8,Acanya review by 32 year old female patient,9,Highly Effective,Mild Side Effects,Cystic Acne,Small amount for affected area. (dosage freq...,Scaring,Atralin,"Acanya Gel treated cystic acne, blackheads, w...","Dryness, slight redness and a small amount of...",The combination of clindamycin and benzoyl pe...
9,9,Accolate review by 44 year old female patient,1,Ineffective,No Side Effects,Capsuar Contracture of Breast Implant,20mg taken twice daily for the period of 3 m...,none,none,"The benefits were not good, and this drug was...",I tolerated the medication very well. I didn...,I was required to take Accolate for three mon...
