In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
import pandas as pd
import os
import csv
import numpy as np

In [2]:
#Extracting data from source 1:

In [3]:
# URL of page to be scraped
url = 'https://www.rxassist.org/pap-info/generic-drug-list-print'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'html.parser')

In [4]:
# skip the first 6 headser rows
data_rows = soup.findAll('tr')[6:]

In [5]:
#Extracting table row frist and appending the data in csv form
drug_data = [[td.getText()for td in data_rows[i].findAll('td')]
            for i in range(len(data_rows))]


In [6]:
#Defining column headers:
bg_column_headers = ["Generic Name","drugName"]

In [7]:
#Converting the list data to dataframe:
bg_df = pd.DataFrame(drug_data,columns=bg_column_headers)

In [8]:
bg_df.head()

Unnamed: 0,Generic Name,drugName
0,alendronate tablet,Fosamax
1,acyclovir capsule,Zovirax
2,acyclovir tablet,Zovirax
3,albuterol inhalation solution,Albuterol Inhalation Solution
4,albuterol sulfate,ProAir RespiClick Powder Inhaler


In [9]:
#Extracting data from source 2:

In [10]:
#Reading data from csv:
csv_path = ("Resources\drugsComTrain_raw.csv")

drug_detail_df = pd.read_csv(csv_path, encoding="ISO-8859-1")
   

In [11]:
drug_detail_df.head()


Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount
0,206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9,20-May-12,27
1,95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8,27-Apr-10,192
2,92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5,14-Dec-09,17
3,138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8,3-Nov-15,10
4,35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9,27-Nov-16,37


In [12]:
#Transforming the necessary data:

In [13]:
# Create new table
drug_detail_df = drug_detail_df[["drugName", "condition","review","rating"]]
drug_detail_df.head()

Unnamed: 0,drugName,condition,review,rating
0,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9
1,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8
2,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5
3,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8
4,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9


In [14]:
#Merging the cleaned data extracted from the 2 data sources:
final_merge_df=pd.merge(drug_detail_df,bg_df, on="drugName")
final_merge_df.head()

Unnamed: 0,drugName,condition,review,rating,Generic Name
0,Keppra,Epilepsy,""" I Ve had nothing but problems with the Kepp...",1,levetiracetam tablet
1,Keppra,Seizures,"""It works very well""",9,levetiracetam tablet
2,Keppra,Seizures,"""My 8 year old was put on Keppra for complex p...",10,levetiracetam tablet
3,Keppra,Seizures,"""I had my first seizure in 04/04/14 whilst dri...",10,levetiracetam tablet
4,Keppra,Seizures,"""I have been on Keppra for about 15 yrs. I rec...",10,levetiracetam tablet


In [15]:
# Loading the tables from pandas to mongo database drugdb.

In [16]:
# The default port used by MongoDB is 27017
# https://docs.mongodb.com/manual/reference/default-mongodb-port/
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [17]:
# Define the 'drugsdb' database in Mongo
db = client["drugsdb"]

In [18]:
#collection = db.drugs
collection = db["drugsdb"]

#Loading data in MongoDB:
collection.insert_many(final_merge_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x1db656f4648>

In [19]:
drugs = db.drugsdb.find()

# Iterate through each student in the collection
for drug in drugs:
    print(drug)


{'_id': ObjectId('5c749eeb887c65496014b9c0'), 'drugName': 'Keppra', 'condition': 'Epilepsy', 'review': '" I Ve had  nothing but problems with the Keppera : constant shaking in my arms &amp; legs &amp; pins &amp; needles feeling in my arms &amp; legs severe light headedness no appetite &amp; etc."', 'rating': 1, 'Generic Name': 'levetiracetam tablet'}
{'_id': ObjectId('5c749eeb887c65496014b9c1'), 'drugName': 'Keppra', 'condition': 'Seizures', 'review': '"It works very well"', 'rating': 9, 'Generic Name': 'levetiracetam tablet'}
{'_id': ObjectId('5c749eeb887c65496014b9c2'), 'drugName': 'Keppra', 'condition': 'Seizures', 'review': '"My 8 year old was put on Keppra for complex partial seizures.  Except for some initial tiredness in the first two weeks she has had no side effects.  She takes 250mg twice a day.  She even won her age group in a 1mile race two weeks ago and her competitive soccer team won their soccer tournament this weekend thanks to some of the goals she scored.  I have noti

{'_id': ObjectId('5c749eeb887c65496014ba25'), 'drugName': 'Keppra', 'condition': 'Seizures', 'review': '"I&#039;m 17 years old and have been taking Keppra for three years now. Keppra has stopped my seizures except on 3 occasions where I missed a dose. Although it works very well I have been experienceing some side affects including moodiness, iritability, anger, rage, confusion and hallucinations. Although I have side effects, I like the overall results of being a well kid."', 'rating': 6, 'Generic Name': 'levetiracetam tablet'}
{'_id': ObjectId('5c749eeb887c65496014ba26'), 'drugName': 'Keppra', 'condition': 'Seizures', 'review': '"I was placed on Keppra in January 2009 by an emergency room physician after a reported seizure, which had no professional diagnosis. A neurologist whom I saw post emergency kept me on the medication as I reported no side effects. I had what I now believe to be a fainting spell related to an atrial fibrillation condition of my heart and a strenuous run, witho

{'_id': ObjectId('5c749eeb887c65496014bd29'), 'drugName': 'Lamictal', 'condition': 'Bipolar Disorde', 'review': '"I just started this med and I am on week 5 of the &quot;starter&quot; pack. I know it&#039;s a little soon to give my opinion BUT I want to give words of encouragement if you&#039;re struggling with your diagnosis and taking &quot;that pill&quot;. I spent so much time in a deep, dark hole and I started getting &quot;scared&quot; that I wasn&#039;t going to see sunshine in my life again. I became desperate enough to call my Doc and agreed that I was &quot;probably&quot; bipolar (she saw it and I was in denial!) and I was willing to talk &quot;meds&quot;. After the first two weeks I noticed that life was looking better and I felt like participating a little...by the end of week 4 I was able to have a conversation. I&#039;m starting to see beautiful colors in my once bleak world!"', 'rating': 10, 'Generic Name': 'lamotrigine'}
{'_id': ObjectId('5c749eeb887c65496014bd2a'), 'dru

{'_id': ObjectId('5c749eeb887c65496014bf1d'), 'drugName': 'Xanax', 'condition': 'Anxiety', 'review': '"I&#039;m a 19 year old girl and I&#039;ve been diagnosed with Anxiety for a long time but never had an anxiety attack until now. My doctor prescribed me this medicine yesterday after I told her I had an anxiety attack 1 day ago. My heart was literally at a speed of 100 but she gave me medicine to slow it down. It really helped me sleep last night too. This medicine took me about 30-45 minutes to kick in and then it knocked me out and I was fast asleep."', 'rating': 10, 'Generic Name': 'alprazolam'}
{'_id': ObjectId('5c749eeb887c65496014bf1e'), 'drugName': 'Xanax', 'condition': 'Panic Disorde', 'review': '"I take 0.25mg three times a day for panic attacks. I wish I could say Xanax stops the panic attacks completely, but it only lessons the symptoms until I feel okay enough to calm myself down with breathing exercises. Other than that--- Xanax is a miracle."', 'rating': 10, 'Generic Nam

{'_id': ObjectId('5c749eeb887c65496014c304'), 'drugName': 'Zoloft', 'condition': 'Depression', 'review': '"I&#039;ve dealt with many periods of depression in my life, but I hit an all-time low a year and a half ago culminating in a suicide attempt. This medication helped me get back on my feet. I started with a low dose (25mg/day-I&#039;m small and female) and have slowly increased to the normal dosage my doctor prescribes for adults (50mg/day). I went off of it for two months after five months of taking it and fell back into depression. Went back on it and have been fine since. I don&#039;t think I will stop again for a while. Only problems are occasional insomnia and inability to orgasm anymore."', 'rating': 8, 'Generic Name': 'sertraline HCI'}
{'_id': ObjectId('5c749eeb887c65496014c305'), 'drugName': 'Zoloft', 'condition': 'Depression', 'review': '"Went on Zoloft after Celexa stopped working and I quit cold turkey(don&#039;t do!!)\r\nMade me feel 10x worse within days. Not for me.\r

{'_id': ObjectId('5c749eeb887c65496014c6ec'), 'drugName': 'Klonopin', 'condition': 'Panic Disorde', 'review': '"I take 0.5 mg twice daily - actually Xanax worked slightly better, but my doctor believes Klonopon has less risk of addiction, so he prescribed it for long term use, which I need to treat panic disorder.  My panic disorder emerged as a young child and then went into remission from my twenties through my early forties, only to crop up again a few months ago.  I take 100 mg of Zoloft for depression and the Klonopin for the panic and I have my life back.  I do get a little tired each day, but my range of emotions are normal, not desperate, hopeless and fraught with terror of panic.  These medicines area a God send and I thank God daily for them."', 'rating': 9, 'Generic Name': 'clonazepam tablet'}
{'_id': ObjectId('5c749eeb887c65496014c6ed'), 'drugName': 'Klonopin', 'condition': 'Bipolar Disorde', 'review': '"When a person has mental symptoms , they&#039;re supposed to have medi

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




{'_id': ObjectId('5c7722ec887c6538f421cecd'), 'drugName': 'Zoloft', 'condition': 'Anxiety and Stress', 'review': '"I started taking Zoloft 50mg yesterday.  I feel a lot better in terms of my mood.  However, I am jittery.  My hands are trembling, pupils are dilated and have dry mouth.  I do understand that I have to let it work its way into my system so I&#039;m sticking with it.  It&#039;s been 1 day!  I&#039;m glad to see that some of the positive responses had the same immediate side effects that I&#039;m dealing with right now.  I am giving it a chance to work, honestly haven&#039;t felt anxious all day."', 'rating': 7, 'Generic Name': 'sertraline HCI'}
{'_id': ObjectId('5c7722ec887c6538f421cece'), 'drugName': 'Zoloft', 'condition': 'Panic Disorde', 'review': '"It took about 10 weeks but then all of a sudden. Bam. I feel great. Initial side effects were turture"', 'rating': 9, 'Generic Name': 'sertraline HCI'}
{'_id': ObjectId('5c7722ec887c6538f421cecf'), 'drugName': 'Zoloft', 'con

{'_id': ObjectId('5c7722ec887c6538f421d2b4'), 'drugName': 'Klonopin', 'condition': 'Panic Disorde', 'review': '"I was suffering with terrible panic attacks for a few years before the attacks became more and more severe and i was starting to have the panic attacks more often. I hope none of you ever have to go thru even one panic attack. I can&#039;t explain fully how they make you feel but I was sure during each time I had one of these attacks that I might actually die. So much starts to happen to my body during an atfack. My heart feels like it is going to literally explode. My breathing becomes hyperventalating. So finally I was prescribed 1mg of klonapin 2 x a day and it has worked wonders in helping stop attacks"', 'rating': 9, 'Generic Name': 'clonazepam tablet'}
{'_id': ObjectId('5c7722ec887c6538f421d2b5'), 'drugName': 'Klonopin', 'condition': 'Anxiety', 'review': '"As well adapt to the blind-sided panics, this medicine does thoroughly well and makes brief adjustments to trigger 

{'_id': ObjectId('5c7722ec887c6538f421d69b'), 'drugName': 'Effexor', 'condition': 'Depression', 'review': '"I took Effexor for three years due to depression. I loved the fact that I was no longer depressed but I was tired all the time. I could sleep for 10 hours get up and two hours later I was ready to sleep again. If I missed taking a dose I would get a horrible headache. The only reason I stopped taking Effexor was due to being tired all the time. Getting off of the medication was awful. I have never been so sick in my life. If you are going to take this medicine the rest of your life I highly recommend it. It did wonders for my depression."', 'rating': 8, 'Generic Name': 'venlafaxine tablet'}
{'_id': ObjectId('5c7722ec887c6538f421d69c'), 'drugName': 'Effexor', 'condition': 'Anxiety and Stress', 'review': '"I started taking Effexor XR 4 months ago.  My entire life I&#039;ve struggled with anxiety.  I have gone to cognitive behavioral therapy to help me change my responses to anxiety

{'_id': ObjectId('5c7722ec887c6538f421da83'), 'drugName': 'Prozac', 'condition': 'Major Depressive Disorde', 'review': '"I know this is a process, but it sucks.  I&#039;m now taking 40 mg of Prozac and it literally does nothing for me. I started with Zoloft, and at least that was an appetite suppressant  Both Zoloft and Prozac just make me tired, which is the opposite of what I&#039;m looking for.  My goal is to gain energy and be able to do...anything...other than sleep."', 'rating': 1, 'Generic Name': 'fluoxetine capsule'}
{'_id': ObjectId('5c7722ec887c6538f421da84'), 'drugName': 'Prozac', 'condition': 'Depression', 'review': '"On Prozac for 6 months. Never been better."', 'rating': 10, 'Generic Name': 'fluoxetine capsule'}
{'_id': ObjectId('5c7722ec887c6538f421da85'), 'drugName': 'Prozac', 'condition': 'Anxiety and Stress', 'review': '"5 weeks on Prozac. Have lost ten pounds (and needed to do so).  Appetite remains but I finally know when I&#039;m full.  Cannot drink more than an oc

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [20]:
#Collected the below dataset of top 200 drugs to merge with other data.
#no matching data found so decided not to merge the dataset with the other 2 data sets.

In [21]:
# URL of page to be scraped
url1 = 'https://clincalc.com/DrugStats/Top200Drugs.aspx'
# Retrieve page with the requests module
response1 = requests.get(url1)
# Create BeautifulSoup object; parse with 'lxml'
soup1 = BeautifulSoup(response1.text, 'lxml')

In [22]:
t200_data_rows = soup1.findAll('tr')[1:]

In [23]:
#Extracting table row frist and appending the data in csv form
t200_drug_data = [[td.getText()for td in t200_data_rows[i].findAll('td')]
            for i in range(len(t200_data_rows))]

In [24]:
print(t200_drug_data)(5)

[['1', 'Levothyroxine', '114,344,324', '\xa01'], ['2', 'Lisinopril', '110,611,324', '\xa01'], ['3', 'Atorvastatin', '96,942,508', '0'], ['4', 'Metformin Hydrochloride', '81,305,415', '0'], ['5', 'Amlodipine Besylate', '75,201,622', '\xa02'], ['6', 'Metoprolol', '74,019,645', '\xa02'], ['7', 'Omeprazole', '70,626,980', '\xa01'], ['8', 'Simvastatin', '65,144,488', '\xa03'], ['9', 'Losartan Potassium', '49,281,054', '\xa03'], ['10', 'Albuterol', '47,109,711', '0'], ['11', 'Gabapentin', '44,154,514', '\xa02'], ['12', 'Hydrochlorothiazide', '43,472,270', '\xa01'], ['13', 'Acetaminophen; Hydrocodone Bitartrate', '43,109,574', '\xa04'], ['14', 'Sertraline Hydrochloride', '37,105,238', '0'], ['15', 'Furosemide', '32,692,726', '0'], ['16', 'Fluticasone', '29,899,932', '\xa08'], ['17', 'Acetaminophen', '29,325,845', '\xa01'], ['18', 'Amoxicillin', '28,117,284', '\xa01'], ['19', 'Alprazolam', '27,030,725', '\xa04'], ['20', 'Atenolol', '26,739,322', '\xa03'], ['21', 'Citalopram', '26,387,590', '0'

TypeError: 'NoneType' object is not callable

In [25]:
column_headers = ["seq", "drugName","# of Rxs", "change"]

In [26]:
t200_df = pd.DataFrame(t200_drug_data,columns= column_headers)

In [27]:
t200_df.head()

Unnamed: 0,seq,drugName,# of Rxs,change
0,1,Levothyroxine,114344324,1
1,2,Lisinopril,110611324,1
2,3,Atorvastatin,96942508,0
3,4,Metformin Hydrochloride,81305415,0
4,5,Amlodipine Besylate,75201622,2


In [28]:
t200_df=t200_df[["drugName","# of Rxs"]]
t200_df.head()

Unnamed: 0,drugName,# of Rxs
0,Levothyroxine,114344324
1,Lisinopril,110611324
2,Atorvastatin,96942508
3,Metformin Hydrochloride,81305415
4,Amlodipine Besylate,75201622
