## Start OpenRefine service on MyBinder

In [1]:
import subprocess
import time
# Seconds to wait after launching the start script before later cells
# try to talk to the server.
REFINE_STARTUP_WAIT_SECONDS = 10

# Run the start script from the working directory and keep its exit status
# instead of discarding it, so a failed launch is visible in the cell output.
refine_start_status = subprocess.call("./start_refine.sh",shell=True)
if refine_start_status != 0:
    print("start_refine.sh exited with status {}".format(refine_start_status))

#wait for the configured number of seconds
time.sleep(REFINE_STARTUP_WAIT_SECONDS)

## Install the OpenRefine client API and copy it into the working directory

In [1]:
!git clone https://github.com/nikolausn/refine-client-py

Cloning into 'refine-client-py'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 759 (delta 1), reused 3 (delta 1), pack-reused 753[K
Receiving objects: 100% (759/759), 747.99 KiB | 1.73 MiB/s, done.
Resolving deltas: 100% (375/375), done.


In [2]:
!cp -R refine-client-py/google ./

In [3]:
from google.refine import refine
import pandas as pd

## Check OpenRefine Server connection

Check the connection and create an object, `refine_server`, for OpenRefine server transactions.

In [4]:
refine.RefineServer().get_version()

{u'full_name': u'OpenRefine 3.0-beta [TRUNK]',
 u'full_version': u'3.0-beta [TRUNK]',
 u'revision': u'TRUNK',
 u'version': u'3.0-beta'}

In [5]:
# Wrap a RefineServer built with its default constructor arguments in a
# Refine object; the cells below use it to list, create and open projects.
refine_server = refine.Refine(refine.RefineServer())

In [6]:
def list_project(refine_server):
    """
    Return the server's projects as a DataFrame, one row per project.

    The mapping returned by ``refine_server.list_projects()`` is turned into
    a frame, transposed, and its former index is exposed as the
    ``ProjectId`` column.
    """
    projects_by_id = refine_server.list_projects()
    per_project = pd.DataFrame(projects_by_id).T
    flattened = per_project.reset_index()
    return flattened.rename(columns={"index": "ProjectId"})

In [7]:
def search_project(refine_server,project_name):
    """
    Return the rows of the project listing whose name equals project_name.

    When the listing has no rows it is returned unchanged, without applying
    the name filter.
    """
    projects = list_project(refine_server)
    if projects.shape[0] == 0:
        return projects
    return projects[projects.name==project_name]

In [8]:
search_project(refine_server,"drugs com test")

Unnamed: 0,ProjectId,contributors,created,creator,customMetadata,description,homepage,image,importOptionMetadata,license,modified,name,rowCount,subject,tags,title,version
24,2454770285607,,2019-03-29T14:27:51Z,,{},,,,"[{u'fileSource': u'drugsComTest_raw.tsv', u'en...",,2019-03-29T14:27:54Z,drugs com test,0,,[],,


## List OpenRefine Projects using list_projects() method

In [9]:
class RefineServerHelper():
    """
    Convenience wrapper around the OpenRefine client object passed in.

    It offers pandas-friendly project listing/search, opening and deleting
    projects by name, and a few shortcuts (row preview, case transforms,
    clustering) that operate on the currently opened project, which is
    stored in the ``active_project`` attribute.
    """
    def __init__(self, refine_server):
        self.refine_server = refine_server
        # Set by open_project_byname(); None until a project has been opened.
        self.active_project = None

    def _require_active_project(self):
        """
        return the active project, or fail with a clear message if none is open
        """
        if self.active_project is None:
            raise Exception("No active project, call open_project_byname() first")
        return self.active_project

    def list_projects(self):
        """
        list OpenRefine project

        Returns a DataFrame with one row per project; the project id is
        exposed as the "ProjectId" column.
        """
        return pd.DataFrame(self.refine_server.list_projects()).transpose().reset_index().rename({"index":"ProjectId"},axis=1)

    def search_projects(self,project_name):
        """
        search project by name

        Returns the rows of list_projects() whose "name" equals project_name.
        When the listing has no rows it is returned unchanged.
        """
        t = self.list_projects()
        if t.shape[0]>0:
            t = t[t["name"]==project_name]
        return t

    def delete_project_byname(self,project_name):
        """
        delete project by name

        Every project whose name matches is opened and deleted; a line is
        printed for each project whose delete() call returns a truthy value.
        """
        t = self.search_projects(project_name)
        for x in t.ProjectId.values:
            if self.refine_server.open_project(project_id=x).delete():
                print("Project {} deleted".format(x))

    def open_project_byname(self,project_name,project_id=None):
        """
        open an openrefine project by name

        project_name: name to look up; ignored for the lookup when
            project_id is given.
        project_id: optional explicit id, needed when several projects
            share the same name.

        Returns the opened project, which is also stored in
        self.active_project.

        Raises Exception when no project, or more than one project, matches
        project_name and no project_id was given.
        """
        if project_id is None:
            t = self.search_projects(project_name)
            if t.shape[0] == 0:
                raise Exception("No instance with Project Name: '{}' exist".format(project_name))
            if t.shape[0] > 1:
                # The listing's name column is "name"; show id and name of each match.
                raise Exception("More than one instance with Project Name: '{}'\n{}\nPlease specify the project id".format(project_name,t.loc[:,["ProjectId","name"]]))
            project_id = t.ProjectId.values[0]
        else:
            # Use this helper's own server object, not a notebook-level global.
            project_name = self.refine_server.get_project_name(project_id)
        self.active_project = self.refine_server.open_project(project_id)
        print("OpenRefine Project {} opened, access using active_project property".format(project_name))
        return self.active_project

    def get_number_columns(self):
        """
        number of columns of the active project
        """
        return len(self._require_active_project().columns)

    def get_number_rows(self):
        """
        total number of rows reported by the active project
        """
        return self._require_active_project().get_rows().total

    def get_rows(self,start=0,limit=10):
        """
        fetch a window of rows of the active project as a DataFrame

        start: index of the first row to fetch.
        limit: maximum number of rows to fetch.

        Cell values are laid out following the column model's cellIndex;
        missing cells become None.
        """
        project = self._require_active_project()
        response = project.get_rows(start=start,limit = limit)
        # The column model is the same for every row, so fetch it once
        # instead of once per row.
        column_defs = project.get_models()["columnModel"]["columns"]
        temp_rows = []
        for row in response.rows.rows_response:
            cells = row["cells"]
            temp_cells = []
            for column_def in column_defs:
                cell_index = column_def["cellIndex"]
                # Treat a cell index beyond the row's cell list as an empty cell.
                cell = cells[cell_index] if cell_index < len(cells) else None
                temp_cells.append(None if cell is None else cell["v"])
            temp_rows.append(temp_cells)
        return pd.DataFrame(temp_rows,columns=project.columns)

    def to_lowercase(self,column_name):
        """
        apply the expression value.toLowercase() to column_name of the active project
        """
        return self._require_active_project().text_transform(column=column_name,expression="value.toLowercase()")

    def to_uppercase(self,column_name):
        """
        apply the expression value.toUppercase() to column_name of the active project
        """
        return self._require_active_project().text_transform(column=column_name,expression="value.toUppercase()")

    def to_titlecase(self,column_name):
        """
        apply the expression value.toTitlecase() to column_name of the active project
        """
        return self._require_active_project().text_transform(column=column_name,expression="value.toTitlecase()")

    def cluster(self,column_name,cluster_type="binning",function=None,params=None):
        """
        compute clusters for column_name on the active project

        All arguments are passed through to the project's compute_clusters();
        the result is wrapped in a DataFrame.
        """
        return pd.DataFrame(self._require_active_project().compute_clusters(column_name,cluster_type,function,params))

In [10]:
refine_helper = RefineServerHelper(refine_server)

In [11]:
refine_helper.list_projects()

Unnamed: 0,ProjectId,contributors,created,creator,customMetadata,description,homepage,image,importOptionMetadata,license,modified,name,rowCount,subject,tags,title,version
0,1503997082560,,2019-03-29T13:04:45Z,,{},,,,,,2016-12-27T15:15:02Z,IP_publications_2015Aug28 xls,0,,[],,
1,1589857055615,,2019-03-29T13:04:44Z,,{},,,,"[{u'processQuotes': True, u'fileSource': u'Air...",,2019-03-22T22:46:41Z,Airbnblistings_dirty csv csv,7594,,[],,
2,1611470445474,,2019-03-29T13:04:44Z,,{},,,,,,2017-06-03T02:29:39Z,menu_question csv,0,,[],,
3,1627151086685,,2019-03-29T13:04:44Z,,{},,,,,,2016-12-27T18:24:20Z,HBV_Genbank_metadata xls,0,,[],,
4,1640253865572,,2019-03-29T13:04:45Z,,{},,,,"[{u'processQuotes': True, u'fileSource': u'Air...",,2019-02-21T01:52:19Z,Airbnblistings_dirty csv,7594,,[],,
5,1684046182555,,2019-03-29T13:04:45Z,,{},,,,"[{u'processQuotes': True, u'fileSource': u'Air...",,2019-03-07T01:42:36Z,Airbnblistings_dirty csv,7594,,[],,
6,1724853533027,,2019-03-29T13:04:46Z,,{},,,,,,2016-09-13T16:49:19Z,IP_publications_2015Aug28 xls,0,,[],,
7,1734568039091,,2019-03-29T13:04:45Z,,{},,,,,,2016-11-22T01:58:50Z,testTraffic2,0,,[],,
8,1751164018924,,2019-03-29T13:04:46Z,,{},,,,,,2017-01-11T15:12:51Z,menu_question csv,0,,[],,
9,1892275851398,,2019-03-29T13:04:44Z,,{},,,,"[{u'processQuotes': True, u'fileSource': u'Air...",,2019-02-21T23:36:39Z,Airbnblistings_dirty csv csv test 2,7594,,[],,


## Create a New Project

In [12]:
# Download file
# The URL is quoted so the shell does not treat the "?" in it as a glob character.
!wget "https://github.com/nikolausn/Data_Cleaning_Python_Exercise/blob/master/drugsComTest_raw.tsv?raw=true" -O drugsComTest_raw.tsv

--2019-03-29 09:29:06--  https://github.com/nikolausn/Data_Cleaning_Python_Exercise/blob/master/drugsComTest_raw.tsv?raw=true
Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113
Connecting to github.com (github.com)|192.30.253.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/nikolausn/Data_Cleaning_Python_Exercise/raw/master/drugsComTest_raw.tsv [following]
--2019-03-29 09:29:06--  https://github.com/nikolausn/Data_Cleaning_Python_Exercise/raw/master/drugsComTest_raw.tsv
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/nikolausn/Data_Cleaning_Python_Exercise/master/drugsComTest_raw.tsv [following]
--2019-03-29 09:29:07--  https://raw.githubusercontent.com/nikolausn/Data_Cleaning_Python_Exercise/master/drugsComTest_raw.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.184.133
Connecting to raw.github

In [13]:
# delete project if exist
refine_helper.delete_project_byname("drugs com test")

Project 2454770285607 deleted


In [14]:
# Create a new project named "drugs com test" from the downloaded TSV file,
# passing a tab as the column separator.
drug_project = refine_server.new_project(project_file="drugsComTest_raw.tsv",project_name="drugs com test",separator="\t")

In [15]:
refine_helper.search_projects("drugs com test")

Unnamed: 0,ProjectId,contributors,created,creator,customMetadata,description,homepage,image,importOptionMetadata,license,modified,name,rowCount,subject,tags,title,version
18,2209775532362,,2019-03-29T14:29:09Z,,{},,,,"[{u'fileSource': u'drugsComTest_raw.tsv', u'en...",,2019-03-29T14:29:13Z,drugs com test,0,,[],,


In [17]:
drug_project.list_history()

{u'future': [], u'past': []}

## Open the drug project

In [18]:
drug_project = refine_helper.open_project_byname("drugs com test")

OpenRefine Project drugs com test opened, access using active_project property


In [19]:
refine_helper.active_project.project_name()

u'drugs com test'

In [20]:
# number of columns
refine_helper.get_number_columns()

7

In [21]:
refine_helper.get_number_rows()

53766

### Get List of Columns

In [22]:
drug_project.columns

[u'Column',
 u'drugName',
 u'condition',
 u'review',
 u'rating',
 u'date',
 u'usefulCount']

### Look at the rows

In [23]:
refine_helper.get_rows(start=0,limit=20)

Unnamed: 0,Column,drugName,condition,review,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",10,"February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",8,"May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",9,"September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",9,"March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",9,"October 22, 2015",4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...",4,"July 3, 2014",13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...",6,"June 6, 2016",1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...",9,"April 21, 2009",32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...",7,"October 18, 2016",21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...",2,"April 16, 2011",3


### create a new column for cleaning

In [24]:
drug_project.add_column(column="review",new_column="review_cleaned")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Create new column review_cleaned based on column review by filling 53766 rows with value',
  u'id': 1553869828598,
  u'time': u'2019-03-29T14:29:23Z'}}

In [25]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_cleaned,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...","""I&#039;ve tried a few antidepressants over th...",10,"February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...","""My son has Crohn&#039;s disease and has done ...",8,"May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""","""Quick reduction of symptoms""",9,"September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...","""Contrave combines drugs that were used for al...",9,"March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...","""I have been on this birth control for one cyc...",9,"October 22, 2015",4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...","""4 days in on first 2 weeks. Using on arms an...",4,"July 3, 2014",13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...","""I&#039;ve had the copper coil for about 3 mon...",6,"June 6, 2016",1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...","""This has been great for me. I&#039;ve been on...",9,"April 21, 2009",32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...","""Ive been on Methadone for over ten years and ...",7,"October 18, 2016",21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...","""I was on this pill for almost two years. It d...",2,"April 16, 2011",3


### Lowercase operation on review_cleaned

In [26]:
refine_helper.to_lowercase("review_cleaned")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Text transform on 53542 cells in column review_cleaned: value.toLowercase()',
  u'id': 1553869984629,
  u'time': u'2019-03-29T14:29:28Z'}}

In [27]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_cleaned,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...","""i&#039;ve tried a few antidepressants over th...",10,"February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...","""my son has crohn&#039;s disease and has done ...",8,"May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""","""quick reduction of symptoms""",9,"September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...","""contrave combines drugs that were used for al...",9,"March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...","""i have been on this birth control for one cyc...",9,"October 22, 2015",4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...","""4 days in on first 2 weeks. using on arms an...",4,"July 3, 2014",13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...","""i&#039;ve had the copper coil for about 3 mon...",6,"June 6, 2016",1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...","""this has been great for me. i&#039;ve been on...",9,"April 21, 2009",32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...","""ive been on methadone for over ten years and ...",7,"October 18, 2016",21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...","""i was on this pill for almost two years. it d...",2,"April 16, 2011",3


### Delete the leading and trailing quote (`"`) in the review_cleaned column using regular expressions

In [28]:
# Strip the double quote at the start of each value; the expression is written
# as a single-quoted Python string so the inner quotes need no backslashes.
drug_project.text_transform(column="review_cleaned",expression='value.replace(/^"/,"")')

{u'code': u'ok',
 u'historyEntry': {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/^"/,"")',
  u'id': 1553870065865,
  u'time': u'2019-03-29T14:29:33Z'}}

In [29]:
drug_project.text_transform(column="review_cleaned",expression='value.replace(/"$/,"")')

{u'code': u'ok',
 u'historyEntry': {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/"$/,"")',
  u'id': 1553870008027,
  u'time': u'2019-03-29T14:29:38Z'}}

In [30]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_cleaned,rating,date,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",i&#039;ve tried a few antidepressants over the...,10,"February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",my son has crohn&#039;s disease and has done v...,8,"May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",quick reduction of symptoms,9,"September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",contrave combines drugs that were used for alc...,9,"March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",i have been on this birth control for one cycl...,9,"October 22, 2015",4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...",4 days in on first 2 weeks. using on arms and...,4,"July 3, 2014",13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...",i&#039;ve had the copper coil for about 3 mont...,6,"June 6, 2016",1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...",this has been great for me. i&#039;ve been on ...,9,"April 21, 2009",32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...",ive been on methadone for over ten years and c...,7,"October 18, 2016",21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...",i was on this pill for almost two years. it do...,2,"April 16, 2011",3


## To date operation

In [31]:
# copy column date
drug_project.add_column(column="date",new_column="date_cleaned")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Create new column date_cleaned based on column date by filling 53766 rows with value',
  u'id': 1553870649752,
  u'time': u'2019-03-29T14:29:42Z'}}

In [32]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_cleaned,rating,date,date_cleaned,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",i&#039;ve tried a few antidepressants over the...,10,"February 28, 2012","February 28, 2012",22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",my son has crohn&#039;s disease and has done v...,8,"May 17, 2009","May 17, 2009",17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",quick reduction of symptoms,9,"September 29, 2017","September 29, 2017",3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",contrave combines drugs that were used for alc...,9,"March 5, 2017","March 5, 2017",35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",i have been on this birth control for one cycl...,9,"October 22, 2015","October 22, 2015",4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...",4 days in on first 2 weeks. using on arms and...,4,"July 3, 2014","July 3, 2014",13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...",i&#039;ve had the copper coil for about 3 mont...,6,"June 6, 2016","June 6, 2016",1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...",this has been great for me. i&#039;ve been on ...,9,"April 21, 2009","April 21, 2009",32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...",ive been on methadone for over ten years and c...,7,"October 18, 2016","October 18, 2016",21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...",i was on this pill for almost two years. it do...,2,"April 16, 2011","April 16, 2011",3


In [33]:
drug_project.text_transform(column="date_cleaned",expression="value.toDate()")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Text transform on 53766 cells in column date_cleaned: value.toDate()',
  u'id': 1553870654594,
  u'time': u'2019-03-29T14:29:43Z'}}

In [34]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_cleaned,rating,date,date_cleaned,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...",i&#039;ve tried a few antidepressants over the...,10,"February 28, 2012",2012-02-28T00:00:00Z,22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...",my son has crohn&#039;s disease and has done v...,8,"May 17, 2009",2009-05-17T00:00:00Z,17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""",quick reduction of symptoms,9,"September 29, 2017",2017-09-29T00:00:00Z,3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...",contrave combines drugs that were used for alc...,9,"March 5, 2017",2017-03-05T00:00:00Z,35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...",i have been on this birth control for one cycl...,9,"October 22, 2015",2015-10-22T00:00:00Z,4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...",4 days in on first 2 weeks. using on arms and...,4,"July 3, 2014",2014-07-03T00:00:00Z,13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...",i&#039;ve had the copper coil for about 3 mont...,6,"June 6, 2016",2016-06-06T00:00:00Z,1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...",this has been great for me. i&#039;ve been on ...,9,"April 21, 2009",2009-04-21T00:00:00Z,32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...",ive been on methadone for over ten years and c...,7,"October 18, 2016",2016-10-18T00:00:00Z,21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...",i was on this pill for almost two years. it do...,2,"April 16, 2011",2011-04-16T00:00:00Z,3


### Cluster operation

In [35]:
# Compute clusters for the review_cleaned column, leaving every other
# compute_clusters argument at its default.
review_cluster = drug_project.compute_clusters("review_cleaned")

In [36]:
pd.DataFrame(review_cluster)

Unnamed: 0,0,1,2,3
0,"{u'count': 2, u'value': u'love it'}","{u'count': 2, u'value': u'love it.'}","{u'count': 1, u'value': u' love it'}","{u'count': 1, u'value': u'love it love it!!'}"
1,"{u'count': 7, u'value': u'excellent'}","{u'count': 2, u'value': u'excellent!'}","{u'count': 2, u'value': u'excellent!!'}","{u'count': 2, u'value': u'excellent.'}"
2,"{u'count': 5, u'value': u'it works.'}","{u'count': 3, u'value': u'it works'}","{u'count': 2, u'value': u'it works!!!'}","{u'count': 1, u'value': u'it works!!!!!!'}"
3,"{u'count': 7, u'value': u'works great'}","{u'count': 3, u'value': u'works great!'}","{u'count': 2, u'value': u'works great.'}","{u'count': 1, u'value': u'works great. '}"
4,"{u'count': 1, u'value': u'works for me'}","{u'count': 1, u'value': u'works for me!'}","{u'count': 1, u'value': u'works for me.'}",
5,"{u'count': 3, u'value': u'very helpful.'}","{u'count': 1, u'value': u'very helpful'}","{u'count': 1, u'value': u'very helpful!!'}",
6,"{u'count': 8, u'value': u'good'}","{u'count': 8, u'value': u'good.'}","{u'count': 1, u'value': u'good '}",
7,"{u'count': 6, u'value': u'very good'}","{u'count': 2, u'value': u'very good.'}","{u'count': 1, u'value': u'very good!'}",
8,"{u'count': 1, u'value': u'so far so good'}","{u'count': 1, u'value': u'so far so good.'}","{u'count': 1, u'value': u'so far, so good.'}",
9,"{u'count': 1, u'value': u'works very well'}","{u'count': 1, u'value': u'works very well.'}",,


## Merge cluster 1

In [37]:
# Collect the "value" of every member of the first cluster.
cluster_1 = []
for member in review_cluster[0]:
    cluster_1.append(member["value"])
cluster_1

[u'love it', u'love it.', u' love it', u'love it love it!!']

In [38]:
# Single edit rule on review_cleaned: from the values in cluster_1 to "love it".
drug_project.mass_edit("review_cleaned",edits=[{'from':cluster_1,'to':"love it"}])

{u'code': u'ok',
 u'historyEntry': {u'description': u'Mass edit 6 cells in column review_cleaned',
  u'id': 1553870705674,
  u'time': u'2019-03-29T14:29:47Z'}}

### use refine helper to show new cluster

In [39]:
refine_helper.cluster("review_cleaned")

Unnamed: 0,0,1,2,3
0,"{u'count': 7, u'value': u'excellent'}","{u'count': 2, u'value': u'excellent!'}","{u'count': 2, u'value': u'excellent!!'}","{u'count': 2, u'value': u'excellent.'}"
1,"{u'count': 5, u'value': u'it works.'}","{u'count': 3, u'value': u'it works'}","{u'count': 2, u'value': u'it works!!!'}","{u'count': 1, u'value': u'it works!!!!!!'}"
2,"{u'count': 7, u'value': u'works great'}","{u'count': 3, u'value': u'works great!'}","{u'count': 2, u'value': u'works great.'}","{u'count': 1, u'value': u'works great. '}"
3,"{u'count': 1, u'value': u'works for me'}","{u'count': 1, u'value': u'works for me!'}","{u'count': 1, u'value': u'works for me.'}",
4,"{u'count': 3, u'value': u'very helpful.'}","{u'count': 1, u'value': u'very helpful'}","{u'count': 1, u'value': u'very helpful!!'}",
5,"{u'count': 8, u'value': u'good'}","{u'count': 8, u'value': u'good.'}","{u'count': 1, u'value': u'good '}",
6,"{u'count': 6, u'value': u'very good'}","{u'count': 2, u'value': u'very good.'}","{u'count': 1, u'value': u'very good!'}",
7,"{u'count': 1, u'value': u'so far so good'}","{u'count': 1, u'value': u'so far so good.'}","{u'count': 1, u'value': u'so far, so good.'}",
8,"{u'count': 1, u'value': u'works very well'}","{u'count': 1, u'value': u'works very well.'}",,
9,"{u'count': 2, u'value': u'works well.'}","{u'count': 1, u'value': u'works well '}",,


### Play with History

In [41]:
drug_project.list_history()

{u'future': [],
 u'past': [{u'description': u'Create new column review_cleaned based on column review by filling 53766 rows with value',
   u'id': 1553869828598,
   u'time': u'2019-03-29T14:29:23Z'},
  {u'description': u'Text transform on 53542 cells in column review_cleaned: value.toLowercase()',
   u'id': 1553869984629,
   u'time': u'2019-03-29T14:29:28Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/^"/,"")',
   u'id': 1553870065865,
   u'time': u'2019-03-29T14:29:33Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/"$/,"")',
   u'id': 1553870008027,
   u'time': u'2019-03-29T14:29:38Z'},
  {u'description': u'Create new column date_cleaned based on column date by filling 53766 rows with value',
   u'id': 1553870649752,
   u'time': u'2019-03-29T14:29:42Z'},
  {u'description': u'Text transform on 53766 cells in column date_cleaned: value.toDate()',
   u'id': 1553870654594,
   u'time': u'2019-0

In [42]:
drug_project.add_column(column="review",new_column="review_test_history")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Create new column review_test_history based on column review by filling 53766 rows with value',
  u'id': 1553870380613,
  u'time': u'2019-03-29T14:30:50Z'}}

In [43]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_test_history,review_cleaned,rating,date,date_cleaned,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...","""I&#039;ve tried a few antidepressants over th...",i&#039;ve tried a few antidepressants over the...,10,"February 28, 2012",2012-02-28T00:00:00Z,22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...","""My son has Crohn&#039;s disease and has done ...",my son has crohn&#039;s disease and has done v...,8,"May 17, 2009",2009-05-17T00:00:00Z,17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""","""Quick reduction of symptoms""",quick reduction of symptoms,9,"September 29, 2017",2017-09-29T00:00:00Z,3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...","""Contrave combines drugs that were used for al...",contrave combines drugs that were used for alc...,9,"March 5, 2017",2017-03-05T00:00:00Z,35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...","""I have been on this birth control for one cyc...",i have been on this birth control for one cycl...,9,"October 22, 2015",2015-10-22T00:00:00Z,4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...","""4 days in on first 2 weeks. Using on arms an...",4 days in on first 2 weeks. using on arms and...,4,"July 3, 2014",2014-07-03T00:00:00Z,13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...","""I&#039;ve had the copper coil for about 3 mon...",i&#039;ve had the copper coil for about 3 mont...,6,"June 6, 2016",2016-06-06T00:00:00Z,1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...","""This has been great for me. I&#039;ve been on...",this has been great for me. i&#039;ve been on ...,9,"April 21, 2009",2009-04-21T00:00:00Z,32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...","""Ive been on Methadone for over ten years and ...",ive been on methadone for over ten years and c...,7,"October 18, 2016",2016-10-18T00:00:00Z,21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...","""I was on this pill for almost two years. It d...",i was on this pill for almost two years. it do...,2,"April 16, 2011",2011-04-16T00:00:00Z,3


In [44]:
drug_project.list_history()

{u'future': [],
 u'past': [{u'description': u'Create new column review_cleaned based on column review by filling 53766 rows with value',
   u'id': 1553869828598,
   u'time': u'2019-03-29T14:29:23Z'},
  {u'description': u'Text transform on 53542 cells in column review_cleaned: value.toLowercase()',
   u'id': 1553869984629,
   u'time': u'2019-03-29T14:29:28Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/^"/,"")',
   u'id': 1553870065865,
   u'time': u'2019-03-29T14:29:33Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/"$/,"")',
   u'id': 1553870008027,
   u'time': u'2019-03-29T14:29:38Z'},
  {u'description': u'Create new column date_cleaned based on column date by filling 53766 rows with value',
   u'id': 1553870649752,
   u'time': u'2019-03-29T14:29:42Z'},
  {u'description': u'Text transform on 53766 cells in column date_cleaned: value.toDate()',
   u'id': 1553870654594,
   u'time': u'2019-0

In [46]:
# Undo back to the second-to-last entry of the "past" history list.
history_past = drug_project.list_history()["past"]
drug_project.undo_project(history_id=history_past[-2]["id"])

True

In [47]:
drug_project.list_history()

{u'future': [{u'description': u'Create new column review_test_history based on column review by filling 53766 rows with value',
   u'id': 1553870380613,
   u'time': u'2019-03-29T14:30:50Z'}],
 u'past': [{u'description': u'Create new column review_cleaned based on column review by filling 53766 rows with value',
   u'id': 1553869828598,
   u'time': u'2019-03-29T14:29:23Z'},
  {u'description': u'Text transform on 53542 cells in column review_cleaned: value.toLowercase()',
   u'id': 1553869984629,
   u'time': u'2019-03-29T14:29:28Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/^"/,"")',
   u'id': 1553870065865,
   u'time': u'2019-03-29T14:29:33Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/"$/,"")',
   u'id': 1553870008027,
   u'time': u'2019-03-29T14:29:38Z'},
  {u'description': u'Create new column date_cleaned based on column date by filling 53766 rows with value',
   u'id': 1553870649752,

In [48]:
drug_project.add_column(column="review",new_column="review_test_history_new")

{u'code': u'ok',
 u'historyEntry': {u'description': u'Create new column review_test_history_new based on column review by filling 53766 rows with value',
  u'id': 1553870032560,
  u'time': u'2019-03-29T14:32:31Z'}}

In [49]:
drug_project.list_history()

{u'future': [],
 u'past': [{u'description': u'Create new column review_cleaned based on column review by filling 53766 rows with value',
   u'id': 1553869828598,
   u'time': u'2019-03-29T14:29:23Z'},
  {u'description': u'Text transform on 53542 cells in column review_cleaned: value.toLowercase()',
   u'id': 1553869984629,
   u'time': u'2019-03-29T14:29:28Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/^"/,"")',
   u'id': 1553870065865,
   u'time': u'2019-03-29T14:29:33Z'},
  {u'description': u'Text transform on 53766 cells in column review_cleaned: value.replace(/"$/,"")',
   u'id': 1553870008027,
   u'time': u'2019-03-29T14:29:38Z'},
  {u'description': u'Create new column date_cleaned based on column date by filling 53766 rows with value',
   u'id': 1553870649752,
   u'time': u'2019-03-29T14:29:42Z'},
  {u'description': u'Text transform on 53766 cells in column date_cleaned: value.toDate()',
   u'id': 1553870654594,
   u'time': u'2019-0

In [50]:
refine_helper.get_rows()

Unnamed: 0,Column,drugName,condition,review,review_test_history_new,review_cleaned,rating,date,date_cleaned,usefulCount
0,163740,Mirtazapine,Depression,"""I&#039;ve tried a few antidepressants over th...","""I&#039;ve tried a few antidepressants over th...",i&#039;ve tried a few antidepressants over the...,10,"February 28, 2012",2012-02-28T00:00:00Z,22
1,206473,Mesalamine,"Crohn's Disease, Maintenance","""My son has Crohn&#039;s disease and has done ...","""My son has Crohn&#039;s disease and has done ...",my son has crohn&#039;s disease and has done v...,8,"May 17, 2009",2009-05-17T00:00:00Z,17
2,159672,Bactrim,Urinary Tract Infection,"""Quick reduction of symptoms""","""Quick reduction of symptoms""",quick reduction of symptoms,9,"September 29, 2017",2017-09-29T00:00:00Z,3
3,39293,Contrave,Weight Loss,"""Contrave combines drugs that were used for al...","""Contrave combines drugs that were used for al...",contrave combines drugs that were used for alc...,9,"March 5, 2017",2017-03-05T00:00:00Z,35
4,97768,Cyclafem 1 / 35,Birth Control,"""I have been on this birth control for one cyc...","""I have been on this birth control for one cyc...",i have been on this birth control for one cycl...,9,"October 22, 2015",2015-10-22T00:00:00Z,4
5,208087,Zyclara,Keratosis,"""4 days in on first 2 weeks. Using on arms an...","""4 days in on first 2 weeks. Using on arms an...",4 days in on first 2 weeks. using on arms and...,4,"July 3, 2014",2014-07-03T00:00:00Z,13
6,215892,Copper,Birth Control,"""I&#039;ve had the copper coil for about 3 mon...","""I&#039;ve had the copper coil for about 3 mon...",i&#039;ve had the copper coil for about 3 mont...,6,"June 6, 2016",2016-06-06T00:00:00Z,1
7,169852,Amitriptyline,Migraine Prevention,"""This has been great for me. I&#039;ve been on...","""This has been great for me. I&#039;ve been on...",this has been great for me. i&#039;ve been on ...,9,"April 21, 2009",2009-04-21T00:00:00Z,32
8,23295,Methadone,Opiate Withdrawal,"""Ive been on Methadone for over ten years and ...","""Ive been on Methadone for over ten years and ...",ive been on methadone for over ten years and c...,7,"October 18, 2016",2016-10-18T00:00:00Z,21
9,71428,Levora,Birth Control,"""I was on this pill for almost two years. It d...","""I was on this pill for almost two years. It d...",i was on this pill for almost two years. it do...,2,"April 16, 2011",2011-04-16T00:00:00Z,3
