In [175]:
import csv                               # csv reader
import re
from sklearn.svm import LinearSVC
from nltk.classify import SklearnClassifier
from random import shuffle
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_fscore_support

In [176]:
# load data from a file and append it to the rawData
def loadData(path, Text=None):
    with open(path,encoding = 'utf8') as f:
        reader = csv.reader(f, delimiter='\t')
        for line in reader:
            if line[0] == "DOC_ID":  # skip the header
                continue
            (Id, Text, Label) = parseReview(line)
            rawData.append((Id, Text, Label))


def splitData(percentage):
    # A method to split the data between trainData and testData 
    dataSamples = len(rawData)
    halfOfData = int(len(rawData)/2)
    trainingSamples = int((percentage*dataSamples)/2)
    for (_, Text, Label) in rawData[:trainingSamples] + rawData[halfOfData:halfOfData+trainingSamples]:
        trainData.append((toFeatureVector(preProcess(Text)),Label))
    for (_, Text, Label) in rawData[trainingSamples:halfOfData] + rawData[halfOfData+trainingSamples:]:
        testData.append((toFeatureVector(preProcess(Text)),Label))

# Label information

In [177]:
# Convert line from input file into an id/text/label tuple
def parseReview(reviewLine):
    # Should return a triple of an integer, a string containing the review, and a string indicating the label
    # DESCRIBE YOUR METHOD IN WORDS
        Id = reviewLine[0] # Id should equal the first colum of reviewLine 
        Text = reviewLine[8] # Text should equal the last colum of reviewLine 
        Label = reviewLine[1] # Label should equal the second colum of reviewLine 
        return (Id, Text, Label) # return the value

In [178]:
# TEXT PREPROCESSING AND FEATURE VECTORIZATION

# Input: a string of one review
def preProcess(text):
    # Should return a list of tokens
    # DESCRIBE YOUR METHOD IN WORDS
     # word tokenisation
    text = re.sub(r"(\w)([.,;:!?'\"”\)])", r"\1 \2", text) # this process will tokenise or split punctuation in sentence.
    text = re.sub(r"([.,;:!?'\"“\(])(\w)", r"\1 \2", text) # this process will tokenise or split punctuation in sentence.
    print("tokenising:", text) # test the code by printing
    tokens = re.split(r"\s+",text) # store the result from tokenisation process in "tokens"
    # normalisation
    text = re.sub(r"(\S)\1\1+",r"\1\1\1", text) 
    tokens = [t.lower() for t in tokens] # this process will normalise the sentence from tokenisation process in order to separate in word by word
    return tokens # return the parameter

# Feature engineering

In [179]:
featureDict = {} # A global dictionary of features

def toFeatureVector(tokens):
    # Should return a dictionary containing features as keys, and weights as values
    # DESCRIBE YOUR METHOD IN WORDS
    d = {} # create the variable "d" to store dictionary and set it to empty dictionary
    for w in tokens:
        try: # try the loop from tokens
            i = featureDict[w] # add the value in dictionary
        except KeyError: # if it has error, it will run the code at below
            i = len(featureDict) + 1 # store the value in i
            featureDict[w] = i # add new word in dictionary
        try:
            d[i] += (1.0/len(tokens)) #store new value from featureDict to new dictionary as d by adding the previous value
        except KeyError: # if it has error, it will run the code at below
            d[i] = (1.0/len(tokens)) # store value in new dictionary "d"
        return d

In [180]:
toFeatureVector(["hello", "to"])

{1: 0.5}

In [181]:
# TRAINING AND VALIDATING OUR CLASSIFIER
def trainClassifier(trainData):
    print("Training Classifier...")
    pipeline =  Pipeline([('svc', LinearSVC())])
    return SklearnClassifier(pipeline).train(trainData)

# Cross validation

In [182]:
def crossValidate(dataset, folds):
    shuffle(dataset)
    cv_results = []
    foldSize = int(len(dataset)/folds)
    # DESCRIBE YOUR METHOD IN WORDS
    for i in range(0,len(dataset),foldSize):
        #continue # Replace by code that trains and tests on the 10 folds of data in the dataset
        tr_x = dataset[i : i + foldSize] # tr_x is the fold for testing
        train_data = dataset[0 : i] + dataset[foldSize : ]   # the training data which is the rest of dataset
        classifier = trainClassifier(train_data)  # train the data in classifier
        trainPred = predictLabels(tr_x, classifier)  # classify the test data to get predicted labels
        trainTrue = list(map(lambda t: t[1], tr_x)) # classify the test data to get true labels
        before_results = list(precision_recall_fscore_support(trainTrue, trainPred, average='weighted')) # find the precision, recall and f-score and store in before_results
        cv_results.append(before_results) # collect the result in cv_results
    return cv_results

In [183]:
# PREDICTING LABELS GIVEN A CLASSIFIER

def predictLabels(reviewSamples, classifier):
    return classifier.classify_many(map(lambda t: t[0], reviewSamples))

def predictLabel(reviewSample, classifier):
    return classifier.classify(toFeatureVector(preProcess(reviewSample)))

In [215]:
# MAIN

# loading reviews
# initialize global lists that will be appended to by the methods below
rawData = []          # the filtered data from the dataset file (should be 21000 samples)
trainData = []        # the pre-processed training data as a percentage of the total dataset (currently 80%, or 16800 samples)
testData = []         # the pre-processed test data as a percentage of the total dataset (currently 20%, or 4200 samples)

# the output classes
fakeLabel = 'fake'
realLabel = 'real'

# references to the data files
reviewPath = 'amazon_reviews.txt'

# Do the actual stuff (i.e. call the functions we've made)
# We parse the dataset and put it in a raw data list
print("Now %d rawData, %d trainData, %d testData" % (len(rawData), len(trainData), len(testData)),
      "Preparing the dataset...",sep='\n')
loadData(reviewPath) 

# We split the raw dataset into a set of training data and a set of test data (80/20)
# You do the cross validation on the 80% (training data)
# We print the number of training samples and the number of features before the split
print("Now %d rawData, %d trainData, %d testData" % (len(rawData), len(trainData), len(testData)),
      "Preparing training and test data...",sep='\n')
splitData(0.8)
# We print the number of training samples and the number of features after the split
print("After split, %d rawData, %d trainData, %d testData" % (len(rawData), len(trainData), len(testData)),
      "Training Samples: ", len(trainData), "Features: ", len(featureDict), sep='\n')

# QUESTION 3 - Make sure there is a function call here to the
# crossValidate function on the training set to get your results
crossValidate(trainData, 10)





Now 0 rawData, 0 trainData, 0 testData
Preparing the dataset...
Now 21000 rawData, 0 trainData, 0 testData
Preparing training and test data...
tokenising: When least you think so , this product will save the day . Just keep it around just in case you need it for something .
tokenising: Lithium batteries are something new introduced in the market there average developing cost is relatively high but Stallion doesn ' t compromise on quality and provides us with the best at a low cost .<br />There are so many in built technical assistants that act like a sensor in their particular forté . The battery keeps my phone charged up and it works at every voltage and a high voltage is never risked .
tokenising: I purchased this swing for my baby . She is 6 months now and has pretty much out grown it . It is very loud and doesn ' t swing very well . It is beautiful though . I love the colors and it has a lot of settings , but I don ' t think it was worth the money .
tokenising: I was looking for an

tokenising: I was just on  the Internet a few weeks ago and heard complaints about these chinese water ionizers .  These are the Aqua Ionizer by Air , Water and Life that are supposed to be made in the USA but are really made in China .  I bought my unit about 8 months ago and we used it for a while but really didn ' t notice any difference in our health .  We are active people in our late 40 ' s and we are always looking for ways to make us healthy .  We have read a lot about what to look for an this company , Real Spirit , put on the Internet that the machine was top of the line at a low price .  We bought it based on this information and after trying it we stopped using it because of the lack of results .  A friend of my  Wife ' s was over the other day and when we were talking , this subject came up .  He showed us a bunch of negative stuff online and we realized we fell for this scam .  He also showed us some other company ' s products and said they were certified by a USA water q

tokenising: For only this is a very great scope , and very Accurate once you get done with the hassle of Sighting it in , but Overall I love it and would Definitely recommend this to a friend an only 4 stars because you can always Improve on something
tokenising: I choose probotics because I would often see ads and commercial on how probotics where good for your health however after taking probotics for 7 days  i have a felt no changes in my bowels or the way i felt , I am saying that i did not dislike the product I am saying that probotics did nothing to improve myself .<br />i would recommend probotics to anyone who is willing to give it a try it might help them , but it did not work for me .
tokenising: Great book . I enjoyed this book . Brilliance is an exciting &#34 ; futuristic&#34 ; story that could easily have taken place today . A fun read , can ' t wait to start the 2nd novel .
tokenising: My son loved Dora so much that I decided to support and encourage his interest . Compar

tokenising: Took the poles out for the first time this Saturday and already one of the poles has broken . They are cheap and cheaply made , will be returned Monday morning for something of better quality .
tokenising: I could barely tolerate the taste and texture of these wraps and I ' m usually a very tolerant person who can eat anything . Once , I ate an entire plate of pasta that was both burned and uncooked . I could not figure out how that happened , even now .
tokenising: My daughter had a problem getting my grandson to eat his veggies . At one of his checkups the doctor did a blood test . We were told his iron was low , because lack of veggies in his diet . I bought her some of the food products , he just loved them .
tokenising: This was a pretty far-fetched piece of work , and that ' s putting it mildly . If I look at it as a form of entertainment , I supposed it works . But I have a feeling that that ' s not what the author intended .
tokenising: My husband has long , thin fi

tokenising: When I tried it on my phone it didn ' t fit quite well . I have to take it off and put on my old one back .
tokenising: You will need speakers for sure for this TV . I had to return it , just did not work for my needs . And it was for my kitchen and it still wasn ' t of good quality enough . Order this for an on-suite bathroom in a small space or a small home-office .
tokenising: My wife first got a pair of these golf shoes and was bragging how light they were . I have trouble with my feet so thought I would try them out . They are as light and comfortable as she said . I may even buy another pair .
tokenising: Stability balls are , well , stability balls . What makes this one different is it has a little sand in the bottom so the ball does not roll away when you get up . This is a simple and low cost modification does give it an edge over those that do not .
tokenising: Ok this is one of those products you get what you pay for . They quality is awful so don ' t waste your 

tokenising: Used this for only a week and I can ' t use it any more because it rubs against my arm as soon as I start running . It was bearable , though irritating , at first , but when I ran 6 miles yesterday , I have been in severe pain since then . Just putting on a shirt is painful due to the inflammation on the skin . The material is way too sticky and thick with very little breathing room . Didn ' t work out as I had expected .
tokenising: I have no idea how it happened . I just took it out of my pocket and there are two large cracks on my glass protector . Disappointed and wouldn ' t recommend .
tokenising: Excellent tool kit delivered promptly by amazon as always . One small disappointment is that package seal is broken . Don ' t know why . Apart from that all is good .
tokenising: This was a decent mask for the price I paid . It shipped fast too . I would change the ribbon that is used to tie the mask on , to a non silky type of fabric because it slips and loosens and I had to

tokenising: it should actually fit . I can barely get the hood to screw onto anything 58mm , and when it does , the threads inside were so cheaply made that I ' ve had the hood fall off numerous time just from my 75-300mm ' s autofocus spinning .
tokenising: The rug itself does look very nice in our apartment and we really love the pattern of it , we just wish the color had been true to that we picked and anticipated rather than we got and that the texture of the rug was softer and didn ' t pull/shed as easily as it does , making it look and feel much cheaper than it was .
tokenising: The unit came quickly , was easy to put together , and looked great . However the more I used it I was not super happy with it . The drawers when filled are hard to slide in the tracks and can pop out . Not a good system to hold a lot of weight in the sliders .
tokenising: I bought this product reading all these good reviews thinking I was getting a masterpiece of equipment !!! HOW I WAS WRONG !!!! It doe

tokenising: Keeps my PS4 sexy when vertical . I kept seeing all these pictures online with the PS4 vertical so I had to get a PS4 stand .<br />Only complaint I have is that my grandma received the package for me and didn ' t tell me for 7 days ! Something about not cleaning my room enough . I might be letting my anger out on this rating , but whatever ; blame my grandma . In in the end , it makes my PS4 more attractive and that ' s what matters .
tokenising: I loved this film as a teen . Yet I think I would get slightly bored today , because its a one of story and doesn ' t really narrate into anything contemporary .
tokenising: It broke after I leaned on it for support to put on my loafers . I ' m not even that heavy and I was sure that I was pushing it down instead of at an odd angle . The conclusion is that this is a bad product .
tokenising: The handle of the spoon is like holding a tooth brush handle . The handle is too heavy for most bowls and falls out of the bowl easily when yo

tokenising: this watch looks great and i get a ton of compliments on it . it also looks like the expensive kind . the only problem is it stains my hand with the metal
tokenising: Normally this is a great help . I am handicapped and shopping online is a tremendous advantage , but this time I was disappointed . The ink is light and won ' t print a clear vivid picture . I have a new grandson and can ' t get one good picture printed .
tokenising: Colors do not match photo . if you are getting this carpet because of the colors-don ' t , especially if you are decorating around these colors . They are not what they appear to be in the photo . I ' m not sure what ' s up with the photo or if it ' s even the same carpet .
tokenising: This clipper has a fixed blade system or will cut hair very , very short only . You can ' t really use this to cut the body hair of the animal . The attached clipper comb guides are useless and do not cut very well . It cuts very well without the comb guides .
token

tokenising: Do not recommend ! We use ( soon to be used to ) this for a baby monitor and it would always disconnect ! What ' s the point of this if it disconnects during the night !
tokenising: So you ' ll want something well over 50 gigs to download & install this . Quite a chunk of this " cleans up " after install but it comes in like the space hog it is . IMPORTANT do NOT run any files after download , get patch 1 . 05 and apply it before attempting to run anything . I had no problems with the Amazon side of things , and apart from some space issues the install went fine ... registering the software was a bit more ... problematic as the game gets tagged in Origins , EA , and Bioware social but in my case trying to launch from local shortcuts made me enter e-mail/pw for my online account then the reg code then telling me congrads w/big green play button which goes to we ' re sorry can ' t connect and that ' s it . For a week . Nothing . Yelled help and gave it a little time . Nada . 

tokenising: This is already my second purchase , my old one got lost after having an event in my place . This stuff really works , just well as the decanter that I recently had . It is much easier to use and also easy to clean . I love this brand .
tokenising: Great price and a quality product for fine grinding . Silver color , looks much more expensive than the price tag . You won ' t be disappointed unless you have unrealistic expectations . This is a nice tool to have at a very good price . Con : No cleaning brush included as the photo shows . Mine came packaged in a thin plastic bag with no padding and no way to close the bag . But no damage in the box which did have additional cushioning inside . For the price , you can beat it man . In a hemp shop these things run for 4x the price . This things works great when grinding up your cough cough &#34 ; herbs and cooking garnishes&#34 ; :) For the price , you can beat it man . In a hemp shop these things run for 4x the price . This thin

tokenising: I ' m a new driver and this one is help me a lot because if I drive to road some people don ' t know you are a new driver so they always doing over take coz I ' m slow . Then make me nervous i can ' t afford to pay somehow to teach my how to drive so just only my husband teaching me after his work . I recognize after I put this some people don ' t drive fast beside me maybe they see my tag . So now I ' m more safe now to my tag no more scared to other driver that people drive fast .
tokenising: &#34 ; Blue bird Purchased as a gift .  Nephew loved it .  I hear he spends time in the backyard flying it .  I got the blue one , seemed like a nice color .  Good size for him .&#34 ;
tokenising: I am a frustrated magician and I bought this set to teach my 6-year old son and 9-year old daughter some tricks . I am very happy that the set contains most of the common magic tricks we all know as a child and they were very easy to master . The stuff inside are very good quality ... almos

tokenising: These kick mats work just as described ! They might be plain and basic in design but they have done a fantastic job on saving the backs of our seats . Simple to install . I would recommend these to anyone . [[ASIN : B00CASDZKC Kick Mats - Deluxe Car Seat Back Protectors 2 Pack - Keep Your Car Seats 100% Clean From All The Stains And Scuffmarks Left By The Kids With These Auto-Protective Seat Covers - Designed For Most Vehicles - Protect Your Investment - Lifetime Guarantee]]
tokenising: This performs really nicely and gets our large bedroom . We have an oversised bedroom and this was a concern . But with this machine it was not a problem at all .
tokenising: It ' s just great ! I love that my grocery bags stand up and stay put and that both segments at either end are like coolers with zip closure ! Neat and handy ! You can also open it wide or adjust to whatever size you need . You can also pick it up and carry the whole shebang inside ! I liked it so much I got one for my 

tokenising: I grew up in the shadow of my " perfect " brother . He was adored . I was the outcast that never measured up . After years of searching I finally recognized that my older brother is a narcissistic personality . I read one book after the other . This is one of the very best . It was part of my healing process . The tools offered in this book helped me to value myself as an individual , to identify narcissistic personalities quickly so I have the upper hand over them and to remain calm and confident of myself .
tokenising: What ' s with people saying the book was bad because the main character was selfish and stupid in her decisions ? Do you honestly think this book would be compelling to read if the main character was the perfect character , Mother Teresa , expert backpacker ? Name one piece of good literature where the main character is perfect . Would you prefer if Cheryl lied and talked about how perfect she was and how smooth the journey was ? Some of the reviews of this

tokenising: I have been using this for a couple of months now and it is good and strong , I use this band during my insanity asylum workouts .<br />They stand up to the beating . Color coded according to resistance .<br />I like this because it was reasonably priced and it is still in operation .<br />The fact that it is a medium resistance is probably a better challenge
tokenising: For just 2 dollars you can have this memory card carrying case . Looks durable and made up of high quality materials . It keeps my cards stored in 1 place .
tokenising: This helicopter provides a very stable flight and easy to control . I would highly recommend this helicopter for outdoor flight .
tokenising: This case is a dream . Priced right ; two positions - one for reading one for typing ; looks fabulous ; elastic loop for stylus ; elastic loop to hold closed ; and the part that holds your iPad rotates so you can use it vertically or horizontally . This case looks great and functions well . I give it 5

tokenising: This chair is really made well with superior materials .  I have been looking for an affordable replica for quite some time .  This company had really high ratings so I went ahead and bought the chair .  I was not disappointed .  The chair and company are wonderful .  I can ' t say enough good things about both of them .
tokenising: I LOVE my new Nikon Coolpix camera ! This camera suits me just fine ... especially for the price . I am still getting acquainted with this camera but overall I am very much satisfied .
tokenising: I needed extra outlets and usb outlets too for my working desk and computer , so I looked at this and I was already amazed by what it is ! It has so much outlets on it for me to use , and so I bought this , and it works great ! I tested each outlet and they all work perfectly ! This is a great price for what they ' re giving you . Very helpful for offices or working desks .
tokenising: These have become my favorite . They are very strong , and while th

tokenising: I use this on my son ' s mattress and it works great . I would say if you have kids this is something to keep .
tokenising: These goggles are awesome ! They shipped crazy fast and are so much better than my other pairs . They come with a great protective case that fits easy in my gym bag without fear of me crushing them . They fit great and don ' t fog up . I highly recommend them to anyone !
tokenising: I have used this long time so I know its quality . I use it to power a food processor from Korea . I did not need 1000 watts but it seemed safer ( recommended tripling wattage to account for any possible power spikes ) and I can now buy other foreign plugs freely .
tokenising: I like collecting jewelries ever since . However , keeping and organizing had become a problem to me . I thought of buying a jewelry box , but it was too costly . So I decided to  buy these jewelry pouches for the mean time . I was very satisfied with these cheap pouches because it really served its p

tokenising: This cuddly panda is just the exact teddy my baby always wants to see around her . I love the unique design and quality of this Paci Buddy Panda because it gives a baby the right support for growth . I recommend this product !
tokenising: I‘m very fond of wearing bracelets . I keep on purchasing different types of bracelets . So , I also decided this one too . It is affordable and fashionable . I am very pleased with this purchase as I got it on time and in perfect condition .
tokenising: perfect because it keeps toys from floating away and also acts as a toy in itself . you can then teach your kids to help put toys away and and easily remove for stoarage .
tokenising: I really like this projector and would recommend it for this price range .<br /><br />I am using this in my living room for personal home theater , We have truly loved having this great projector in our home . Movie nights have become so much more fun now ...<br /><br />Pros :<br />1 . Picture is fairly clear

tokenising: I loved Altair , but Ezio is the best assassin of the series by far . I will have to say this trilogy is the best ASSASSIN ' S games but I enjoyed Black Flag much more but it ' s not really an assassin ' s game as it is a pirates game . The graphics are a little dated , and ACII and Brotherhood mechanics were lacking up until he gains his hook blade in Revelations . Great games though , definitely worth buying . After you finish Revelations , watch Assassin ' s Creed Embers to close Ezio ' s story . I wish they would do more birth-death AC , brings you closer to the character .
tokenising: I really like travel , this bag suit for me . big enough to hold everything and to allow me to search through it with east when trying to pull stuff out and throw stuff into it .
tokenising: Since it ' s a reaction free ratchet , there aren ' t any vibration shocks . It ' s very easy to use , even in the tightest areas . I love using the high-torque wrench , it ' s definitely the price !


tokenising: I bought this game for my 8 year old son - he is huge wrestling fan !  He absolutely loves this game !  The graphics are great and the game was very easy for him to pick up and learn .
tokenising: This is not a game , but a protective case for the game . It is sturdy and inexpensive . It will absorb the shock caused by little hands dropping the unit . It works well .
tokenising: This wii title has been a big hit at my home , with family and friends when they come over as well as just us here at home .  Nice to get a group of four together I ' ve found .  Lots of fun .  Playable even for those who don ' t really play video games .  I find that nice because I have a young video game enthusiast who likes to drag either willing or non-willing people in for a round of this !  Nice you can choose the length of time you wish to play too .
tokenising: Quality of item appears very good , metal surface and coloration are free of any defects and match the color of my rifle perfectly .

tokenising: This supermarket set is indeed a very creative and sturdy toy . The build quality is very solid and the colors on plastic pieces are very bright and realistic . The supermarket items look like real-life with their creatively designed colors and shapes .<br /><br />It is very engaging for young kids ( 3+ years ) and carries an exceptional educational value . As an adult you can teach the variety of products to kids , and focus on things like counting , addition , subtraction etc .<br /><br />This toy does require some time to set up and then one has to keep track of all the small pieces , which can be lost easily . It will appeal to around kids of 3-8 years of age and has potential to be used for long time ( not just a few months ). If you have other Calico family toy , this one will fit right in the collection .<br /><br />With so many little parts , you have to be cautious if there are other younger babies or infant in your home who can possibly swallow them .<br /><br />A

tokenising: This product is made in the US with lots of healthy , nutritious ingredients . One of our dogs , an elderly Shih Tzu , is very picky , and it is difficult to cajole him into eating . He is especially bad about eating anything in the morning . However , when I added a few of these to his breakfast , he ate almost his entire plate !<br /><br />This food makes him interested in food , and that makes me happy . He has bad teeth , but these are soft enough for him to easily eat . This is worth a try if you have a picky eater .
tokenising: This is something that has been ultra effective with a few mice situations , and would be something I would consider for outdoors and not inside . We had tried a number of methods including humane methods ( live traps were mostly garbage , with the best types giving extremely mixed results ), old school methods ( these work , but some mice outsmart them ), and live animal methods ( good , except the cat is sometimes more skilled hunting the Kit

tokenising: This is a fairly entertaining retread of familiar territory , and I did find parts to be extremely moving ( especially when dealing with loss ).<br /><br />However , the book failed to impress me for a number of reasons :<br />1/ A slew of cliched situations and plot devices ( ancient prophecy , circle of power , unnamed evil afoot , etc . )<br />2/ The characters were entirely one dimensional , and it was apparent at first encounter who was good and who was evil . No shades of grey .<br />3/ Each character ' s motivation was similarly obvious . Greed/power for the evil , and duty/honor for the good .<br />4/ The major plot line was extremely weak , was resolved with minimal fuss , and ended with a just-as-weak cliffhanger .<br /><br />This is not epic GRRM , and neither is it even close to the more mass market efforts by Goodkind . What it is , is a lazy read with minimal fuss and excitement , that won ' t raise your pulse rate as you read it .<br /><br />I gave it 3 stars

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 I own a lot of Portmeirion in this theme . We had a guest over one night and after that she mentioned to people later how impressed she was with the dishes . I bought this for her as a Christmas gift since I ' m sure she ' ll never have the intention to buy a set and is great as a stand alone piece . She is very pleased . My original Portmeirion pieces have the old &#34 ; Circa 1818&#34 ;, &#34 ; 1972&#34 ; stamp on their backs/bottoms . I bought them in the late 70 ' s and have only lost one dinner plate to date with everyday use ! I ' ve found these products to be quite durable and they haven ' t faded or lost their color with age .
tokenising: I ' ve used several volleyball over the years and this Japanese leather Molten is still my favorite .  Its got a great feel when you serve it or slam it down or even when it comes off your arm/hands when blocking and digging .  As a setter it makes you more alert which really elevates your game even more .  I ' d still pick this ball over the

tokenising: Stuart Woods has given us so many Stone Barrington books over the course of the years , we have watched him grow and change , even Elaine ' s is gone .  The last several books in the series for this reviewer at least have been very weak .  SEVERE CLEAR is better than the last so many books .  Its gets four and a half stars for a few reasons .  Better that the last few , not much involving Herbie , a plot that moves quickly and is exciting , even Lance is tolerable in this one .  Lots of guest from other series is good , but why has Dino ' s role been so reduced ?<br />In this one , Stone assists , but plays a small role till near the end , in neutralizing a terrorist plot against the USA to be centered in the new Arrington Hotel complex .  Pretty good yarn .  Series and book RECOMMENDED .  Always a nice beach read .
tokenising: seems well made - kids will like it - a little smaller than I expected ( it did not hold the twin size quilt I had planned to put in it - but did lo

tokenising: I have several KS watches and bags , but this is one of my faves . It ' s cute without being gaudy and adds a little splash of color to an otherwise sedate-looking watch . It keeps excellent time ; I ' ve never had to adjust it . All in all , a great buy .
tokenising: I am an amateur musician . All I wanted was a good recording . Now I am Very much happy and contended with this little `magical ' portable recording device . It is beyond my expectations . Even the recording with its built in internal mic , is awesome . My ( average )sound came out like a professional studio recording . Worth buying it . Price also so reasonable . If you are confused with many devices , just close your eyes and go forward it and buy it .
tokenising: Give Big Skinny a try and you will instantly become annoying .  :) You will want all your friends to buy one .  I have the women ' s red leather taxicat .  Since I got it , 8 friends have purchased a Big Skinny .<br /><br />Go to their store or wat

tokenising: I love this bag and have purchased it 3 times before . It has many sections in which to organize your belongings . However , each time , the zipper wore out and separated before the leather wore out . I just wish they would make it with a metal zipper .
tokenising: Excellent quality stuff , but I would classify it as a golden brown . Not as dark as I thought it would be .
tokenising: I bought this as a gift for a friend . It was among the deals of the day . Quality is good and what I expected .
tokenising: This purchase was a perfect replacement for the safety belt in our Graco highchair . Just wish there was a choice in color .
tokenising: The Bonmusica 4/4 violin shoulder rest was one of the best accessories I purchased . It made me play my violin with more comfort , which makes me play the instrument , instead of trying to deal with grabbing it . My playing technique has improved considerably , and I can practice for longer periods without getting tired , or feeling musc

tokenising: Perfect fit . Look great too . Used these for my first cycling race and since it was in the low 50 ' s I decided to go with the knicker rather than my shorts .
tokenising: Three stars because my little male Betta doesn ' t really care for the nourishment much . I particularly dislike what these treats do to the surface of the water - the oily film that never seems to go away , and we have a very good filter . Three stars for the convenience of the packaging but that ' s about it .
tokenising: Great pillow , but somewhat pricy . Much better than usual neck pillows sold at airport or bed bath & beyond . Would recommend .
tokenising: I saw this at the local theater when i was a kid .  When I purchased this I was thinking it was a different movie but soon into it I remembered it from my childhood .  The picture quality isn ' t very good and watching it one more time for me was enough just to see Mamie Van Dooren .  I would say if your into the old black and white movies it ' s 

tokenising: Ok I bought this item in March and has been on my BBQ since .. Well the summer is over and the cover has faded to gray color ..<br />Response from Charbroil was the cover should not be in direct sunlight and they can not gaurantee that it will not fade .<br />Very Lame , The BBQ is outside am I suppose to put it in the garage after use ..<br /><br />Hey it wasnt that expensive but to fade in 5 months is not right ... I give this item 2 stars and give their customer service 1 star for customer satisfaction ...<br /><br />So if you are looking for a cover that fades quickly buy this one ...
tokenising: Love this stuff with a banana , some pnut butter and vanilla .  Did I mention I am down 40 pounds ?  Whooooppppeee .
tokenising: Piece of junk worked OK for about a month and started to require reboots daily .  Eventually stopped working 100% of the time .  Nice easy setup though .
tokenising: While I was dissappointed that the frame for the backdrop is capable of being set up 

tokenising: I found this to be far more fascinating than David Cordingly ' s books about Caribbean pirates specifically because Joseph Gibbs ' book contains so many first hand accounts . The pirates and pirate hunters provide their story in their own words ; with Gibbs ' narrative as the binder holding the parts to the central theme .<br /><br />Where Cordingly so often explains his interpretations of history , and discusses how his interpretation differs from others ; &#34 ; On the Account&#34 ; presents first-hand evidence , and allows you to make your own interpretation .
tokenising: Received these bulbs quickly , and packaged very well .... no broken bulbs .<br />Have not installed them yet , so can not speak of quality , but they seem to be genuine Phillips , so they should be good .
tokenising: Been great do far . Nice solid ride in the rear now . This product does come 2 to a box too . Ordered 2 because I was lazy and didn ' t read the Q&A stuff . But Amazon was great about taki

tokenising: Very heavy and well built keyring .  Also very pretty and would recommend to anyone to purchase .  Granddaughter loved the gift .
tokenising: I have a first generation Sony LCD HDTV that only has DVI input . This hasn ' t been a problem until I bought a Roku 3 , which only has HDMI out . DVI doesn ' t accept HDMI audio , so I needed a solution that could accept HDMI input and output both video and analog audio . Enter this unit , a 4x2 switch with analog audio output . It works great , saved me from needing to buy a new TV .<br /><br />However , there ' s a glitch . If I turn on the Roku after I turn on the Monoprice , I get a ' restricted content ' message and only the audio works -- no video . If I turn off the switch for 15 or so seconds and turn it back on after the Roku ' s been started , everything is fine . Hence the 4 start review , it does the job , it does have a minor annoyance .
tokenising: This is the excellent answer to carry into the BATHROOM !  I love it and

tokenising: I like the watches , This Invicta Men ' s 3332 is another one that I like it has a good balance to it , very simple to use
tokenising: this cord works well . right out of the package fixed the problem . The price was one tenth of that of Staples .  Good buy
tokenising: I feel like this is a great record to memorialize the trip that these four men took . That being said , there was more information about their trip antics and less about the theme of what the nation thinks of God . I also feel like their sampling of people wasn ' t very large . I was hoping that there would be more pointed conversations .
tokenising: I got this per a recommendation from my doctor to help me relax enough to get a good nights sleep .<br /><br />I found that I got a great response from the first night I took it ; that surprised me since I usually have to take supplements for approx . a month before I notice any results . It doesn ' t knock me out but relaxes me so that I can sleep well and if I 

tokenising: I received the lamp for my Hd65 . It was labeled as an Optoma part on the website but the box had no indication of the maker . The lamp appeared to be identical to my blown lamp but it was a really sloppy fit in the machine and didn ' t work at first . I removed it and reinstalled it several times until it finally worked . After an hour of use I turned it off and the next day it wouldn ' t work again . I checked the Optoma website FAQ and it warned about counterfeit lamps so I returned this one and ordered one from an authorized dealer for double the price . It came in a completely different box with Optoma written all over it . The lamp had an Optoma original part sticker on it . It slid in perfectly with no problems and worked immediately . The manufacturing tolerance on this one was way below Optoma ' s standards and much sloppier than the factory part .
tokenising: I bought this product after running out of rustoleum . I sprayed it on aluminum that was sanded and had no

tokenising: The right boot is very difficult to pull on and tight , I thought it would stretch out after a few wears , but it hasn ' t yet . The left boot fits fine .
tokenising: This is a really sweet conversation piece , its not only beautiful but funny in such a way that most people who look at it smiles .  Its a lovely piece for all people who love whimsey .
tokenising: I use this makita model in my business we have 5 of them .. they are great machines<br />they get used hard and last about 4 yrs the way we beat them up
tokenising: I ' m 100% native Cajun ! Straight born and raise from Cajun country . Put a flag on the back of my truck .
tokenising: Unless you are a shop dummy with a perfectly round head , this is a really bad product .<br /><br />As someone else mentioned , if you wear this like a headband , it tilts downwards at the back of your head , so the camera ends up shooting the sky .  I bought several different mounts at the same time when I first bought my Contour , and

tokenising: My granddaughter loves everything with Minnie and she loves this DVD as well as all the other ones I have gotten her
tokenising: This is a chai tea , so I expected some cream in the product like the traditional Chai .  This is just tea , no sweetener , no cream .  Tea is strong and pumpkin flavor is satisfying but value beyond the convenience of the K-Cup .  It is cheaper to buy Twinings Chai Pumpkin Spice Tea Bag .
tokenising: If you are looking for a shorty 3mm wetsuit , this is the one you want .  I can ' t believe I hesitated and spent so long looking at other suits trying to decide what I wanted .<br /><br />Just bought this for tropical scuba diving .  I think it ' s sized a little large- it ' s loose and I could easily wear a size smaller , but I like that it isn ' t skin tight .  I also LOVE that it ' s a front zip .  I debated on that because for whatever reason , some genius decided wetsuits would be really cool with the zippers in the back and hey , that ' s what

tokenising: The store I usually get Cottonelle from sells 18 double rolls for about $18 usually .  ( I know this sounds high , but I ' m pretty sure this is what I pay-- could be 24 rolls but no more than that ).  So this is currently $27 . 88 for 48 double rolls .  I ' ve seen a lot of negative comments about how expensive this is , but it ' s a lot of toilet paper !  I ' ve had this a month with a family of four and am only about 2/3 through it .  Even if it was a little cheaper at the store , I think for convenience I prefer to get it through Amazon .  I usually have a huge package of toilet paper and that ' s about all that will fit in my cart .<br /><br />Update :  Didn ' t realize the price fluctuates frequently ... by A LOT ...$27 . 88 is good .  Why does it go from that to $49 . 26 ?? Wait for it to go down , because you can buy for much cheaper than $49+ in stores .
tokenising: $15 bucks and I got a set of headphones that actually fit my kids heads .  WORTH !<br />Definitely n

tokenising: I don ' t have a lot of experience with pastel pencils so I wanted to buy these for small details & outlining with pan pastels which I will purchase in the future . I chose a large set so I would have versatility with color options & shading .. ( Also available at Amazon ). On regular paper I tried out a few colors and noticed they were rich & vibrant . I chose 3 colors from the same family & they blended very well . On suede paper , pastel board or pastel medium/ground I am sure the performance will be much better . I would recommend these pencils to both hobbyists , beginners  & professional artists .
tokenising: So , I dig OPI polish .  This is a pretty color .  It ' s gonna look great this summer !  OPI lasts a long time without chipping .  And no one has colors like they do !  Amazon as the best prices !
tokenising: i have not started use it , but it looks good , hope is effective ...... i have not started use it , but it looks good , hope is effective
tokenising: LOVE

tokenising: This backpack is very nice ! Just like the pictures described on the web . I want to be different so I want a different backpack ! And the price is lower than other sellers . I love this backpack !
tokenising: Very high quality . I ' ve had a few very similar mits over the years . But this one seems to be the nicest . All the microfiber braids seem softer and more plush than others I ' ve used . Also , it has a cloth interior inside the mit . Some have a plastic bag on the inside , this one is great . 5 stars .
tokenising: Tigweld 200SX comes with a high frequency start tig and a foot pedal for suprb tig and great welding experience . I am using this product since a year and it simply amazing . Tigweld 200SX operates on 110V and 220V volts and has the capacity to weld multiple stick welding electrodes like 7018 , 6013 . It is great machine for the hobbyists as well for professionals . It offers great heat control ultimately resulting in an extraordinary welding experience .

tokenising: It arrived a lot quicker then I expected and my daughter loves it . She wears it all the time since we got it .
tokenising: Bought it for my son . Loved it , played it myself . who doesn ' t love the Joker ? When you see where the Scarecrow takes you it will blow your mind . Frankster says family fun for all !!
tokenising: I was looking for a cheap side table for the couch that wasn ' t a full size end table and this worked perfectly . It is exactly the same height as my IKEA couch and is sturdy especially because of the lower shelf . It has no drawers and the shelf cannot be moved so it may not meet everyone ' s expectations . The product does not look like cheap laminated wood so for the price I think it is a good deal .
tokenising: I use this battery in my Truck and have had no problems with it yet , I also use a small trickle charger [[ASIN : B004LX3AS6 NOCO Genius G750 6V/12V 750mA Fully Automatic Battery Charger and Maintainer ( Grey )]] though on all my vehicles and 

tokenising: This product arrived just in time when I was considering buying both games separately . Definitely worth your money . Comes with a great collection of games on both discs . A must-have in your game collection !
tokenising: I LIKE THIS BLU RAY VERY MUCH . I AM VERY HAPPY WITH IT , IT HAS MANY SPECIAL FEATURES ON IT THAT MAKE IT WORTH TO BUY . HOWEVER THE MOVIE ITSELF , HAPPENS TO BE ONE OF THE BEST ONES I HAVE EVER SEEN .
tokenising: I like the design of this necklace . Horse related designs are my favorite . A horseshoe design makes me feel like I ' m on the giant ' s shoulder . I got this from my roommate , it is very nice in the sun . The only thing is the length of the chain . It ' s not long enough .
tokenising: These tables are fantastic . Just the right height . Solid construction . Love these highly recommended . For the price you can ' t go wrong !
tokenising: My family owns an Apple TV for our communal living room TV and I recently decided I wanted a similar , more

tokenising: I gave this one to my father when he turned 50 . He loved it soo much that he used it on special occasions and didn ' t want to scratch it . I eventually convinced him to wear it daily ... now , after 4 years runs perfectly . Seiko is really a brand that delivers . I don ' t like too much the strap but can ' t really complain ...
tokenising: This big guy is truly a gem for the travelers . Amazingly crafted , this bag offers so much of space that you could literally fit a cow in it . I seem to fit everything I might need while I travel and still have room for more . The outer pockets are very handy and can also hold quite a few things . I ' ve taking this guy to many places and it was a wonderful experience to use it . This was my second buy from the great Ecocraftworld and they have yet again surprised me with their outstanding quality . Hats off to them…
tokenising: I really like the design , the mouse looks very neat . The pattern on the mouse  real stand out . The scroll

tokenising: Great tiny and cute laser projector made for me .<br />I use it every weekend when I am singing and dancing in my house and always show it off in front of my friends when they come to my house party .
tokenising: Great set of sticks . I always buy 5A size sticks because of the weight and length and these sticks are great . They have actually lasted longer , by that I mean not breaking , than I originally thought . I would definitely buy again !
tokenising: A very impressive little unit . Bought it for my sons birthday so he could play video games with his friends in the basement . Long story short ... TV is in the basement and the projector is upstairs now . I really was impressed with the picture quality and the 3D capability . Hilariously , I bought this to save some money rather than buying a new big TV but after playing with it for a while I feel this is really the next wave of home entertainment . I will be buying another one when the time comes . Thanks
tokenising: I 

tokenising: My grand daughter said her foot slipped around a little when she first wore them .  But , they were much more comfortable than her previous pair .  And she ' d still growing , so I hope they ' ll last for a while .
tokenising: I have these H4 bulbs in  both of my motorcycles . They are much brighter than stock , very white light not blue at all . would definitly recommend .
tokenising: This is an odd device . I can ' t figure out what the intended market is . It seems less useful than a Kindle Fire or an Android tablet , and there is no cost advantage . It does a number of things , but none of them really well . And it has some major flaws . The most obvious is that it has an attached stand that does not fold or adjust and is not removable . This makes it awkward to hold and to carry . It also has a battery life of only 90 minutes , so it works best as a stationary object plugged into an outlet , but then there is no advantage to it being small and light . I found it to be 

tokenising: Solid book .  Clear and appropriate fundamentals , and helpful detailed tools , to get your company focused on generating maximum return .  I read it before I had to and I couldn ' t be happier I did .  Well written .
tokenising: this is nice to have but I have already had to superglue it together at the center where all the slats connect so I was not real happy about that .
tokenising: This is such a nice rich fragrance that is no longer available in the brick and mortar stores . Thank God I can get it on Amazon .
tokenising: BAD BAD BAD PRODUCT , ONLY GOOD FOR 2 MONTHS AFTER THAT OUT . DO NOT FALL FOR LOW PRICE , THIS IS BAD PRODUCT . BUY THE ORIGINAL ONE AND WILL LAST YOU FOR 1 TO 2 YEARS .
tokenising: Have purchased watches on-line before but been disappointed . This one however is genuine and is a very classy watch . The watch face glass is curved which ( i think ) gives it a really classy look . Nice size too , very happy with my purchase . Recommend .
tokenising: Ori

tokenising: These stylus , s work great on my iPad Air - even with a protective sheet that I put on . I use it with Notes Plus and INKredible very well . The two lanyards are of perfect length and gets out of the way when I write . One improvement could be a way to store this stylus with a snap cover . Perhaps a metal sheath ( removable ) would allow the pen to be placed in the side when not in use ?<br /><br />I recommend this and will buy more !
tokenising: Amazing purchase . It looks exactly like the $200 ring my husband liked in the store and this is even a much better quality material . He loves how it looks , and I don ' t have to stress about him losing it ( bc it is very likely to happen ) He has bumped it a few times and it had yet to scratch .
tokenising: I ' m a book devourer .  ( is that a word ?  gee - i should know that !)  And i read many different types .<br /><br />Over my lifetime ( i ' m 53 ), i believe i ' ve read thousands .  I love books , and rarely hate one .  I

tokenising: As others have mentioned the author is quite candid throughout the book . Brutally honest in fact At the same time it is very depressing and difficult to read . Just another talent chewwed up by drugs and Hollywood . Would like to be optimistic about his future but very hard to be . The book ends pretty abrubtly
tokenising: Este libro aporta gran conocimiento bíblico , social e histórico que ayuda a desarrollar un estudio más profundo y a tener una mejor comprensión de estas dos epístolas , ¡Valiosisimo libro para impartir estudios bíblicos !
tokenising: My wife got me this for christmas and I ' m so happy with it . After initial setup the Xbox and Kinect are a great pair . I originally didn ' t want the Kinect but I ' ve found its pretty cool and probably will have some cool uses down the road . My Xbox did slow down and freeze during downloads so I suggest only downloading one game or thing at a time . I was doing six and it was clearly too much . That being said it runs 

tokenising: I love these boots , easy to put on , easy to remove , not too warm . They fit great ! I have problems with my feet , and shoes are always too tight .. but these are perfect !! LOve LOVE LOVE
tokenising: Its easy to overlook these sunglasses nowadays but hey , there ' s a reason everyone has them ! First off , I ordered these at 4 PM on Friday and they showed up at 11 AM Saturday morning ! Amazon is awesome , if its available through Amazon I choose to order through Amazon for easier returns , etc . and still only $83 + $3 shipping upgrade ! Way better than going into a sunglass store , but still Ray-Ban .<br />On the glasses themselves : They do fit a little snug , perhaps its the size I ordered but they don ' t squeeze my head or bother me at all so I ' m fine . They ' re heavy-weight , quality glasses . You can tell they ' re Ray-Ban and they ' re worth the money . Holding and wearing these makes you realize that cheap sunglasses don ' t always cut it , unless you ' re a

Training Classifier...
Training Classifier...
Training Classifier...
Training Classifier...
Training Classifier...
Training Classifier...
Training Classifier...
Training Classifier...
[[0.532585110362888, 0.5291666666666667, 0.5214946425092538, None], [0.5647797505447639, 0.5636904761904762, 0.5580041935858397, None], [0.5726487642528001, 0.5672619047619047, 0.5596446421884106, None], [0.5801433639176357, 0.5732142857142857, 0.567741131932461, None], [0.5295349314071781, 0.5291666666666667, 0.5249142719149964, None], [0.5283590502360318, 0.5273809523809524, 0.5232703755167859, None], [0.5663071108163635, 0.5625, 0.5580417374848873, None], [0.5693421637048861, 0.5666666666666667, 0.563045108753884, None], [0.5503723481743834, 0.5476190476190477, 0.5416961903736479, None], [0.5629458380574452, 0.5625, 0.5581166444000107, None]]


# Evaluate on test set

In [185]:
# Finally, check the accuracy of your classifier by training on all the tranin data
# and testing on the test set
# Will only work once all functions are complete
functions_complete = True# set to True once you're happy with your methods for cross val
if functions_complete:
    print(testData[0])   # have a look at the first test data instance
    classifier = trainClassifier(trainData)  # train the classifier
    testTrue = [t[1] for t in testData]   # get the ground-truth labels from the data
    testPred = predictLabels(testData, classifier)  # classify the test data to get predicted labels
    finalScores = precision_recall_fscore_support(testTrue, testPred, average='weighted') # evaluate
    print("Done training!")
    print("Precision: %f\nRecall: %f\nF Score:%f" % finalScores[:3])

({9: 0.04}, '__label1__')
Training Classifier...
Done training!
Precision: 0.547544
Recall: 0.545714
F Score:0.541300


# Optimization
improve preProcess, parseReview, split_data

In [211]:
#Question 4
# we will improve pre processing by using lemmatisation and stemming 

from nltk.stem.porter import PorterStemmer #import stemming methodology
from nltk.stem import WordNetLemmatizer #import lemmatizer

def preProcess_improve(text, rating, verifyPurchase, prod_ID): # define preProcess_improve before anlyse the data by using tokenisation, 
    #normalisation, lemmatisation, and stemming
    
     # word tokenisation
    text = re.sub(r"(\w)([.,;:!?'\"”\)])", r"\1 \2", text) # this process will tokenise or split punctuation in sentence.
    text = re.sub(r"([.,;:!?'\"“\(])(\w)", r"\1 \2", text) # this process will tokenise or split punctuation in sentence.
    print("tokenising:", text) # test the code by printing
    tokens = re.split(r"\s+",text) # store the result from tokenisation process in "tokens"
    # normalisation
    text = re.sub(r"(\S)\1\1+",r"\1\1\1", text) 
    tokens = [t.lower() for t in tokens] # this process will normalise the sentence from tokenisation process in order to separate in word by word
    
    stemmer = PorterStemmer() # using stemming process and store in stemmer
    lemmatizer = WordNetLemmatizer() # using lemmatisation process and store in lemmatizer
    new_tokens = stemmer.stem(lemmatizer.lemmatize(tokens)) # generate new_tokens by using stemming after lemmatisation
    
    
    return (new_tokens,rating, verifyPurchase, prod_ID) # return the parameter



In [218]:
#Question5
# we will pick rating, VERIFIED_PURCHASE and PRODUCT_ID to increase efficiency because these parameters have involvement among the reviewText 
# so these variables would made more precision in the model

def parseReview_improve(reviewLine):
        Id = reviewLine[0] # Id should equal the first colum of reviewLine 
        Text = reviewLine[8] # Text should equal the last colum of reviewLine 
        Label = reviewLine[1] # Label should equal the second colum of reviewLine 
        rating = reviewLine[2] # rating should equal the third colum of reviewLine
        verifyPurchase = reviewLine[3] # product category should equal the forth colum of reviewLine that is VERIFIED_PURCHASE
        prod_ID = reviewLine[5] # product title should equal the sixth colum of reviewLine that is PRODUCT_ID
        return (Id, Text, Label, rating, verifyPurchase, prod_ID) # return the value


def loadData_improve(path, Text=None): # define loadData_improve for adding rating, verifyPurchase, prod_ID in the data
    with open(path,encoding = 'utf8') as f:
        reader = csv.reader(f, delimiter='\t')
        for line in reader:
            if line[0] == "DOC_ID":  # skip the header
                 continue
            (Id, Text, Label, rating, verifyPurchase, prod_ID) = parseReview_improve(line) 
            rawData.append((Id, Text, Label,  rating, verifyPurchase, prod_ID)) # add these parameters into rawData
                     
def splitData_improve(percentage): # define splitData_improve for improving splitData function by adding rating, verifyPurchase and prod_ID into the train and test data
    
      dataSamples = len(rawData)
      halfOfData = int(len(rawData)/2)
      trainingSamples = int((percentage*dataSamples)/2)
      for (_,Text, rating,verifyPurchase, prod_ID, Label) in rawData[:trainingSamples] + rawData[halfOfData:halfOfData+trainingSamples]:
          trainData.append((toFeatureVector(preProcess_improve(Text, rating, verifyPurchase, prod_ID)),Label)) #adding train dataset which include Text, rating,
                                                                                                                                                           #verify_Purchase, prod_ID
      for (_,Text, rating, verifyPurchase, prod_ID, Label) in rawData[trainingSamples:halfOfData] + rawData[halfOfData+trainingSamples:]:
          testData.append((toFeatureVector(preProcess_improve(Text, rating, verifyPurchase, prod_ID)),Label)) # adding test dataset which include Text, rating,
                                                                                                                                                          # verify_Purchase, prod_ID
        
reviewPath = 'amazon_reviews.txt'
        
loadData_improve(reviewPath) #load data

splitData_improve(0.8) # split the data in 80% training 




crossValidate(trainData, 10) # run the cross validation

[[0.532585110362888, 0.5291666666666667, 0.5214946425092538, None], [0.5647797505447639, 0.5636904761904762, 0.5580041935858397, None], [0.5726487642528001, 0.5672619047619047, 0.5596446421884106, None], [0.5801433639176357, 0.5732142857142857, 0.567741131932461, None], [0.5295349314071781, 0.5291666666666667, 0.5249142719149964, None], [0.5283590502360318, 0.5273809523809524, 0.5232703755167859, None], [0.5663071108163635, 0.5625, 0.5580417374848873, None], [0.5693421637048861, 0.5666666666666667, 0.563045108753884, None], [0.5503723481743834, 0.5476190476190477, 0.5416961903736479, None], [0.5629458380574452, 0.5625, 0.5581166444000107, None]]
