# Rating prediction using SVM and Embeddings

In [22]:
import pandas as pd
import sklearn
import sklearn.metrics
import sklearn.svm
import spacy

## Importing Train, Validation and Test data

In [7]:
# Read the three pre-processed splits (train / validation / test) with the
# same latin1 decoding, in that order.
trainDataSet, valDataSet, testDataSet = [
    pd.read_csv(csv_path, encoding="latin1")
    for csv_path in (
        '../Kaggle-dataset/pre-processed/trainDataset.csv',
        '../Kaggle-dataset/pre-processed/valDataset.csv',
        '../Kaggle-dataset/pre-processed/testDataset.csv',
    )
]


print(trainDataSet)

       Unnamed: 0  rating                                           textFull
0           66285       3  proprietary charging u outlet opened box saw p...
1           34581       2  wanted love phone maybe mine lemon switched ph...
2           45104       1  must got lucky unlucky update original phone i...
3           13902       1  phone bought phone month ago really hate touch...
4            9448       5                        great service great product
...           ...     ...                                                ...
54383       13065       1  started issue phone getting worse irritating c...
54384       58857       5  perfect known wa reconditioned look act perfec...
54385        9770       1  sell item outside united state sold phone use ...
54386        5673       5                                     five star love
54387       23391       5       five star phone excellent condition unlocked

[54388 rows x 3 columns]


In [24]:
def trainModel(X_train, Y_train):
    """Fit an SVM rating classifier on spaCy document vectors.

    Loads the ``en_core_web_sm`` spaCy pipeline, converts every training text
    into its ``Doc.vector`` and fits a default-parameter ``sklearn.svm.SVC``
    on those vectors.

    Args:
        X_train: Iterable of text strings, one per training example.
        Y_train: Labels aligned one-to-one with ``X_train``.

    Returns:
        A ``(embedding, svm)`` tuple: the loaded spaCy pipeline and the
        fitted classifier.
    """
    # NOTE(review): spaCy documents its small (`_sm`) pipelines as shipping
    # without static word vectors, so `Doc.vector` may be a weaker feature than
    # a `_md`/`_lg` pipeline would provide -- confirm this choice is intended.
    embedding = spacy.load("en_core_web_sm")
    # verbose=True makes the solver print its optimisation log while fitting.
    svm = sklearn.svm.SVC(verbose=True)

    print("Getting embeddings")
    # Language.pipe streams the texts through the pipeline in batches and
    # yields the Docs in input order; this avoids the per-call overhead of
    # invoking the pipeline once for every single text.
    X_train_emb = [doc.vector for doc in embedding.pipe(X_train)]

    print("Training SVM")
    svm.fit(X_train_emb, Y_train)

    return embedding, svm


# Train on the training split: review text as input, star rating as label.
embedding, svm = trainModel(
    X_train=trainDataSet["textFull"],
    Y_train=trainDataSet["rating"],
)

Getting embeddings
Training SVM
[LibSVM]................*............*
optimization finished, #iter = 28530
obj = -6070.291640, rho = -0.859662
nSV = 7584, nBSV = 5297
.......*...*
optimization finished, #iter = 10621
obj = -6861.650189, rho = -0.876081
nSV = 7782, nBSV = 6780
.......*..*.*
optimization finished, #iter = 10266
obj = -8245.153974, rho = -0.596754
nSV = 9395, nBSV = 8628
.............*...*
optimization finished, #iter = 16813
obj = -13638.570894, rho = 0.333751
nSV = 15733, nBSV = 14172
....*.*
optimization finished, #iter = 5156
obj = -5459.224633, rho = 0.451285
nSV = 5967, nBSV = 5569
.....*.*
optimization finished, #iter = 6158
obj = -5466.965406, rho = 0.428156
nSV = 6156, nBSV = 5556
..............*........*
optimization finished, #iter = 22772
obj = -6085.166978, rho = 0.905191
nSV = 7760, nBSV = 5472
.......*..*
optimization finished, #iter = 9109
obj = -7027.356940, rho = 0.233012
nSV = 7828, nBSV = 7039
........................*.....................*
optimizati

## Save Model

In [25]:
import os
import pickle

# open(..., 'wb') does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
os.makedirs('embedding-svm', exist_ok=True)

# NOTE(review): pickle files can execute arbitrary code when loaded -- only
# unpickle these artefacts from a trusted location.
# Persist the spaCy pipeline and the fitted classifier as separate files.
with open('embedding-svm/embedding_model.pkl','wb') as f:
    pickle.dump(embedding,f)
with open('embedding-svm/svm_model.pkl','wb') as f:
    pickle.dump(svm,f)

# Scores

In [26]:
def predict(embedding, svm, X):
    """Predict labels for raw texts using document vectors and a fitted model.

    Args:
        embedding: Object that maps a text to something exposing ``.vector``.
            When it offers a ``pipe`` method (as spaCy pipelines do) the texts
            are processed in batches through it; otherwise it is called once
            per text.
        svm: Fitted estimator exposing ``predict``.
        X: Iterable of text strings.

    Returns:
        Whatever ``svm.predict`` returns for the list of document vectors.
    """
    # Prefer batched processing when available; fall back to per-text calls so
    # plain callables keep working.
    docs = embedding.pipe(X) if hasattr(embedding, "pipe") else map(embedding, X)
    X_embedding = [doc.vector for doc in docs]
    return svm.predict(X_embedding)

# Ground-truth ratings for the two held-out splits.
Y_val, Y_test = valDataSet["rating"], testDataSet["rating"]
# Model predictions for the same splits, validation first and then test.
Y_val_pred, Y_test_pred = (
    predict(embedding, svm, split["textFull"])
    for split in (valDataSet, testDataSet)
)


In [27]:
# Score each held-out split: overall accuracy plus F1 with average=None,
# which keeps one score per class instead of a single aggregate.
(acc_val, f1_val), (acc_test, f1_test) = (
    (
        sklearn.metrics.accuracy_score(y_true, y_pred),
        sklearn.metrics.f1_score(y_true, y_pred, average=None),
    )
    for y_true, y_pred in ((Y_val, Y_val_pred), (Y_test, Y_test_pred))
)

# Validation split report
print("Val")
print("\tAcuracia: ", acc_val)
print("\tF1: ", f1_val)

# Test split report
print("Test")
print("\tAcuracia: ", acc_test)
print("\tF1: ", f1_test)

Val
	Acuracia:  0.6518754596714881
	F1:  [0.59674389 0.04761905 0.03826087 0.13442325 0.78011332]
Test
	Acuracia:  0.659375
	F1:  [0.5943304  0.02083333 0.03743316 0.15555556 0.78945939]


In [31]:
# Inspect one test review next to its true and predicted rating.
example = 33
print(testDataSet.iloc[example]["textFull"])
# Use positional .iloc here as well: plain Y_test[example] is a label lookup
# on the Series index, which only coincides with the row position (used for
# the text above and for the prediction array below) while the index is the
# default 0..n-1 range.
print( "Original: ", Y_test.iloc[example])
print("Predict: ", Y_test_pred[example])

please read amazing phone wish couldve used really research phone wa excited purchase made amazon unfortunately next day received phone thats nightmare started phone looked brand new sealed packaging like would purchase store example literally use scissors cut packing even get provided book cut complete different package even touch phone went proper procedure activating phone purchasing bundle plan use phone wa completely done programming customer service rep tell make call error message stated phone authenticated happened february 27th today march 3 2014 learned return phone amazon tthis review customer beware purpose amazon verizon wireless excellent customer service tried hard fix issue crazy part learned tonight phone purchased actually hacked never able use granted wa upset almost week late night verizon rep wa getting old trying million thing repeatedly try fix phone tonight verizon rep told straight maam need return phone somebody ha hacked phone resold whatever site bought wa s