In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer , LancasterStemmer
from nltk.corpus import stopwords
from langdetect import detect
from googletrans import Translator
from rake_nltk import Rake
import yake
import contractions
from unidecode import unidecode
from string import punctuation
from nltk.util import ngrams
from collections import Counter
from wordcloud import WordCloud
from autocorrect import Speller
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB,BernoulliNB,GaussianNB
from sklearn.metrics import recall_score,precision_score,accuracy_score,confusion_matrix


In [2]:
df = pd.read_table('Restaurant_Reviews.tsv')
df

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1
...,...,...
995,I think food should have flavor and texture an...,0
996,Appetite instantly gone.,0
997,Overall I was not impressed and would not go b...,0
998,"The whole experience was underwhelming, and I ...",0


In [3]:
## detect language

def lang_detect(data):
    lang = detect(data)
    return lang

In [4]:
df['language'] = df['Review'].apply(lang_detect)

In [5]:
## Translate

def translator(data):
    trans = Translator()
    t = trans.translate(data)
    return t.text

In [6]:
translator(df['Review'][0])

'Wow... Loved this place.'

In [None]:
df['translated_text'] = df['Review'].apply(translator)

In [None]:
stop_list = stopwords.words('english')
stop_list.remove('no')
stop_list.remove('nor')
stop_list.remove('not')
def clean_data(data):
    accent = unidecode(data)
    expand_data = contractions.fix(data)
    clean_text = [word.lower() for word in word_tokenize(expand_data) if (word not in punctuation) 
                  and (word not in stop_list) and (word.isalpha() and (len(word)>2))] 
    
    return clean_text

In [None]:
clean_data(df['Review'][0])

In [None]:
df['clean_data'] = df['Review'].apply(clean_data)

In [None]:
contractions.fix(df['Review'][0])

In [None]:
df

In [None]:
## Ngrams

def n_grams(data,n_grams):
    grams = ngrams(data,n_grams) 
    gram_list = []
    for gram in grams:
        gram_list.append(' '.join(gram))
    return gram_list

In [None]:
n_grams(df['clean_data'][0],1)

In [None]:
unigram = df['clean_data'].apply(lambda x : n_grams(x,1))
unigram_list = []
for uni in unigram :
    unigram_list.extend(uni)
cnt = Counter(unigram_list).most_common(100)
cnt

In [None]:
## bigram

bigram = df['clean_data'].apply(lambda x : n_grams(x,2))
bigram
bigram_list = []
for bi in bigram :
    bigram_list.extend(bi)
cnt = Counter(bigram_list).most_common(100)
cnt

In [None]:
## Trigram

trigram = df['clean_data'].apply(lambda x : n_grams(x,3))
trigram_list = []
for tri in trigram :
    trigram_list.extend(tri)
cnt = Counter(trigram_list).most_common(25)
cnt

In [None]:
quadragram = df['clean_data'].apply(lambda x : n_grams(x,4))
quadragram_list = []

for quadra in quadragram:
    quadragram_list.extend(quadra)
cnt = Counter(quadragram_list).most_common(25)
cnt

## WORD CLOUD

In [None]:
def word_cloud(data,column):
    string = ' '.join(data[column])+' '
    cloud = WordCloud(width = 800,height = 800,background_color = 'white',min_font_size = 10).generate(string)
    plt.figure(figsize = (8,8),facecolor = None)
    plt.imshow(cloud)
    
    plt.axis('off')
    plt.show()

In [None]:
import matplotlib.pyplot as plt


In [None]:
string = ' '.join(df['Review'])+' '
cloud = WordCloud(background_color = 'white').generate(string)
plt.figure(figsize = (8,8),facecolor = None)
plt.imshow(cloud)
    
plt.axis('off')
plt.show()

In [None]:
word_cloud(df,'Review')

## Yake Key phrase extraction

In [None]:
def yake_extractor(data):
    keyword_extractor = yake.KeywordExtractor()
    global keywords
    keywords = keyword_extractor.extract_keywords(data)
    keyword_list = []
    for kw in keywords:
        keyword_list.append(kw[0])
    return keyword_list

In [None]:
df['Review'].apply(yake_extractor)

In [None]:
## all keywords 
all_keywords = []
for kw in keywords :
    all_keywords.append(kw)
print(all_keywords)

## Rake

In [None]:
def rake_extractor(data):
    keyword_extractor = Rake()
    keyword_extractor.extract_keywords_from_text(data)
    return keyword_extractor.get_ranked_phrases()

df['Review'].apply(rake_extractor)

## Unlabelled Labelled Data

In [None]:
# preprocessing
# 1.remove spaces,newlines
def remove_spaces(data):
    clean_text = data.replace('\\n',' ').replace('\t',' ').replace('\\',' ')
    return clean_text

# 2.contraction mapping
def expand_text(data):
    expanded_text = contractions.fix(data)
    return expanded_text

# 3.Handling accented characters
def handling_accented(data):
    fixed_text = unidecode(data)
    return fixed_text

# 4.Cleaning
stopword_list = stopwords.words('english')
stopword_list.remove('no')
stopword_list.remove('nor')
stopword_list.remove('not')

def clean_data(data):
    tokens = word_tokenize(data)
    clean_text = [word.lower() for word in tokens if (word not in punctuation) and (word.lower() not in stopword_list) and (len(word)>2) and (word.isalpha())]
    return clean_text

# autocorrection
def autocorrection(data):
    spell = Speller(lang='en')
    corrected_text = spell(data)
    return corrected_text

# lemmatization
def lemmatization(data):
    lemmatizer = WordNetLemmatizer()
    final_data=[]
    for word in data:
        lemmatized_word = lemmatizer.lemmatize(word)
        final_data.append(lemmatized_word)
    return ' '.join(final_data)

In [None]:
clean_text_train = df['Review'].apply(remove_spaces)
clean_text_train = clean_text_train.apply(expand_text)
clean_text_train = clean_text_train.apply(handling_accented)
clean_text_train = clean_text_train.apply(clean_data)
#clean_text_train = clean_text_train.apply(autocorrection)
clean_text_train = clean_text_train.apply(lemmatization)

In [None]:
#clean_text_train = clean_text_train.apply(autocorrection)

## CountVectorizer

In [None]:
cnv = CountVectorizer()
bow = cnv.fit_transform(clean_text_train)
bow

In [None]:
cnv.get_feature_names_out()

In [None]:
cnv_df = pd.DataFrame(bow.A,columns = cnv.get_feature_names_out())
cnv_df

In [None]:
tfidf = TfidfVectorizer()
tfidf1= tfidf.fit_transform(clean_text_train)
tfidf1

In [None]:
tfidf.get_feature_names_out()

In [None]:
tfidf_df = pd.DataFrame(tfidf1.A,columns = tfidf.get_feature_names_out())
tfidf_df

## Word2Vec

In [None]:
from gensim.models import Word2Vec

In [None]:
word2vec = Word2Vec(df['clean_data'].tolist(),min_count = 2,window = 3)

In [None]:
word2vec.save('word2vec.model')

In [None]:
word2vec.vector_size

In [None]:
def vectorizer(list_of_docs,model):
    feature = []
    for rew in list_of_docs :
        zero_vector = np.zeros(model.vector_size)
        vectors = []
        for word in rew :
            try :
                word in model.wv 
                vectors.append(model.wv[word])

            except KeyError:
                continue
        if vectors :
            vectors = np.asarray(vectors)
            avg_vec = vectors.mean(axis = 0)
            feature.append(avg_vec)
        else :
            feature.append(zero_vector)
            
    return feature

In [None]:
vectorized_docs = vectorizer(df['clean_data'].tolist(),word2vec)
vectorized_docs[0]

In [None]:
type(vectorized_docs)

In [None]:
x_emb = np.array(vectorized_docs)
x_emb

## Build Kmeans

In [None]:
from sklearn.cluster import KMeans

In [None]:
def kmeans_build(cluster,data):
    km = KMeans(n_clusters = cluster)
    y_pred = km.fit_predict(data)
    return km,y_pred

In [None]:
## Word2Vec Clusters
model,w2v_cluster = kmeans_build(2,x_emb)
w2v_cluster

In [None]:
from sklearn.cluster import KMeans

In [None]:
## Countvectorizer

model2,bow_cluster = kmeans_build(2,bow)

In [None]:
## tfidf model clusters

model3,tfidf_cluster = kmeans_build(2,tfidf1)

In [None]:
df['clean_data_text'] = df['clean_data'].apply(lambda x : ' '.join(x))

In [None]:
df['clean_data_text']

## Silhoutte score

In [None]:
from yellowbrick.cluster import SilhouetteVisualizer
from sklearn.metrics import silhouette_score 

In [None]:
print(f"Cluster of Word2Vec = {silhouette_score(vectorized_docs,w2v_cluster)}")
print(f"Cluster of countvectorizer = {silhouette_score(bow,bow_cluster)}")
print(f"Cluster of Word2Vec = {silhouette_score(tfidf1,tfidf_cluster)}")

## silhouette_visualizer

In [None]:
def visualizer(model,data):
    visualize =SilhouetteVisualizer(model,colors='yellowbrick')
    visualize.fit(data)
    visualize.show()

In [None]:
## bow
visualizer(model2,bow)

In [None]:
## tfidf

visualizer(model3,tfidf_df)

In [None]:
## Word2Vec

visualizer(model,x_emb)

In [None]:
np.array(vectorized_docs).tolist()

In [None]:
x = pd.DataFrame(np.array(vectorized_docs).tolist())

In [None]:
y = w2v_cluster


## TrainTestSplit

In [None]:
from sklearn.naive_bayes import BernoulliNB

In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,stratify = y,test_size = 0.3,random_state = 15 )
x_train.shape,x_test.shape,y_train.shape,y_test.shape

In [None]:
pd.Series(y).value_counts()

## BernaulliNB

In [None]:
bnb = BernoulliNB()
bnb.fit(x_train,y_train)

In [None]:
def model_eval(x,y,model):
    y_pred = model.predict(x)
    cm = confusion_matrix(y,y_pred)
    print(F"confusion matrix = \n{cm}")
    print('*'*30)
    
    acc = accuracy_score(y,y_pred)
    print(f"Accuracy Score = {acc}")
    print('*'*30)
    
    recall = recall_score(y,y_pred,average='micro')
    print(F"Recall Score = {recall}")
    print('*'*30)
    
    precision = precision_score(y,y_pred,average='weighted')
    print(f"Precision Score = {precision}")
    return

In [None]:
## Training

model_eval(x_train,y_train,bnb)

In [None]:
## Testing

model_eval(x_test,y_test,bnb)

## GaussianNB

In [None]:
gnb = GaussianNB()
gnb.fit(x_train,y_train)

In [None]:
## Training

model_eval(x_train,y_train,gnb)

In [None]:
## Testing

model_eval(x_test,y_test,gnb)

## Logistic Regression

In [165]:
lr = LogisticRegression()
lr.fit(x_train,y_train)

LogisticRegression()

In [166]:
## Training

model_eval(x_train,y_train,lr)

confusion matrix = 
[[  0 167]
 [  0 533]]
******************************
Accuracy Score = 0.7614285714285715
******************************
Recall Score = 0.7614285714285715
******************************
Precision Score = 0.5797734693877551


In [167]:
## Testing

model_eval(x_test,y_test,lr)

confusion matrix = 
[[  0  72]
 [  0 228]]
******************************
Accuracy Score = 0.76
******************************
Recall Score = 0.76
******************************
Precision Score = 0.5776


## KNN

In [168]:
from sklearn.preprocessing import MinMaxScaler

In [169]:
scaler = MinMaxScaler()
x_train_df = scaler.fit_transform(x_train)
x_test_df = scaler.transform(x_test)

In [170]:
knn = KNeighborsClassifier()
knn.fit(x_train_df,y_train)

KNeighborsClassifier()

In [171]:
## Training

model_eval(x_train_df,y_train,knn)

confusion matrix = 
[[137  30]
 [  4 529]]
******************************
Accuracy Score = 0.9514285714285714
******************************
Recall Score = 0.9514285714285714
******************************
Precision Score = 0.9523682288353242


In [172]:
y_test.shape

(300,)

In [173]:
x_test_df.shape

(300, 100)

In [174]:
## Testing

model_eval(x_test_df,y_test,knn)

confusion matrix = 
[[ 46  26]
 [  6 222]]
******************************
Accuracy Score = 0.8933333333333333
******************************
Recall Score = 0.8933333333333333
******************************
Precision Score = 0.8926302729528537


## Hyper parameter tunning

In [175]:
knn1 = KNeighborsClassifier()
hyp = {'n_neighbors':np.arange(3,20,2),'p':[1,2]}
rscv = RandomizedSearchCV(knn1,hyp)
rscv.fit(x_train_df,y_train)

RandomizedSearchCV(estimator=KNeighborsClassifier(),
                   param_distributions={'n_neighbors': array([ 3,  5,  7,  9, 11, 13, 15, 17, 19]),
                                        'p': [1, 2]})

In [176]:
rscv.best_estimator_

KNeighborsClassifier(n_neighbors=7)

In [177]:
knn1 = rscv.best_estimator_
knn1.fit(x_train_df,y_train)

KNeighborsClassifier(n_neighbors=7)

In [178]:
## Training

model_eval(x_train_df,y_train,knn1)

confusion matrix = 
[[130  37]
 [  3 530]]
******************************
Accuracy Score = 0.9428571428571428
******************************
Recall Score = 0.9428571428571428
******************************
Precision Score = 0.9449311108458978


In [179]:
## Testing

model_eval(x_test_df,y_test,knn1)

confusion matrix = 
[[ 46  26]
 [  3 225]]
******************************
Accuracy Score = 0.9033333333333333
******************************
Recall Score = 0.9033333333333333
******************************
Precision Score = 0.906581022847386


## Decision Tree

In [180]:
dt = DecisionTreeClassifier(random_state = 15)
dt.fit(x_train,y_train)

DecisionTreeClassifier(random_state=15)

In [181]:
## tarining

model_eval(x_train,y_train,dt)

confusion matrix = 
[[167   0]
 [  0 533]]
******************************
Accuracy Score = 1.0
******************************
Recall Score = 1.0
******************************
Precision Score = 1.0


In [182]:
## Testing

model_eval(x_test,y_test,dt)

confusion matrix = 
[[ 53  19]
 [ 22 206]]
******************************
Accuracy Score = 0.8633333333333333
******************************
Recall Score = 0.8633333333333333
******************************
Precision Score = 0.8654222222222222


## Hyperparameter Tunning

In [183]:
dt1 = DecisionTreeClassifier(random_state = 15)
hyp = {'criterion':['gini','entropy'],
      'max_depth':np.arange(3,8),
      'min_samples_leaf':np.arange(3,8),
      'min_samples_split':np.arange(3,8)}
rscv = RandomizedSearchCV(dt1,hyp)
rscv.fit(x_train,y_train)

RandomizedSearchCV(estimator=DecisionTreeClassifier(random_state=15),
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': array([3, 4, 5, 6, 7]),
                                        'min_samples_leaf': array([3, 4, 5, 6, 7]),
                                        'min_samples_split': array([3, 4, 5, 6, 7])})

In [184]:
rscv.best_estimator_

DecisionTreeClassifier(criterion='entropy', max_depth=4, min_samples_leaf=6,
                       min_samples_split=7, random_state=15)

In [185]:
dt1 = rscv.best_estimator_
dt1.fit(x_train,y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=4, min_samples_leaf=6,
                       min_samples_split=7, random_state=15)

In [186]:
## Training

model_eval(x_train,y_train,dt1)

confusion matrix = 
[[138  29]
 [  9 524]]
******************************
Accuracy Score = 0.9457142857142857
******************************
Recall Score = 0.9457142857142857
******************************
Precision Score = 0.9454633354245857


In [187]:
## Testing

model_eval(x_test,y_test,dt1)

confusion matrix = 
[[ 38  34]
 [ 17 211]]
******************************
Accuracy Score = 0.83
******************************
Recall Score = 0.83
******************************
Precision Score = 0.8203487940630797


## Random Forest

In [188]:
rf = RandomForestClassifier(random_state = 15)
rf.fit(x_train,y_train)

RandomForestClassifier(random_state=15)

In [189]:
## Training

model_eval(x_train,y_train,rf)

confusion matrix = 
[[167   0]
 [  0 533]]
******************************
Accuracy Score = 1.0
******************************
Recall Score = 1.0
******************************
Precision Score = 1.0


In [190]:
## testing

model_eval(x_test,y_test,rf)

confusion matrix = 
[[ 50  22]
 [  2 226]]
******************************
Accuracy Score = 0.92
******************************
Recall Score = 0.92
******************************
Precision Score = 0.9233498759305212


## Hyperparaeter Tunning

In [191]:
rf1 = RandomForestClassifier(random_state = 15)
hyp = {'criterion':['gini','entropy'],
      'n_estimators':np.arange(50,110,10),
      'max_depth':np.arange(3,8),
      'min_samples_split':np.arange(3,8),
      'min_samples_leaf':np.arange(3,8)}
rscv = RandomizedSearchCV(rf1,hyp)
rscv.fit(x_train,y_train)

RandomizedSearchCV(estimator=RandomForestClassifier(random_state=15),
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': array([3, 4, 5, 6, 7]),
                                        'min_samples_leaf': array([3, 4, 5, 6, 7]),
                                        'min_samples_split': array([3, 4, 5, 6, 7]),
                                        'n_estimators': array([ 50,  60,  70,  80,  90, 100])})

In [192]:
rscv.best_estimator_

RandomForestClassifier(criterion='entropy', max_depth=7, min_samples_leaf=4,
                       min_samples_split=7, n_estimators=80, random_state=15)

In [193]:
rf2 = rscv.best_estimator_
rf2.fit(x_train,y_train)

RandomForestClassifier(criterion='entropy', max_depth=7, min_samples_leaf=4,
                       min_samples_split=7, n_estimators=80, random_state=15)

In [194]:
## Training

model_eval(x_train,y_train,rf2)

confusion matrix = 
[[167   0]
 [  0 533]]
******************************
Accuracy Score = 1.0
******************************
Recall Score = 1.0
******************************
Precision Score = 1.0


In [195]:
model_eval(x_test,y_test,rf2)

confusion matrix = 
[[ 52  20]
 [  0 228]]
******************************
Accuracy Score = 0.9333333333333333
******************************
Recall Score = 0.9333333333333333
******************************
Precision Score = 0.9387096774193548


## Ada Boost Classifier

In [196]:
ada = AdaBoostClassifier(random_state = 15)
ada.fit(x_train,y_train)

AdaBoostClassifier(random_state=15)

In [197]:
## Training

model_eval(x_train,y_train,ada)

confusion matrix = 
[[167   0]
 [  0 533]]
******************************
Accuracy Score = 1.0
******************************
Recall Score = 1.0
******************************
Precision Score = 1.0


In [198]:
## Testing

model_eval(x_test,y_test,ada)

confusion matrix = 
[[ 57  15]
 [  3 225]]
******************************
Accuracy Score = 0.94
******************************
Recall Score = 0.94
******************************
Precision Score = 0.9404999999999999


## Hyper parameter tunning

In [199]:
ada1 = AdaBoostClassifier(random_state = 15)
hyp = {'n_estimators':np.arange(10,60,10),
      'learning_rate':np.arange(0.01,1,0.1)}
rscv = RandomizedSearchCV(ada1,hyp)
rscv.fit(x_train,y_train)

RandomizedSearchCV(estimator=AdaBoostClassifier(random_state=15),
                   param_distributions={'learning_rate': array([0.01, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91]),
                                        'n_estimators': array([10, 20, 30, 40, 50])})

In [200]:
rscv.best_estimator_

AdaBoostClassifier(learning_rate=0.6100000000000001, random_state=15)

In [201]:
ada1 = rscv.best_estimator_
ada1.fit(x_train,y_train)

AdaBoostClassifier(learning_rate=0.6100000000000001, random_state=15)

In [202]:
## Training

model_eval(x_train,y_train,ada1)

confusion matrix = 
[[167   0]
 [  0 533]]
******************************
Accuracy Score = 1.0
******************************
Recall Score = 1.0
******************************
Precision Score = 1.0


In [203]:
## testing

model_eval(x_test,y_test,ada1)

confusion matrix = 
[[ 59  13]
 [  1 227]]
******************************
Accuracy Score = 0.9533333333333334
******************************
Recall Score = 0.9533333333333334
******************************
Precision Score = 0.9548333333333333


## SVM = SVC

In [204]:
svm = SVC()
svm.fit(x_train,y_train)

SVC()

In [205]:
## training

model_eval(x_train,y_train,svm)

confusion matrix = 
[[164   3]
 [  0 533]]
******************************
Accuracy Score = 0.9957142857142857
******************************
Recall Score = 0.9957142857142857
******************************
Precision Score = 0.9957382729211087


In [206]:
## Testing

model_eval(x_test,y_test,svm)

confusion matrix = 
[[ 61  11]
 [  1 227]]
******************************
Accuracy Score = 0.96
******************************
Recall Score = 0.96
******************************
Precision Score = 0.9610029818378966


## User defined function

In [207]:
import pickle

In [208]:
word2vec.save('restau.model')

with open('resau_svm_model.pkl','wb') as file:
    pickle.dump(svm,file)

In [209]:
# preprocessing
# 1.remove spaces,newlines
def remove_spaces(data):
    clean_text = data.replace('\\n',' ').replace('\t',' ').replace('\\',' ')
    return clean_text

# 2.contraction mapping
def expand_text(data):
    expanded_text = contractions.fix(data)
    return expanded_text

# 3.Handling accented characters
def handling_accented(data):
    fixed_text = unidecode(data)
    return fixed_text

# 4.Cleaning
stopword_list = stopwords.words('english')
stopword_list.remove('no')
stopword_list.remove('nor')
stopword_list.remove('not')

def clean_data(data):
    tokens = word_tokenize(data)
    clean_text = [word.lower() for word in tokens if (word not in punctuation) and (word.lower() not in stopword_list) and (len(word)>2) and (word.isalpha())]
    return clean_text

# autocorrection
def autocorrection(data):
    spell = Speller(lang='en')
    corrected_text = spell(data)
    return corrected_text

# lemmatization
def lemmatization(data):
    lemmatizer = WordNetLemmatizer()
    final_data=[]
    for word in data:
        lemmatized_word = lemmatizer.lemmatize(word)
        final_data.append(lemmatized_word)
    return ' '.join(final_data)

In [451]:
data ='Wow... Loved this place.'
#data = df['clean_data'][0]
data

'Wow... Loved this place.'

In [452]:
user_data = remove_spaces(data)
user_data = expand_text(user_data)
user_data = handling_accented(user_data)
user_data = clean_data(user_data)
user_data = autocorrection(' '.join(user_data))
user_data = lemmatization(user_data.split())
user_data

'wow loved place'

In [453]:
model1 = Word2Vec.load('restau.model',user_data)
model1


<gensim.models.word2vec.Word2Vec at 0x23eb1f9ed90>

In [454]:
def vectorizer(list_of_docs,model):
    feature = []
    for rew in list_of_docs :
        zero_vector = np.zeros(model.vector_size)
        vectors = []
        for word in rew :
            try :
                word in model.wv 
                vectors.append(model.wv[word])

            except KeyError:
                continue
        if vectors :
            vectors = np.asarray(vectors)
            avg_vec = vectors.mean(axis = 0)
            feature.append(avg_vec)
        else :
            feature.append(zero_vector)
            
    return feature

In [455]:
user = pd.Series([user_data.split()])
user.tolist()

[['wow', 'loved', 'place']]

In [456]:
user_data1 = np.array(vectorizer(user.tolist(),model1))
user_data1[:,:10]

array([[-0.00237288,  0.0081949 , -0.00066017,  0.00478384, -0.0011599 ,
        -0.00372177,  0.00072419,  0.00753723,  0.00209678, -0.0083062 ]],
      dtype=float32)

In [457]:
svm.predict(user_data1)

array([1])