In [1]:
# First up, I'll import every library that will be used in this project is imported at the start.

# Data handling and processing
import pandas as pd
import numpy as np

# Data visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Statistics
from scipy import stats
import statsmodels.api as sm
from scipy.stats import randint as sp_randint
from time import time

# NLP
import nltk
nltk.download('wordnet')
import re
from textblob import TextBlob
from nltk.corpus import stopwords
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report

  from pandas.core import datetools


[nltk_data] Error loading wordnet: <urlopen error [Errno -2] Name or
[nltk_data]     service not known>


  from numpy.core.umath_tests import inner1d


In [2]:
# Reading in data
data = pd.read_csv('../input/nycomments/NYcomments.csv',sep=";",encoding="utf-8")
data = data[['Hotel Names','Sentiment','Comments']]
data.Sentiment=data.Sentiment.astype(str)
data.columns = ['Hotel Names', 'Sentiment', 'Comments']

In [3]:
# Inspecting the variables
data.info()
data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245545 entries, 0 to 245544
Data columns (total 3 columns):
Hotel Names    245545 non-null object
Sentiment      245545 non-null object
Comments       245456 non-null object
dtypes: object(3)
memory usage: 5.6+ MB


Unnamed: 0,Hotel Names,Sentiment,Comments
0,Hyatt Place New York City/Times Square,1,perfect location! Very friendly staff that hel...
1,Hyatt Place New York City/Times Square,1,Clean and comfortable rooms
2,Hyatt Place New York City/Times Square,0,The best place to be
3,Hyatt Place New York City/Times Square,1,"excellent location, very comfortable rooms and..."
4,Hyatt Place New York City/Times Square,1,Service
5,Hyatt Place New York City/Times Square,1,I got to the hotel 1 hour and 30 minutes early...
6,Hyatt Place New York City/Times Square,1,Esta muy cerca del Time Square. Las habitacion...
7,Hyatt Place New York City/Times Square,1,Sad I could not eat most things. I have a dair...
8,Hyatt Place New York City/Times Square,1,"Location was great, beds were super comfy. Bat..."
9,Hyatt Place New York City/Times Square,1,Great location! Great customer service


In [4]:
# Replacing blank variables with 'unknown' ready for processing
data['Comments'].fillna('unknown', inplace=True)

## 1. Text Preprocessing

Text is the most unstructured form of all the available data, therefore various types of noise are present in it. This means that the data is not readily analysable without any pre-processing. The entire process of cleaning and standardization of text, making it noise-free and ready for analysis is known as text preprocessing. This usually comprises two key steps:
<br>
1. Noise Removal
2. Lexicon Normalisation

### Noise Removal

Any piece of text which is not relevant to the context of the data and the end-output can be specified as the noise.

For example – language stopwords (commonly used words of a language – is, am, the, of, in etc), URLs or links, punctuations and industry specific words. This step deals with removal of all types of noisy entities present in the text.

Following is a python function to strip out noise throughout the reviews:

In [5]:
# Importing SKLearn's list of stopwords and then appending with my own words 
stop = text.ENGLISH_STOP_WORDS

# Basic text cleaning function
def remove_noise(text):
    
    # Make lowercase
    text = text.apply(lambda x: " ".join(x.lower() for x in x.split()))
    
    # Remove whitespaces
    text = text.apply(lambda x: " ".join(x.strip() for x in x.split()))
    
    # Remove special characters
    text = text.apply(lambda x: "".join([" " if ord(i) < 32 or ord(i) > 126 else i for i in x]))
    
    # Remove punctuation
    text = text.str.replace('[^\w\s]', '')
    
    # Remove numbers
    text = text.str.replace('\d+', '')
    
    # Remove Stopwords
    text = text.apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
    
    # Convert to string
    text = text.astype(str)
        
    return text

In [6]:
# Applying noise removal function to data
data['Filtered Review Text'] = remove_noise(data['Comments'])
data.head()

Unnamed: 0,Hotel Names,Sentiment,Comments,Filtered Review Text
0,Hyatt Place New York City/Times Square,1,perfect location! Very friendly staff that hel...,perfect location friendly staff helped solve p...
1,Hyatt Place New York City/Times Square,1,Clean and comfortable rooms,clean comfortable rooms
2,Hyatt Place New York City/Times Square,0,The best place to be,best place
3,Hyatt Place New York City/Times Square,1,"excellent location, very comfortable rooms and...",excellent location comfortable rooms close tim...
4,Hyatt Place New York City/Times Square,1,Service,service


Before moving onto lexicon normalisation, I want to gain a sense of the sentiment per review. I don't intend to use this for any machine learning purposes, more-so out of interest to understand whether reviews lean towards positivity or negativity. I'll come back to this again later.

In [7]:
# Defining a sentiment analyser function
def sentiment_analyser(text):
    return text.apply(lambda Text: pd.Series(TextBlob(Text).sentiment.polarity))

# Applying function to reviews
data['Polarity'] = sentiment_analyser(data['Filtered Review Text'])
data.head(10)

Unnamed: 0,Hotel Names,Sentiment,Comments,Filtered Review Text,Polarity
0,Hyatt Place New York City/Times Square,1,perfect location! Very friendly staff that hel...,perfect location friendly staff helped solve p...,0.368056
1,Hyatt Place New York City/Times Square,1,Clean and comfortable rooms,clean comfortable rooms,0.383333
2,Hyatt Place New York City/Times Square,0,The best place to be,best place,1.0
3,Hyatt Place New York City/Times Square,1,"excellent location, very comfortable rooms and...",excellent location comfortable rooms close tim...,0.7
4,Hyatt Place New York City/Times Square,1,Service,service,0.0
5,Hyatt Place New York City/Times Square,1,I got to the hotel 1 hour and 30 minutes early...,got hotel hour minutes early fault check pm wa...,0.083333
6,Hyatt Place New York City/Times Square,1,Esta muy cerca del Time Square. Las habitacion...,esta muy cerca del time square las habitacione...,0.0
7,Hyatt Place New York City/Times Square,1,Sad I could not eat most things. I have a dair...,sad eat things dairy allergy dont eat sweets,-0.5
8,Hyatt Place New York City/Times Square,1,"Location was great, beds were super comfy. Bat...",location great beds super comfy bathroom nice ...,0.56
9,Hyatt Place New York City/Times Square,1,Great location! Great customer service,great location great customer service,0.8


### Lexicon Normalisation

Another type of textual noise is about the multiple representations exhibited by single word.

For example – “play”, “player”, “played”, “plays” and “playing” are the different variations of the word – “play”. Though they mean different things, contextually they all are similar. This step converts all the disparities of a word into their normalized form (also known as lemma). Normalization is a pivotal step for feature engineering with text as it converts the high dimensional features (N different features) to the low dimensional space (1 feature), which is an ideal ask for any ML model.

There are two methods of lexicon normalisation; Stemming or Lemmatization. I will opt for Lemmatization, as this will return the root form of each word (rather than just stripping suffixes, which is stemming).

In [8]:
# Instantiate the Word tokenizer & Word lemmatizer
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.stem.WordNetLemmatizer()

# Define a word lemmatizer function
def lemmatize_text(text):
    return [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(text)]

# Apply the word lemmatizer function to data
data['Filtered Review Text'] = data['Filtered Review Text'].apply(lemmatize_text)
data.head()

Unnamed: 0,Hotel Names,Sentiment,Comments,Filtered Review Text,Polarity
0,Hyatt Place New York City/Times Square,1,perfect location! Very friendly staff that hel...,"[perfect, location, friendly, staff, helped, s...",0.368056
1,Hyatt Place New York City/Times Square,1,Clean and comfortable rooms,"[clean, comfortable, room]",0.383333
2,Hyatt Place New York City/Times Square,0,The best place to be,"[best, place]",1.0
3,Hyatt Place New York City/Times Square,1,"excellent location, very comfortable rooms and...","[excellent, location, comfortable, room, close...",0.7
4,Hyatt Place New York City/Times Square,1,Service,[service],0.0


# 2. Getting a text matrix

To analyse a preprocessed data, it needs to be converted into features. Depending upon the usage, text features can be constructed using a variety of techniques – in this kernel I will be converting the data into statistical features.

The specific model in question is known as <b>'Term Frequency – Inverse Document Frequency' (TF – IDF)</b>

TF-IDF is a weighted model commonly used for information retrieval problems. It aims to convert the text documents into vector models on the basis of occurrence of words in the documents without taking considering the exact ordering. For Example – let say there is a dataset of N text documents, In any document “D”, TF and IDF will be defined as –

- <b>Term Frequency (TF)</b> – TF for a term “t” is defined as the count of a term “t” in a document “D”
- <b>Inverse Document Frequency (IDF)</b> – IDF for a term is defined as logarithm of ratio of total documents available in the corpus and number of documents containing the term T.
- <b>TF . IDF</b> – TF IDF formula gives the relative importance of a term in a corpus (list of documents), given by the following formula below. Following is the code using python’s scikit learn package to convert a text into tf idf vectors:

Scikit-learn provides two methods to get to our end result (a TD-IDF weight matrix). One is a two-part process of using the CountVectorizer class to count how many times each term shows up in each document, followed by the TfidfTransformer class generating the weight matrix. The other does both steps in a single TfidfVectorizer class. In this Kernel I will proceed with method one; below is step one:

In [9]:
# Getting a count of words from the documents
# Ngram_range is set to 1,2 - meaning either single or two word combination will be extracted
cvec = CountVectorizer(min_df=.015, max_df=.9, ngram_range=(1,2), tokenizer=lambda doc: doc, lowercase=False)
cvec.fit(data['Filtered Review Text'])

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=False, max_df=0.9, max_features=None, min_df=0.015,
        ngram_range=(1, 2), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=<function <lambda> at 0x7f97e071dd08>, vocabulary=None)

In [10]:
# Getting the total n-gram count
len(cvec.vocabulary_)

115

I am happy with that number as a starting point, less than 1000 was my initial aim. If I wanted to be more or less restrictive on n-gram selection, I could adjust the 'min_df' and 'max_df' parameters within my CountVectorizer, which controls for the minimum and maximum amount of documents each word should feature in.

We can now tackle the next step, which is to turn this document into a <b>'bag of words' representation</b>. This creates a separate column for each term that contains the count within each document. After that, we’ll take a look at the <b>sparsity</b> of this representation which lets us know how many <b>nonzero values</b> there are in the dataset. The more sparse the data is the more challenging it will be to model, but that’s a discussion for another day:

In [11]:
# Creating the bag-of-words representation
cvec_counts = cvec.transform(data['Filtered Review Text'])
print('sparse matrix shape:', cvec_counts.shape)
print('nonzero count:', cvec_counts.nnz)
print('sparsity: %.2f%%' % (100.0 * cvec_counts.nnz / (cvec_counts.shape[0] * cvec_counts.shape[1])))

sparse matrix shape: (245545, 115)
nonzero count: 1100706
sparsity: 3.90%


Now that we have term counts for each document, the TfidfTransformer can be applied to calculate the weights for each term in each document:

In [12]:
# Instantiating the TfidfTransformer
transformer = TfidfTransformer()

# Fitting and transforming n-grams
transformed_weights = transformer.fit_transform(cvec_counts)
transformed_weights

<245545x115 sparse matrix of type '<class 'numpy.float64'>'
	with 1100706 stored elements in Compressed Sparse Row format>

Great, we have our weighted words! Just a few more steps required (below); I'm going to extract all of the feature names (which are the n-grams) and put these into a DataFrame along with the corresponding weights per review. Then I am going to add in at the end some summary statistics to understand per review:

- The highest weighted word
- The weight of this word
- The total weighting per review.

In [13]:
# Getting a list of all n-grams
transformed_weights = transformed_weights.toarray()
vocab = cvec.get_feature_names()

# Putting weighted n-grams into a DataFrame and computing some summary statistics
model = pd.DataFrame(transformed_weights, columns=vocab)
#model['Keyword'] = model.idxmax(axis=1)
#model['Max'] = model.max(axis=1)
#model['Sum'] = model.drop('Max', axis=1).sum(axis=1)
model.head(10)

Unnamed: 0,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,good,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.306815,0.0,0.419775,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466262,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.474326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.585494,0.0,0.656124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.476129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.424313,0.373842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.412992,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271285,0.0,0.0,0.0,0.0,0.0,0.0,0.361264,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.340493,0.367445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.455833,0.0,0.0,0.0,0.384182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.496093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.558403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.454279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.370422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.349124,0.37676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.371245,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.364982,0.28086,0.0,0.291943,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.260902,0.0,0.0,0.390858,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264787,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.443657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 3. Data Exploration

Now that we have a td-idf weight matrix, this can be fed directly into a predictive model. Before we do this, let's explore the current data a little more:

### Merging datasets

In [14]:
# Merging td-idf weight matrix with original DataFrame
model = pd.merge(data, model, left_index=True, right_index=True)

In [15]:
# Printing the first 10 reviews left
model.head(10)

Unnamed: 0,Hotel Names,Sentiment,Comments,Filtered Review Text,Polarity,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
0,Hyatt Place New York City/Times Square,1,perfect location! Very friendly staff that hel...,"[perfect, location, friendly, staff, helped, s...",0.368056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466262,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.474326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Hyatt Place New York City/Times Square,1,Clean and comfortable rooms,"[clean, comfortable, room]",0.383333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.585494,0.0,0.656124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.476129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Hyatt Place New York City/Times Square,0,The best place to be,"[best, place]",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hyatt Place New York City/Times Square,1,"excellent location, very comfortable rooms and...","[excellent, location, comfortable, room, close...",0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.424313,0.373842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.412992,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271285,0.0,0.0,0.0,0.0,0.0,0.0,0.361264,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.340493,0.367445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hyatt Place New York City/Times Square,1,Service,[service],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Hyatt Place New York City/Times Square,1,I got to the hotel 1 hour and 30 minutes early...,"[got, hotel, hour, minute, early, fault, check...",0.083333,0.0,0.0,0.0,0.455833,0.0,0.0,0.0,0.384182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.496093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.558403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Hyatt Place New York City/Times Square,1,Esta muy cerca del Time Square. Las habitacion...,"[esta, muy, cerca, del, time, square, la, habi...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.454279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.370422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.349124,0.37676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.371245,0.0,0.0
7,Hyatt Place New York City/Times Square,1,Sad I could not eat most things. I have a dair...,"[sad, eat, thing, dairy, allergy, dont, eat, s...",-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Hyatt Place New York City/Times Square,1,"Location was great, beds were super comfy. Bat...","[location, great, bed, super, comfy, bathroom,...",0.56,0.0,0.0,0.0,0.0,0.364982,0.28086,0.0,0.291943,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.260902,0.0,0.0,0.390858,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Hyatt Place New York City/Times Square,1,Great location! Great customer service,"[great, location, great, customer, service]",0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.443657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
posmodel=model[model["Sentiment"]!="0"]
posmodel.drop("Sentiment",axis=1,inplace=True)
posmodel.drop("Comments",axis=1,inplace=True)
posmodel.drop("Polarity",axis=1,inplace=True)
posmodel.drop("Filtered Review Text",axis=1,inplace=True)
posmodel

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0,Hotel Names,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
0,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.320789,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.306815,0.0,0.419775,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.466262,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.474326,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
1,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.585494,0.000000,0.656124,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.476129,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
3,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.424313,0.373842,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.412992,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.271285,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.361264,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.340493,0.367445,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
4,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
5,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.455833,0.000000,0.000000,0.000000,0.384182,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.496093,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.558403,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
6,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.454279,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.370422,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.349124,0.376760,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.371245,0.000000,0.0
7,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
8,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.364982,0.280860,0.000000,0.291943,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.260902,0.000000,0.000000,0.390858,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.321660,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
9,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.443657,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0
11,Hyatt Place New York City/Times Square,0.0,0.0,0.0,0.000000,0.523418,0.000000,0.000000,0.418674,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.419294,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.304269,0.000000,0.000000,0.000000,0.000000,0.000000,0.531374,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000,0.0


In [17]:
hotelnames=posmodel.groupby("Hotel Names")

posvecmodel=hotelnames.mean()
posvecmodel

Unnamed: 0_level_0,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,good,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
Hotel Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1 Hotel Central Park,0.004591,0.030327,0.003611,0.006325,0.005705,0.025611,0.003558,0.017724,0.003383,0.002421,0.006738,0.002998,0.032191,0.032824,0.003337,0.005413,0.024106,0.003344,0.032434,0.006236,0.009985,0.011808,0.005708,0.002627,0.004136,0.004159,0.010804,0.030745,0.017417,0.010712,0.016857,0.008924,0.012951,0.021502,0.040444,0.001336,0.055706,0.010671,0.018497,0.023725,...,0.016527,0.004394,0.010976,0.019318,0.002053,0.004427,0.002792,0.017866,0.028387,0.046947,0.002524,0.020099,0.016853,0.033131,0.001484,0.004084,0.001896,0.089402,0.006800,0.000000,0.002202,0.010293,0.006025,0.016275,0.009532,0.007099,0.010485,0.001929,0.012846,0.009314,0.009340,0.010751,0.010783,0.002250,0.015665,0.013755,0.004414,0.016018,0.006092,0.008859
AC Hotel by Marriott New York Downtown,0.010181,0.024852,0.012882,0.004535,0.009924,0.048060,0.010363,0.014681,0.006477,0.002499,0.005679,0.003496,0.000407,0.000464,0.011296,0.007333,0.086598,0.015840,0.037746,0.019443,0.008082,0.017578,0.011446,0.007938,0.003060,0.016325,0.007288,0.031405,0.010971,0.007192,0.016586,0.003531,0.016993,0.007939,0.013194,0.010111,0.030521,0.004242,0.012770,0.036255,...,0.018142,0.006850,0.008599,0.015448,0.008216,0.003105,0.008177,0.015659,0.008572,0.074729,0.015250,0.026981,0.011936,0.018346,0.009709,0.005347,0.000405,0.078118,0.011482,0.000000,0.006865,0.021613,0.014580,0.024572,0.010763,0.007275,0.008027,0.000412,0.008596,0.008314,0.008337,0.007121,0.007143,0.005161,0.013348,0.082245,0.012249,0.020266,0.006998,0.008233
AKA Central Park,0.002868,0.013561,0.015056,0.007241,0.006256,0.022830,0.005293,0.002649,0.008347,0.001062,0.003065,0.017191,0.048893,0.052312,0.002761,0.006349,0.035628,0.012298,0.020979,0.011350,0.005625,0.010575,0.005487,0.015599,0.003224,0.005381,0.012396,0.039910,0.020469,0.002949,0.013261,0.005672,0.007736,0.012762,0.036344,0.002437,0.025676,0.015304,0.013297,0.016274,...,0.025826,0.004111,0.011989,0.010873,0.006873,0.009864,0.005581,0.010953,0.005301,0.072079,0.003779,0.011336,0.008362,0.011417,0.020556,0.005495,0.004117,0.087960,0.009679,0.000000,0.004755,0.017238,0.020292,0.008672,0.006834,0.002665,0.009757,0.004188,0.004628,0.005347,0.005362,0.032195,0.032291,0.005111,0.009544,0.001793,0.010435,0.027755,0.008254,0.000668
AKA Times Square,0.006555,0.013841,0.005191,0.002288,0.007148,0.020336,0.016399,0.003596,0.003364,0.000629,0.014694,0.007503,0.016125,0.000000,0.006439,0.003581,0.027835,0.014428,0.021995,0.009477,0.003107,0.013119,0.004521,0.013913,0.001210,0.004962,0.007470,0.062214,0.020734,0.016351,0.015159,0.010920,0.022745,0.010963,0.030509,0.002099,0.028781,0.013350,0.011000,0.028123,...,0.027055,0.018167,0.013246,0.026861,0.004044,0.011142,0.004982,0.007388,0.009349,0.041906,0.000000,0.025884,0.020363,0.006861,0.012874,0.001482,0.035419,0.064866,0.009263,0.000637,0.000859,0.013968,0.003219,0.011085,0.014236,0.002190,0.039552,0.034575,0.007165,0.017063,0.017110,0.023151,0.021976,0.004657,0.019422,0.002356,0.002964,0.019586,0.007717,0.000508
Aliz Hotel Times Square,0.007639,0.028374,0.005501,0.051634,0.013891,0.037439,0.008272,0.013071,0.003607,0.003956,0.003424,0.004218,0.007807,0.001002,0.008143,0.011811,0.074057,0.018487,0.038293,0.011735,0.007175,0.012903,0.006304,0.007734,0.001918,0.006287,0.012099,0.029012,0.009458,0.008942,0.009143,0.008464,0.014152,0.003743,0.017778,0.016184,0.027126,0.007938,0.007462,0.033905,...,0.008022,0.008739,0.008216,0.018653,0.005864,0.006360,0.006876,0.015104,0.011637,0.076570,0.012413,0.019215,0.012758,0.011407,0.004659,0.014365,0.033559,0.056628,0.013647,0.003307,0.008208,0.016948,0.009915,0.017291,0.011095,0.006526,0.036998,0.032972,0.002483,0.008768,0.008622,0.007634,0.007536,0.001906,0.010887,0.044438,0.007468,0.011500,0.007853,0.007382
Aloft Harlem,0.003611,0.008256,0.015143,0.009375,0.003974,0.045741,0.001804,0.019328,0.000000,0.003455,0.008673,0.005022,0.000000,0.000000,0.000000,0.003424,0.084027,0.024848,0.053321,0.012330,0.002148,0.007465,0.012326,0.001739,0.000000,0.024098,0.004010,0.012609,0.010837,0.004768,0.005123,0.000000,0.013459,0.000000,0.013755,0.003635,0.058579,0.011377,0.015148,0.036408,...,0.006263,0.001864,0.022654,0.007627,0.011133,0.001667,0.002229,0.010224,0.013660,0.075309,0.014628,0.021316,0.001419,0.027123,0.008652,0.006094,0.000000,0.092479,0.017098,0.000000,0.001202,0.038899,0.016172,0.009220,0.018368,0.002429,0.011314,0.000000,0.000000,0.001535,0.001539,0.001685,0.001690,0.000000,0.002528,0.014022,0.008118,0.008651,0.000000,0.001224
Andaz 5th Avenue-a concept by Hyatt,0.000000,0.019407,0.000000,0.000000,0.032773,0.005508,0.014247,0.031575,0.008751,0.000000,0.003646,0.007247,0.024603,0.001434,0.008686,0.000000,0.030935,0.008289,0.011189,0.004684,0.000000,0.004395,0.010782,0.001851,0.000000,0.007638,0.000000,0.000000,0.011700,0.010309,0.013280,0.006057,0.005091,0.007088,0.043135,0.000000,0.032328,0.008417,0.018565,0.036784,...,0.006702,0.003066,0.003248,0.029193,0.006952,0.006702,0.005777,0.025105,0.007475,0.094746,0.003043,0.006812,0.000000,0.007042,0.023021,0.003708,0.001078,0.069731,0.007132,0.000000,0.007050,0.015913,0.001312,0.019276,0.007770,0.008052,0.008705,0.001097,0.016826,0.008492,0.008516,0.009158,0.009186,0.003930,0.010429,0.013291,0.009124,0.023544,0.013416,0.011246
Archer Hotel New York,0.007762,0.024343,0.003976,0.057073,0.012301,0.047819,0.009287,0.007086,0.001696,0.038531,0.008520,0.003780,0.011032,0.001605,0.007883,0.001597,0.037967,0.010445,0.032130,0.017048,0.009422,0.022114,0.007562,0.008601,0.001094,0.004452,0.018977,0.038999,0.010474,0.014832,0.015263,0.011388,0.018264,0.007717,0.031672,0.004636,0.040203,0.014101,0.014916,0.031616,...,0.028382,0.016574,0.007005,0.032009,0.004085,0.003905,0.006636,0.012576,0.014455,0.062724,0.004390,0.024979,0.035298,0.037596,0.003766,0.017873,0.009994,0.100687,0.014241,0.051133,0.002798,0.011936,0.003472,0.029441,0.006678,0.006225,0.014509,0.009937,0.005460,0.016592,0.016638,0.009381,0.008761,0.008866,0.026755,0.042387,0.006259,0.012553,0.008892,0.018903
Arlo NoMad,0.005921,0.039364,0.009508,0.047398,0.003880,0.028983,0.007104,0.013589,0.001766,0.019957,0.002723,0.002197,0.010078,0.000905,0.010070,0.005288,0.040985,0.005615,0.020789,0.014047,0.010299,0.009848,0.006651,0.002583,0.000427,0.007680,0.012032,0.027427,0.008730,0.009612,0.010220,0.005013,0.015953,0.005453,0.013680,0.015833,0.036266,0.006478,0.010990,0.033400,...,0.009781,0.009612,0.019089,0.008624,0.006751,0.003882,0.004205,0.023388,0.012103,0.084575,0.009616,0.022082,0.014373,0.016740,0.005098,0.021510,0.004112,0.077356,0.016539,0.022511,0.003572,0.021187,0.005124,0.020536,0.009174,0.005598,0.012284,0.002559,0.002651,0.012260,0.012294,0.004401,0.004107,0.007109,0.009911,0.100602,0.004107,0.006267,0.006001,0.010041
Artezen Hotel,0.002795,0.022811,0.009157,0.000000,0.014684,0.045958,0.002236,0.013295,0.000000,0.000938,0.004409,0.001654,0.000000,0.000000,0.001510,0.009054,0.097611,0.015419,0.056667,0.005641,0.007458,0.018572,0.005113,0.003117,0.000000,0.010226,0.014323,0.012913,0.002328,0.000725,0.017919,0.003819,0.002976,0.003541,0.043260,0.008961,0.068453,0.014119,0.025879,0.039405,...,0.021140,0.002326,0.011740,0.006531,0.007477,0.005516,0.018727,0.024881,0.010170,0.107931,0.020037,0.011410,0.032751,0.029833,0.006157,0.011791,0.000557,0.122057,0.021692,0.000000,0.022127,0.026823,0.037890,0.043433,0.009865,0.003313,0.001756,0.000566,0.003972,0.005073,0.005088,0.004453,0.004466,0.000000,0.023612,0.011388,0.008274,0.010965,0.006095,0.014290


In [18]:
negmodel=model[model["Sentiment"]!="1"]
negmodel.drop("Sentiment",axis=1,inplace=True)
negmodel.drop("Comments",axis=1,inplace=True)
negmodel.drop("Polarity",axis=1,inplace=True)
negmodel.drop("Filtered Review Text",axis=1,inplace=True)
negmodel

Unnamed: 0,Hotel Names,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
2,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
10,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
17,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
30,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,1.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
38,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,1.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
43,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.493395,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.410477,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.443669,0.000000,0.000000,0.0,0.274297,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
47,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
56,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.635309,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.772258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
68,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.574213,0.0,0.0,0.000000,0.000000,0.000000,0.501402,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.480218,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.433896,0.000000,0.000000,0.0,0.000000
71,Hyatt Place New York City/Times Square,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.00000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000


In [19]:
negmodel.groupby("Hotel Names")
hotelnames=negmodel.groupby("Hotel Names")
negvecmodel=hotelnames.mean()
negvecmodel.info()
negvecmodel



<class 'pandas.core.frame.DataFrame'>
Index: 209 entries, 1 Hotel Central Park to citizenM New York Bowery
Columns: 115 entries, al to zimmer
dtypes: float64(115)
memory usage: 189.4+ KB


Unnamed: 0_level_0,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,good,...,personal,personnel,place,posizione,price,que,r,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer
Hotel Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1 Hotel Central Park,0.000000,0.000000,0.000000,0.033818,0.124290,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.027112,0.000000,0.006232,0.021734,0.019408,0.000000,0.000000,0.000000,0.029548,0.008026,0.008952,0.000000,0.000000,0.000000,0.009026,0.051816,0.016214,0.000000,0.007559,0.012048,0.005486,0.000000,0.013499,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.023031,0.000000,0.018379,0.023139,0.041308,0.000000,0.000000,0.186593,0.000000,0.029255,0.018004,0.023183,0.000000,0.091420,0.000000,0.032258,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.066612,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.017636,0.021039,0.000000,0.014934,0.000000,0.009482
AC Hotel by Marriott New York Downtown,0.000000,0.010330,0.000000,0.000000,0.033308,0.042069,0.000000,0.035581,0.000000,0.011925,0.000000,0.000000,0.000000,0.000000,0.000000,0.036004,0.031258,0.000000,0.000000,0.000000,0.038060,0.014971,0.067643,0.015976,0.014439,0.049766,0.000000,0.009593,0.032732,0.000000,0.029405,0.000000,0.000000,0.000000,0.009342,0.026316,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.024631,0.000000,0.000000,0.000000,0.000000,0.109681,0.000000,0.012556,0.000000,0.051413,0.023160,0.011368,0.000000,0.043352,0.000000,0.000000,0.000000,0.056136,0.000000,0.000000,0.069340,0.000000,0.021325,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026316,0.000000,0.010825,0.000000,0.026316
AKA Central Park,0.023584,0.000000,0.000000,0.032979,0.000000,0.000000,0.000000,0.027795,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.022322,0.012748,0.036610,0.000000,0.000000,0.069580,0.000000,0.060599,0.110029,0.000000,0.132210,0.024499,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.101117,0.000000,0.000000,0.068143,0.030813,0.000000,0.029224,0.066667,0.000000,0.041020,0.000000,0.118695,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.025314,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026438,0.000000,0.066667,0.000000,0.018363,0.000000,0.000000
AKA Times Square,0.000000,0.000000,0.000000,0.083333,0.064769,0.015485,0.008489,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.009037,0.000000,0.000000,0.000000,0.000000,0.000000,0.025264,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006139,0.000000,0.000000,0.000000,0.000000,0.015911,0.000000,0.000000,0.034636,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.032974,0.035831,0.000000,0.000000,0.081882,0.000000,0.027619,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.040170,0.009523,0.023628,0.000000,0.000000,0.008607,0.008631,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006928,0.000000,0.000000
Aliz Hotel Times Square,0.006570,0.009193,0.006925,0.048005,0.009845,0.026664,0.000000,0.005952,0.000000,0.000000,0.000000,0.000000,0.003123,0.000000,0.025917,0.014533,0.010442,0.007368,0.005652,0.000000,0.023877,0.012883,0.022745,0.000904,0.000000,0.014831,0.008751,0.039153,0.012959,0.000000,0.006716,0.009617,0.006307,0.000000,0.002110,0.024441,0.005666,0.000000,0.007752,0.008254,...,0.003086,0.001757,0.007505,0.003493,0.020138,0.016409,0.012308,0.016648,0.014258,0.161227,0.002728,0.024712,0.002891,0.034001,0.004033,0.106335,0.000550,0.048965,0.000000,0.000000,0.003376,0.029803,0.000000,0.005517,0.052370,0.003645,0.027275,0.000560,0.000990,0.002009,0.002014,0.000000,0.000000,0.008103,0.013100,0.017949,0.006185,0.008965,0.010226,0.014059
Aloft Harlem,0.000000,0.000000,0.000000,0.035714,0.035714,0.079188,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.014796,0.000000,0.000000,0.000000,0.000000,0.035714,0.000000,0.022066,0.000000,0.000000,0.000000,0.000000,0.019579,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.083871,0.000000,0.000000,0.000000,0.008440,...,0.029688,0.000000,0.010901,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.091433,0.021685,0.011981,0.000000,0.000000,0.000000,0.046907,0.000000,0.000000,0.000000,0.000000,0.000000,0.061033,0.000000,0.000000,0.015065,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.035714,0.000000,0.000000,0.000000
Andaz 5th Avenue-a concept by Hyatt,0.000000,0.000000,0.000000,0.000000,0.000000,0.022506,0.000000,0.023394,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.125000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030861,0.000000,0.000000,0.000000,0.000000,0.000000,0.036514,0.000000,0.036320,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.040017,0.000000,0.000000,0.065498,0.159003,0.000000,0.106466,0.000000,0.000000,0.000000,0.000000,0.000000,0.034591,0.000000,0.000000,0.000000,0.026390,0.000000,0.000000,0.056874,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.051388,0.000000,0.000000,0.000000,0.000000
Archer Hotel New York,0.000000,0.000000,0.000000,0.018417,0.018790,0.065714,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.038462,0.000000,0.000000,0.000000,0.023912,0.000000,0.000000,0.017445,0.000000,0.000000,0.000000,0.015565,0.000000,0.028702,0.000000,0.000000,0.000000,0.009459,0.008615,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.036953,...,0.000000,0.038503,0.014507,0.000000,0.000000,0.000000,0.000000,0.000000,0.039784,0.287277,0.000000,0.007477,0.019567,0.000000,0.015767,0.107638,0.000000,0.011112,0.000000,0.020926,0.000000,0.000000,0.000000,0.000000,0.018269,0.000000,0.000000,0.000000,0.000000,0.009321,0.009347,0.000000,0.000000,0.000000,0.019166,0.017048,0.000000,0.000000,0.000000,0.020610
Arlo NoMad,0.005683,0.005371,0.001713,0.023292,0.020047,0.043760,0.004721,0.008417,0.005357,0.026728,0.004783,0.000000,0.000000,0.000000,0.016365,0.029051,0.021070,0.000000,0.003631,0.012914,0.021127,0.010006,0.012928,0.004411,0.005169,0.012389,0.000000,0.016427,0.008299,0.000000,0.009790,0.003239,0.010458,0.000000,0.000000,0.040381,0.000000,0.000000,0.000000,0.014244,...,0.000000,0.000000,0.007619,0.000000,0.042739,0.009519,0.013508,0.015208,0.022624,0.220537,0.000000,0.036585,0.008118,0.018205,0.005664,0.047993,0.000000,0.007212,0.000000,0.001023,0.003083,0.025449,0.000000,0.008858,0.075955,0.000000,0.026842,0.000000,0.000000,0.014445,0.014485,0.000000,0.000000,0.003642,0.007952,0.054667,0.000000,0.003713,0.007732,0.007011
Artezen Hotel,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.093230,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.087375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.075833,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.094534,0.094797,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [20]:
mainmodel=pd.concat([posvecmodel,negvecmodel],axis=1)
mainmodel.info()


<class 'pandas.core.frame.DataFrame'>
Index: 209 entries, 1 Hotel Central Park to citizenM New York Bowery
Columns: 230 entries, al to zimmer
dtypes: float64(230)
memory usage: 377.2+ KB


In [21]:
featvect=[]
for i in range(len(mainmodel.index)):
    featvect.append(mainmodel.iloc[i].tolist())


featvect


[[0.004590546660397578,
  0.030326914590788873,
  0.0036108646509743045,
  0.006324966881164596,
  0.005704901451062345,
  0.02561086174697536,
  0.003557750864748869,
  0.01772447829933952,
  0.0033830612782538277,
  0.0024208496729652925,
  0.006737545669302368,
  0.0029984838622500115,
  0.03219141136578449,
  0.032824322879065074,
  0.003336984328827323,
  0.005412718677916639,
  0.02410638761183297,
  0.003344041456836213,
  0.032433607623113536,
  0.006236431028412304,
  0.009984686415327706,
  0.011807816230380347,
  0.0057076024823013065,
  0.002626735454487159,
  0.004136173786298142,
  0.004159418306322586,
  0.010804078682341883,
  0.030745276029357275,
  0.01741650976394556,
  0.010711995058234126,
  0.01685665176010384,
  0.008923782257413162,
  0.012950694170425707,
  0.021501874935053795,
  0.04044377826623612,
  0.0013363889703730913,
  0.05570612991950514,
  0.010671415859617542,
  0.018497231045184453,
  0.023724692564790338,
  0.004711280695143006,
  0.06302368046909

In [22]:
datas = pd.read_csv('../input/scoredata/NYscores.csv',sep=";",encoding="utf-8")
datas.info()
groupscore= datas.groupby("Hotel Names")
scores=groupscore.mean()

endmodel=pd.concat([mainmodel,scores],axis=1)
endmodel

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209 entries, 0 to 208
Data columns (total 8 columns):
Hotel Names        209 non-null object
Staff              209 non-null float64
Facilities         209 non-null float64
Cleanliness        209 non-null float64
Comfort            209 non-null float64
Value for money    209 non-null float64
Location           209 non-null float64
Free WiFi          209 non-null float64
dtypes: float64(7), object(1)
memory usage: 13.1+ KB


Unnamed: 0_level_0,al,amazing,area,bar,bathroom,bed,bien,breakfast,buena,building,c,cama,central,central park,chambre,check,clean,close,comfortable,comfy,d,da,day,del,desayuno,desk,die,e,el,emplacement,en,est,et,excelente,excellent,floor,friendly,friendly helpful,friendly staff,good,...,really,restaurant,room,room clean,s,sehr,service,size,small,square,staff,staff friendly,state,station,stay,subway,super,t,tel,time,time square,todo,tr,tr s,ubicaci,ubicaci n,una,und,view,walk,y,york,zimmer,Staff,Facilities,Cleanliness,Comfort,Value for money,Location,Free WiFi
Hotel Names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1 Hotel Central Park,0.004591,0.030327,0.003611,0.006325,0.005705,0.025611,0.003558,0.017724,0.003383,0.002421,0.006738,0.002998,0.032191,0.032824,0.003337,0.005413,0.024106,0.003344,0.032434,0.006236,0.009985,0.011808,0.005708,0.002627,0.004136,0.004159,0.010804,0.030745,0.017417,0.010712,0.016857,0.008924,0.012951,0.021502,0.040444,0.001336,0.055706,0.010671,0.018497,0.023725,...,0.000000,0.000000,0.186593,0.000000,0.029255,0.018004,0.023183,0.000000,0.091420,0.000000,0.032258,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.066612,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.017636,0.021039,0.000000,0.014934,0.000000,0.009482,9.0,8.6,9.0,8.8,7.6,9.6,9.1
AC Hotel by Marriott New York Downtown,0.010181,0.024852,0.012882,0.004535,0.009924,0.048060,0.010363,0.014681,0.006477,0.002499,0.005679,0.003496,0.000407,0.000464,0.011296,0.007333,0.086598,0.015840,0.037746,0.019443,0.008082,0.017578,0.011446,0.007938,0.003060,0.016325,0.007288,0.031405,0.010971,0.007192,0.016586,0.003531,0.016993,0.007939,0.013194,0.010111,0.030521,0.004242,0.012770,0.036255,...,0.000000,0.000000,0.109681,0.000000,0.012556,0.000000,0.051413,0.023160,0.011368,0.000000,0.043352,0.000000,0.000000,0.000000,0.056136,0.000000,0.000000,0.069340,0.000000,0.021325,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026316,0.000000,0.010825,0.000000,0.026316,9.0,8.8,9.1,9.0,8.5,8.9,8.4
AKA Central Park,0.002868,0.013561,0.015056,0.007241,0.006256,0.022830,0.005293,0.002649,0.008347,0.001062,0.003065,0.017191,0.048893,0.052312,0.002761,0.006349,0.035628,0.012298,0.020979,0.011350,0.005625,0.010575,0.005487,0.015599,0.003224,0.005381,0.012396,0.039910,0.020469,0.002949,0.013261,0.005672,0.007736,0.012762,0.036344,0.002437,0.025676,0.015304,0.013297,0.016274,...,0.000000,0.029224,0.066667,0.000000,0.041020,0.000000,0.118695,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.025314,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026438,0.000000,0.066667,0.000000,0.018363,0.000000,0.000000,8.8,8.3,8.6,8.6,7.7,9.6,9.0
AKA Times Square,0.006555,0.013841,0.005191,0.002288,0.007148,0.020336,0.016399,0.003596,0.003364,0.000629,0.014694,0.007503,0.016125,0.000000,0.006439,0.003581,0.027835,0.014428,0.021995,0.009477,0.003107,0.013119,0.004521,0.013913,0.001210,0.004962,0.007470,0.062214,0.020734,0.016351,0.015159,0.010920,0.022745,0.010963,0.030509,0.002099,0.028781,0.013350,0.011000,0.028123,...,0.000000,0.000000,0.081882,0.000000,0.027619,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.040170,0.009523,0.023628,0.000000,0.000000,0.008607,0.008631,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006928,0.000000,0.000000,9.1,8.5,8.8,8.8,7.7,9.5,8.9
Aliz Hotel Times Square,0.007639,0.028374,0.005501,0.051634,0.013891,0.037439,0.008272,0.013071,0.003607,0.003956,0.003424,0.004218,0.007807,0.001002,0.008143,0.011811,0.074057,0.018487,0.038293,0.011735,0.007175,0.012903,0.006304,0.007734,0.001918,0.006287,0.012099,0.029012,0.009458,0.008942,0.009143,0.008464,0.014152,0.003743,0.017778,0.016184,0.027126,0.007938,0.007462,0.033905,...,0.016648,0.014258,0.161227,0.002728,0.024712,0.002891,0.034001,0.004033,0.106335,0.000550,0.048965,0.000000,0.000000,0.003376,0.029803,0.000000,0.005517,0.052370,0.003645,0.027275,0.000560,0.000990,0.002009,0.002014,0.000000,0.000000,0.008103,0.013100,0.017949,0.006185,0.008965,0.010226,0.014059,8.3,8.3,8.7,8.5,7.7,8.8,8.4
Aloft Harlem,0.003611,0.008256,0.015143,0.009375,0.003974,0.045741,0.001804,0.019328,0.000000,0.003455,0.008673,0.005022,0.000000,0.000000,0.000000,0.003424,0.084027,0.024848,0.053321,0.012330,0.002148,0.007465,0.012326,0.001739,0.000000,0.024098,0.004010,0.012609,0.010837,0.004768,0.005123,0.000000,0.013459,0.000000,0.013755,0.003635,0.058579,0.011377,0.015148,0.036408,...,0.000000,0.000000,0.091433,0.021685,0.011981,0.000000,0.000000,0.000000,0.046907,0.000000,0.000000,0.000000,0.000000,0.000000,0.061033,0.000000,0.000000,0.015065,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.035714,0.000000,0.000000,0.000000,8.8,8.3,8.6,8.6,7.8,8.4,8.7
Andaz 5th Avenue-a concept by Hyatt,0.000000,0.019407,0.000000,0.000000,0.032773,0.005508,0.014247,0.031575,0.008751,0.000000,0.003646,0.007247,0.024603,0.001434,0.008686,0.000000,0.030935,0.008289,0.011189,0.004684,0.000000,0.004395,0.010782,0.001851,0.000000,0.007638,0.000000,0.000000,0.011700,0.010309,0.013280,0.006057,0.005091,0.007088,0.043135,0.000000,0.032328,0.008417,0.018565,0.036784,...,0.000000,0.065498,0.159003,0.000000,0.106466,0.000000,0.000000,0.000000,0.000000,0.000000,0.034591,0.000000,0.000000,0.000000,0.026390,0.000000,0.000000,0.056874,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.051388,0.000000,0.000000,0.000000,0.000000,8.8,8.4,8.8,8.7,7.7,9.4,8.5
Archer Hotel New York,0.007762,0.024343,0.003976,0.057073,0.012301,0.047819,0.009287,0.007086,0.001696,0.038531,0.008520,0.003780,0.011032,0.001605,0.007883,0.001597,0.037967,0.010445,0.032130,0.017048,0.009422,0.022114,0.007562,0.008601,0.001094,0.004452,0.018977,0.038999,0.010474,0.014832,0.015263,0.011388,0.018264,0.007717,0.031672,0.004636,0.040203,0.014101,0.014916,0.031616,...,0.000000,0.039784,0.287277,0.000000,0.007477,0.019567,0.000000,0.015767,0.107638,0.000000,0.011112,0.000000,0.020926,0.000000,0.000000,0.000000,0.000000,0.018269,0.000000,0.000000,0.000000,0.000000,0.009321,0.009347,0.000000,0.000000,0.000000,0.019166,0.017048,0.000000,0.000000,0.000000,0.020610,9.3,8.8,9.3,8.9,8.0,9.6,9.1
Arlo NoMad,0.005921,0.039364,0.009508,0.047398,0.003880,0.028983,0.007104,0.013589,0.001766,0.019957,0.002723,0.002197,0.010078,0.000905,0.010070,0.005288,0.040985,0.005615,0.020789,0.014047,0.010299,0.009848,0.006651,0.002583,0.000427,0.007680,0.012032,0.027427,0.008730,0.009612,0.010220,0.005013,0.015953,0.005453,0.013680,0.015833,0.036266,0.006478,0.010990,0.033400,...,0.015208,0.022624,0.220537,0.000000,0.036585,0.008118,0.018205,0.005664,0.047993,0.000000,0.007212,0.000000,0.001023,0.003083,0.025449,0.000000,0.008858,0.075955,0.000000,0.026842,0.000000,0.000000,0.014445,0.014485,0.000000,0.000000,0.003642,0.007952,0.054667,0.000000,0.003713,0.007732,0.007011,8.9,8.4,9.0,8.6,7.8,9.2,9.1
Artezen Hotel,0.002795,0.022811,0.009157,0.000000,0.014684,0.045958,0.002236,0.013295,0.000000,0.000938,0.004409,0.001654,0.000000,0.000000,0.001510,0.009054,0.097611,0.015419,0.056667,0.005641,0.007458,0.018572,0.005113,0.003117,0.000000,0.010226,0.014323,0.012913,0.002328,0.000725,0.017919,0.003819,0.002976,0.003541,0.043260,0.008961,0.068453,0.014119,0.025879,0.039405,...,0.000000,0.000000,0.000000,0.000000,0.075833,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.094534,0.094797,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,9.6,9.1,9.5,9.5,8.9,9.4,9.4


In [23]:
w=endmodel.iloc[:,:230]
z=endmodel.iloc[:,230:]


In [24]:
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
xx = sc.fit_transform(w)

from sklearn import preprocessing


min_max_scaler = preprocessing.MinMaxScaler()
w = min_max_scaler.fit_transform(xx)
print(w[:,6])


print(xx[:,6])
w_train, w_test,z_train,z_test = train_test_split(w,z,test_size=0.33, random_state=0)

[0.07698791 0.22424082 0.1145312  0.35487673 0.17900301 0.03902782
 0.3083009  0.20097602 0.15373256 0.04838394 0.05487339 0.43345061
 0.24266296 0.18639172 0.21100283 0.27291946 0.19999315 0.170941
 0.29550735 0.14315203 0.12086303 0.13267484 0.22875609 0.2457499
 0.06000948 0.18030843 0.         0.20156037 0.12898146 0.
 0.07089008 0.13808793 0.12833285 0.15246941 0.16191905 0.15312798
 0.117085   0.35669376 0.31156975 0.         0.23839896 0.26547763
 0.14866028 0.16344152 0.42784347 0.26136743 0.2078585  0.2999565
 0.19980438 0.20683078 0.17480286 0.15535264 0.04091654 0.15021922
 0.28894837 0.09812998 0.09588319 0.18929853 0.11794541 0.15074403
 0.26469291 0.26459502 0.23453738 0.1341947  0.14121249 0.21809734
 0.19538555 0.08191635 0.20166248 0.11603254 0.12517057 0.16720063
 0.04104967 0.27303307 0.28801717 0.37102201 0.22195741 0.20871647
 0.23157081 0.17430987 0.13794973 0.25230023 0.27730347 0.10707392
 0.15333129 0.09827119 0.14007632 0.14327567 0.19114491 1.
 0.14887204 0.1

In [25]:
stafflist=endmodel["Staff"].tolist()
facilitieslist=endmodel["Facilities"].tolist()
cleanlist=endmodel["Cleanliness"].tolist()
comfortlist=endmodel["Comfort"].tolist()
moneylist=endmodel["Value for money"].tolist()
locationlist=endmodel["Location"].tolist()
wifilist=endmodel["Free WiFi"].tolist()

wifilist

[9.1,
 8.4,
 9.0,
 8.9,
 8.4,
 8.7,
 8.5,
 9.1,
 9.1,
 9.4,
 9.6,
 9.0,
 8.2,
 8.6,
 8.6,
 8.1,
 8.9,
 6.6,
 8.6,
 8.1,
 8.9,
 8.8,
 9.3,
 8.5,
 8.4,
 7.8,
 8.8,
 8.5,
 8.7,
 9.5,
 5.8,
 8.9,
 7.8,
 8.1,
 7.2,
 8.8,
 8.8,
 8.4,
 8.1,
 -1.0,
 8.5,
 8.5,
 9.1,
 8.7,
 8.9,
 8.6,
 8.0,
 8.1,
 8.6,
 8.7,
 8.2,
 8.6,
 8.8,
 8.2,
 8.5,
 8.2,
 7.5,
 8.5,
 8.0,
 8.6,
 8.8,
 8.4,
 8.3,
 8.1,
 8.4,
 8.9,
 8.5,
 8.7,
 7.9,
 8.9,
 8.4,
 8.0,
 -1.0,
 8.1,
 8.1,
 8.4,
 8.2,
 8.5,
 7.9,
 8.6,
 8.8,
 8.0,
 8.1,
 7.8,
 8.5,
 9.2,
 8.9,
 9.1,
 6.7,
 8.8,
 7.4,
 8.4,
 6.4,
 8.7,
 8.8,
 8.0,
 8.2,
 9.0,
 8.8,
 8.6,
 7.1,
 8.3,
 8.3,
 8.2,
 8.5,
 8.9,
 8.7,
 7.8,
 7.8,
 7.9,
 8.1,
 6.9,
 9.4,
 8.7,
 -1.0,
 8.6,
 -1.0,
 -1.0,
 7.5,
 7.9,
 8.3,
 8.9,
 8.7,
 9.0,
 7.5,
 7.5,
 -1.0,
 -1.0,
 8.6,
 8.2,
 8.6,
 7.3,
 10.0,
 8.3,
 8.9,
 8.8,
 8.5,
 -1.0,
 8.8,
 9.2,
 9.4,
 8.8,
 8.8,
 8.7,
 8.6,
 -1.0,
 8.8,
 8.4,
 9.1,
 9.1,
 -1.0,
 7.8,
 6.2,
 8.2,
 7.9,
 -1.0,
 8.3,
 10.0,
 8.7,
 8.1,
 8.6,
 7.7,
 8.9,
 7.5,
 8.

In [26]:
from sklearn import svm
from sklearn.metrics import mean_absolute_error
from sklearn import tree


In [27]:

X1 = featvect[205:892]
y1= wifilist[205:892]
regr1 =svm.SVR()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(wifilist[0:205], pr)

1.1309472469449802

In [28]:
X1 = featvect[205:892]
y1= locationlist[205:892]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(locationlist[0:205], pr)

0.5360975609756095

In [29]:
X1 = featvect[205:892]
y1= moneylist[205:892]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(moneylist[0:205], pr)

0.5199999999999999

In [30]:
X1 = featvect[52:230]
y1= wifilist[52:230]
regr1 =svm.SVR()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:52])

mean_absolute_error(wifilist[0:52], pr)

0.648169650838432

In [31]:
X_train = featvect[52:54]
y_train= [5,6]
X_test=featvect[53:55]
y_test=[7,8]
from sklearn.linear_model import LogisticRegression
logr = LogisticRegression(random_state=0)

logr.fit(X_train,y_train)


y_pred = logr.predict(X_test)
print(y_pred)
print(y_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_pred)
print("Logistic")
print(cm)

[6 5]
[7, 8]
Logistic
[[0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [1 0 0 0]]


In [32]:
X1 = featvect[205:892]
y1= comfortlist[205:892]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(comfortlist[0:205], pr)

0.7136585365853656

In [33]:
X1 = featvect[205:892]
y1= cleanlist[205:892]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(cleanlist[0:205], pr)

0.535121951219512

In [34]:
X1 = featvect[205:892]
y1= stafflist[205:892]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:205])

mean_absolute_error(stafflist[0:205], pr)

0.538048780487805

In [35]:
X1 = featvect[49:209]
y1= comfortlist[49:209]
regr1 =tree.DecisionTreeRegressor()
regr1.fit(X1, y1)
pr=regr1.predict(featvect[0:49])

mean_absolute_error(comfortlist[0:49], pr)

0.3326530612244898