In [1]:
import numpy as np
import pandas as pd

from sklearn.externals import joblib

In [2]:
# Per-product ranking scores, ordered ascending by the 'kendall_tau' column.
ranking_rfc_fe = pd.read_csv('data/ranking_rfc_fe.csv').sort_values(by='kendall_tau')

# Validation features: wrap the raw array in a DataFrame labelled with the saved column names.
df_cols = np.load('data/df_cols.npy')
X_val = pd.DataFrame(np.load('data/X_val.npy'), columns=df_cols)
y_val = np.load('data/y_val.npy')

# Feature columns fed to the model, and the persisted model itself.
# NOTE(review): joblib.load unpickles the file — only load artifacts from a trusted source.
model_features = np.load('data/rfc_fe_list.npy')
model = joblib.load('data/rnd_forest_best.pk1')

In [13]:
def rank_reviews_by_product_id(X_val, y_val, model, model_features, product_id):
    """
    Rank the reviews of a product by their predicted helpfulness.

    Args:
        X_val(pd.DataFrame): product reviews data-set (validation). Must contain a
            'ProductId' column, every column named in model_features, and the
            'Score', 'Total_Reviews_by_Reviewer', 'WordsCount', 'Summary' and 'Text'
            columns used for sorting and display.
        y_val(np.array): helpfulness labels. Not used by this function; the
            parameter is kept so existing callers keep working.
        model: fitted estimator exposing ``predict`` (the notebook passes a
            RandomForestClassifier).
        model_features(np.array): names of the columns passed to ``model.predict``.
        product_id(str): the product id whose reviews are ranked.

    Returns:
        pd.DataFrame. The product's reviews restricted to the columns
        'Predicted_Helpfulness', 'Summary', 'Text' and 'Score', sorted in
        descending order of predicted helpfulness (ties broken by 'Score',
        'Total_Reviews_by_Reviewer', then 'WordsCount'). The frame is empty when
        no review matches product_id. X_val itself is never modified.

    """
    display_columns = ['Predicted_Helpfulness', 'Summary', 'Text', 'Score']

    # Take an explicit copy: adding a column to a filtered slice of X_val is what
    # triggers pandas' SettingWithCopyWarning.
    products_reviews = X_val[X_val['ProductId'] == product_id].copy()

    # Unknown product: return an empty result instead of handing the model a
    # zero-row frame to predict on.
    if products_reviews.empty:
        return products_reviews.reindex(columns=display_columns)

    # Predict the helpfulness score of each review (row order matches the frame,
    # so the array can be assigned directly).
    products_reviews['Predicted_Helpfulness'] = model.predict(products_reviews[model_features])

    products_reviews = products_reviews.sort_values(
        by=['Predicted_Helpfulness', 'Score', 'Total_Reviews_by_Reviewer', 'WordsCount'],
        ascending=False)

    return products_reviews[display_columns]


In [58]:
## Chosen Examples
# Candidate product ids; the trailing number is presumably each product's
# kendall_tau score — TODO confirm:
#   B001EQ56T4 - 0
#   B004HLCKMY - 1
#   B002Y2OWOC - 1

rank_reviews_by_product_id(
    X_val=X_val,
    y_val=y_val,
    model=model,
    model_features=model_features,
    product_id='B004HLCKMY',
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Predicted_Helpfulness,Summary,Text,Score
14793,10.0,Addicting!!,"I just loved this Hot Chocolate. It was creamy, smooth, tasty, and just plain perfect. It reminded me of whipped Hot Chocolate like you'd get at Dunkin.<br />I bought Swiss Miss for Keurig to compare and wound up returning it. That tasted just like the envelopes.<br />I don't know where there is any chemical taste like some have mentioned.<br /><br />I'm very picky with what I drink and this is just smooth and creamy! Don't hesitate to try it.",5
11704,10.0,really good,"This is the best one we have found for our Kuerig Brewer........everyone loves it. Kids, husband, and me!",5
3964,0.0,The taste is so artificial,"This is my first review. Usually I am too lazy to leave feedback, but this product taste so horrible that I have to say something to warn people who want to try it. If you want to try it, don't! The taste is so artificial and it leaves strange taste in your mouth after you drink it. I don't know what to do with the rest. Worst buy ever.",1


In [49]:
# Display the per-product ranking table, highest kendall_tau first.
(
    ranking_rfc_fe
    .sort_values(by='kendall_tau', ascending=False)
)

Unnamed: 0,ProductId,kendall_tau
409,B000EMQFY4,1.0
2302,B004H3N2LU,1.0
2343,B004N8LMFM,1.0
143,B00061MVTG,1.0
1906,B002RSVOLM,1.0
350,B000E63LB0,1.0
351,B000E63LDS,1.0
1295,B001E52VJS,1.0
163,B0006ZN52E,1.0
1940,B002Y2OWOC,1.0


In [60]:
# Sanity check: show the class of the estimator loaded from disk.
type(model)


sklearn.ensemble.forest.RandomForestClassifier