### Creating data for our example

### Logistic Regression

In [7]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Train a logistic-regression classifier on the labelled product data and
# use it to classify the row(s) loaded from 'product.csv'.

# Row(s) we want to classify.
# presumably holds the same feature columns as the training data, without
# the label column -- TODO confirm against the CSV files.
new_client = pd.read_csv('product.csv')
# Labelled training data: feature columns followed by the label column.
product_featured = pd.read_csv('product_featured.csv')

# All columns but the last are the input features for the model.
# `.iloc` is used because `.ix` was removed in pandas 1.0.
X = product_featured.iloc[:, :-1].values
# The last column holds the labels to predict.
y = product_featured.iloc[:, -1].values

# What we want to classify: the values of the freshly loaded client rows.
x = new_client.values

model = LogisticRegression()
model_trained = model.fit(X, y)
y_pred = model_trained.predict(x)

# Report how well the model reproduces the labels it was trained on.
Y_pred = model_trained.predict(X)
print(classification_report(y, Y_pred))
# NOTE(review): `predict` returns class labels, not probabilities, and this
# expression is only unambiguous when `x` holds exactly one row. Whether a
# label <= .5 really means "Yes" depends on how the labels are encoded --
# confirm against the data.
print("Is this person valid for this Product? {}".format("Yes" if y_pred <= .5 else "No"))

ModuleNotFoundError: No module named 'sklearn'

In [4]:
### K-Nearest Neighbors

In [5]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

def based_on_knn(product_featured, product, n_neighbors):
    """
    Look up the rows of ``product_featured`` that lie nearest to ``product``.

    A NearestNeighbors model configured for ``n_neighbors`` is fitted on
    the raw values of ``product_featured`` and then queried with
    ``product`` exactly as it was passed in.

    Returns the result of ``kneighbors`` for the query; the caller in this
    file reads element ``[1][0]`` of it as row positions in
    ``product_featured``.
    """
    knn = NearestNeighbors(n_neighbors=n_neighbors)
    # `fit` hands back the estimator itself, so it can be queried directly.
    knn.fit(product_featured.values)
    return knn.kneighbors(product)


def main():
    """Print the products whose features are closest to the viewed product."""
    # How many close products we want back.
    wanted = 5
    # The list of rated products that forms the search space.
    catalog = pd.read_csv('product_features.csv')
    # The product the user is currently viewing; we want the ones closest to it.
    viewed = pd.read_csv('product.csv')

    neighbors = based_on_knn(catalog, viewed, wanted)

    print('Product to label')
    print(viewed)

    # Element 1 of the result holds the neighbor row positions; take the
    # entry for the first (only) queried row.
    closest_rows = list(neighbors[1][0])
    print('Product closest by features are {}'.format(closest_rows))
    print(catalog.iloc[closest_rows])


if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'sklearn'

In [6]:
### Collaborative filtering

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD

def based_on_svd(ratings_df, products_df, name, corr_value, n_components=12):
    """
    Return the products whose latent-feature profile correlates with ``name``.

    The ratings are pivoted into a user x product utility matrix, transposed
    so that each row is a product, reduced with a truncated SVD and finally
    compared product against product with a correlation matrix.

    Example sizes: 1000 people rating 50 products with only 10000 ratings
    leave most cells of the utility matrix empty; those cells are filled
    with 0, so sparsity is not a problem.

    Parameters
    ----------
    ratings_df : DataFrame
        Must provide the 'userID', 'productID' and 'rating' columns.
    products_df : DataFrame
        Must provide the 'productName' column.
    name : str
        Name of the product to find correlated products for.
    corr_value : float
        Minimum correlation a product needs to be returned.
    n_components : int, optional
        Number of latent features kept by the SVD (default 12).

    Returns
    -------
    DataFrame
        Rows of ``products_df`` whose correlation with the requested product
        is at least ``corr_value``. The requested product correlates with
        itself, so it is part of the result unless its correlation is NaN.

    Raises
    ------
    IndexError
        If no row of ``products_df`` has ``name`` as its 'productName'.
    """
    # Utility matrix, users x products (e.g. 1000 x 50).
    ratings_pivot = pd.pivot_table(
        data=ratings_df,
        values='rating',
        index='userID',
        columns='productID',
        fill_value=0
    )

    # Transpose so every row describes one product (e.g. 50 x 1000).
    X = ratings_pivot.T

    # Reduce each product to `n_components` latent features
    # (e.g. 50 products x 12 features); random_state keeps runs repeatable.
    svd = TruncatedSVD(n_components=n_components, random_state=17)
    decomposed_matrix = svd.fit_transform(X)

    # Product x product correlation matrix (e.g. 50 x 50): symmetric, with
    # ones on the diagonal because every product correlates with itself.
    corr_mat = np.corrcoef(decomposed_matrix)

    # Locate the product requested by the user.
    matches = products_df.index[products_df['productName'] == name]
    if matches.empty:
        raise IndexError(
            'no product named {!r} found in products_df'.format(name)
        )
    # NOTE(review): the index label of products_df is used as a row position
    # in corr_mat, and the mask below is applied positionally. This assumes
    # products_df has a default 0..n-1 index in the same order as the
    # productID columns of the pivot, and that every product has at least
    # one rating -- confirm against the data.
    index_column = matches[0]

    # Correlation of the requested product with every product; its own
    # position holds 1 (the diagonal).
    product_corr_values = corr_mat[index_column]

    # Keep only the products correlated at least as strongly as requested.
    return products_df[product_corr_values >= corr_value]


def main():
    """Print the products highly correlated with one fixed product name."""
    # Client requirements: products highly correlated with this name.
    target_name = 'CNIwdVaBLH'
    min_corr = 0.7

    # Model sources: the ratings and the product list.
    ratings = pd.read_csv('product_ratings.csv')
    catalog = pd.read_csv('product_list.csv')

    # Feedback for the client.
    print(based_on_svd(ratings, catalog, target_name, min_corr))


if __name__ == "__main__":
    main()