In [0]:
# imports
import pickle
import pandas as pd
import numpy as np
from heapq import nsmallest

# scikit-learn package
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, roc_auc_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity


# Color, images and plotting
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
from skimage.color import rgb2lab, deltaE_cie76


In [0]:
# Product category to analyse; selects which cleaned dataset is loaded below.
category = "blush"

## Load data and encode categorical features

In [0]:
# Load the cleaned dataframe for the selected product category.
# The path template has to be filled in with str.format(); a bare attribute
# access on the string (`'...'.category`) raises AttributeError.
df = pd.read_csv('/data/cleaned_data/{}/df.csv'.format(category))

# Include only relevant columns for model
relevant_cols = ['eye_color', 'hair_color', 'skin_tone', 'red', 'green', 'blue', 'rating']

# Transform non-numeric category values to numeric for compatibility with sklearn.
# The loop variable is deliberately not named `category`, so the product
# category selected earlier is not overwritten by a column name.
# NOTE(review): missing values, if any, are encoded as -1 by `.cat.codes` --
# confirm the cleaned data has none in these columns.
dicts = []
for col in ['eye_color', 'hair_color', 'skin_tone']:
    df[col] = df[col].astype('category')
    # code -> original label, captured before the labels are replaced by codes
    d = dict(enumerate(df[col].cat.categories))
    df[col] = df[col].cat.codes
    dicts.append(d)

# Save dictionaries for feeding new data to be predicted
# (same order as the columns encoded above)
eye_dict, hair_dict, skin_dict = dicts

# Independent copy of the dataframe restricted to the model's columns
df_copy = df[relevant_cols].copy()

## Construct classifier

In [0]:
# Create training and testing data sets.
# `df_copy` is the model-ready frame built in the data-loading cell; the
# previously referenced `blush_df_copy` is never defined in this notebook.
train, test = train_test_split(df_copy, random_state=0)

# Features are every column except the target 'rating'
X_train = train.drop('rating', axis=1)
y_train = train['rating']

X_test = test.drop('rating', axis=1)
y_test = test['rating']

# Train random forest classification model.
# Seeded like the split above so that accuracy and feature importances are
# reproducible across kernel restarts.
model = RandomForestClassifier(random_state=0).fit(X_train, y_train)

# Use model to predict ratings
y_predict = model.predict(X_test)

## Evaluate model

In [0]:
# Accuracy on training set.
# Scored on the training split; scoring the test split here would simply repeat
# the test accuracy under the wrong label.
# The built-in round() is used because the score may be a plain Python float,
# which has no .round() method.
print("Train accuracy: {}".format(round(model.score(X_train, y_train), 2)))

# Accuracy on testing set (uses the predictions computed after training)
print("Test accuracy: {}".format(round(accuracy_score(y_test, y_predict), 2)))

## Feature Importance

In [0]:
# Feature importance dataframe: one row per model input column
imp_df = pd.DataFrame({'feature': X_train.columns.values,
                       'importance': model.feature_importances_})

# Reorder by importance (ascending, so the most important bar is drawn on top)
ordered_df = imp_df.sort_values(by='importance')

# Horizontal bar plot of the importances (no confidence intervals are drawn)
height = ordered_df['importance']
bars = ordered_df['feature']
y_pos = np.arange(len(bars))

# Create horizontal bars
plt.barh(y_pos, height)

# Create names on the y-axis
plt.yticks(y_pos, bars)

plt.xlabel("Mean reduction in tree impurity in random forest")

plt.tight_layout()
# Show graphic
plt.show()

## Test User

In [0]:
# Reference colors available for testing, as (red, green, blue) with 0-255 channels.
TEST_COLORS = {
    'Redwood': (166, 89, 75),      # PANTONE 18-1443 TCX
    'Bubblegum': (234, 115, 141),  # PANTONE PQ-17-1928TCX
    'Navy Blue': (64, 63, 111),    # PANTONE 19-3832 TCX
}

# Choose the test color by name instead of (un)commenting blocks of assignments.
test_color_name = 'Redwood'
r_u, g_u, b_u = TEST_COLORS[test_color_name]

# rgb_to_lab / make_swatch are helpers defined outside this section;
# presumably they convert to CIELAB and build a color patch -- TODO confirm.
lab_u = rgb_to_lab(r_u, g_u, b_u)
make_swatch(r_u, g_u, b_u)

### Find color matches

In [0]:
# Look up the products whose color is closest to the user's color.
# `df` is the dataframe loaded at the top of the notebook; the previously
# referenced `blush_df` is never defined here.
# find_closest_colors is defined outside this section -- presumably it returns
# a dataframe ordered from best to worst match; TODO confirm.
matches = find_closest_colors(df, lab_u)

print('########## TOP 3 PICKS ##########')
# head(3) keeps this safe when fewer than three matches come back
top_matches = matches.head(3)
for rank, (product, shade) in enumerate(zip(top_matches.product_name,
                                            top_matches.review_color), start=1):
    # A space separates "shade" from the shade name (it was missing before)
    print('Match {0}: {1} in shade {2}'.format(rank, product, shade))

## Predict ratings

In [0]:
# give user input
eye_color_u = 'Green'
skin_tone_u = 'Tan'
hair_color_u = 'Black'

# Translate the user's labels into the numeric codes the model was trained on.
# map_to_dict is defined outside this section -- presumably a reverse lookup in
# the code -> label dictionaries; TODO confirm.
eye_mapped, hair_mapped, skin_mapped = map_to_dict(eye_color_u, hair_color_u,
                                             skin_tone_u, eye_dict, hair_dict, skin_dict)

# One prediction per match. The array is sized from `matches` rather than a
# hard-coded 3, so it neither overflows with more matches nor reports phantom
# zero ratings with fewer.
predicted_stars = np.zeros(len(matches))
for ii in range(len(matches)):
    match = matches.iloc[ii]
    # Feature order must match the training columns:
    # eye_color, hair_color, skin_tone, red, green, blue
    features = [[eye_mapped, hair_mapped, skin_mapped,
                 match.red, match.green, match.blue]]
    # predict() returns a length-1 array for a single row; take its scalar
    predicted_stars[ii] = model.predict(features)[0]

In [0]:
# Display the array of predicted ratings filled in by the previous cell
predicted_stars

In [0]:
# Collect one swatch and one caption per panel: the user's color first,
# followed by every matched product in order.
swatches = [make_swatch(r_u, g_u, b_u)]
labels = ['User Input']

for row_idx in range(len(matches)):
    match_row = matches.iloc[row_idx]
    swatches.append(make_swatch(match_row.red, match_row.green, match_row.blue))
    labels.append('{0} {1}'.format(match_row.product_name, match_row.review_color))

# Draw all swatches side by side in a single row of subplots
n_panels = len(swatches)
plt.figure(figsize=(20, 10))
for panel, (swatch, caption) in enumerate(zip(swatches, labels), start=1):
    plt.subplot(1, n_panels, panel)
    plt.imshow(swatch)
    plt.title(caption)

plt.tight_layout()