In [None]:
# imports

# extra
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# local
from feature_helpers import *

# display settings: seaborn theme, plus wider and more precise pandas output
sns.set(style='whitegrid', palette='husl')
pd.options.display.max_colwidth = 400
pd.options.display.precision = 4
pd.options.display.width = 1600

In [None]:
# Load the recipe data that the metrics and tf-idf cells below are built from.
# NOTE(review): load_clean_data is not defined in this notebook; presumably it is
# provided by the `from feature_helpers import *` star import — confirm.
recipes = load_clean_data()

In [None]:
# Derive the `metrics` frame from the full (unfiltered) recipes. The plots below read
# `cuisine`, `recipe_count`, `recipe_length`, `head_count`, `unique_count` and
# `rare_count` from it, and use its index as the category order.
metrics = get_metrics(recipes)

In [None]:
# Show the metrics sorted ascending by unique_count; as the last expression of the
# cell, the sorted frame is rendered as the cell output (metrics itself is unchanged).
metrics.sort_values('unique_count')

In [None]:
# Horizontal bar chart of recipe_count per cuisine, ordered by descending recipe_count.
# The theme call repeats the one in the setup cell, so this cell renders the same
# way even when it is re-run after the colour codes were changed further down.
sns.set(style='whitegrid', palette='husl')
recipe_counts_plot = sns.catplot(
    data=metrics,
    x='recipe_count',
    y='cuisine',
    kind='bar',
    order=metrics['recipe_count'].sort_values(ascending=False).index,
    height=7,
    aspect=1.8,
)
# x ticks every 500 from 0 to 10000
recipe_counts_plot.set(xticks=range(0, 10500, 500));

In [None]:
# Horizontal bar chart of recipe_length per cuisine, ordered by descending recipe_length.
recipe_lengths_plot = sns.catplot(
    data=metrics,
    x='recipe_length',
    y='cuisine',
    kind='bar',
    order=metrics['recipe_length'].sort_values(ascending=False).index,
    height=7,
    aspect=1.5,
)
# x ticks every 2 from 0 to 14
recipe_lengths_plot.set(xticks=range(0, 15, 2));

In [None]:
# Horizontal bar chart of head_count per cuisine, ordered by descending head_count.
head_counts_plot = sns.catplot(
    data=metrics,
    x='head_count',
    y='cuisine',
    kind='bar',
    order=metrics['head_count'].sort_values(ascending=False).index,
    height=7,
    aspect=1.5,
)
# x ticks every 20000 from 0 to 120000
head_counts_plot.set(xticks=range(0, 122000, 20000));

In [None]:
# Overlay two horizontal bar layers per cuisine on a single axes: the `unique_count`
# column (blue, legend 'unique ings') with the `rare_count` column (red, legend
# 'rare ings') drawn on top of it.
fig, ax = plt.subplots(figsize=(16, 8))
# Switch the single-letter colour shorthands ('b', 'r') to seaborn's pastel palette.
sns.set_color_codes('pastel')
# Both layers must use the same category order, otherwise the bars of one cuisine
# would not line up; compute it once and share it.
cuisines_by_unique_count = metrics.unique_count.sort_values(ascending=False).index
unique_counts_plot = sns.barplot(x='unique_count', y='cuisine', data=metrics, label='unique ings', color='b',
                                 ax=ax, order=cuisines_by_unique_count)
rare_counts_plot = sns.barplot(x='rare_count', y='cuisine', data=metrics, label='rare ings', color='r',
                               ax=ax, order=cuisines_by_unique_count)
ax.legend(ncol=2, loc='lower right', frameon=True)
# seaborn names the x axis after just one of the two plotted columns, which mislabels
# the shared axis; set a neutral label explicitly.
# NOTE(review): assumes 'ings' in the legend abbreviates "ingredients" — adjust if not.
ax.set(xticks=range(0, 1100, 100), xlabel='ingredient count');

In [None]:
# for friendlier plotting
def reshape_tfidfs(tfidfs, min_tfidf=0.0001):
    """Flatten a wide tf-idf frame into long (cuisine, tfidf) form for plotting.

    Parameters
    ----------
    tfidfs : pandas.DataFrame
        Wide frame whose row labels are used as cuisine names; every cell of a
        row is treated as one tf-idf value.
    min_tfidf : float, default 0.0001
        Exclusive lower bound. Only values strictly greater than this are kept,
        so zero / near-zero entries do not swamp the plots. The default matches
        the cutoff that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``cuisine`` and ``tfidf`` with one row per retained value, in
        row-by-row order of the input and a fresh default index. An input with
        no value above the cutoff yields an empty frame with the same columns.
    """
    pairs = [
        (cuisine, value)
        for cuisine, row in tfidfs.iterrows()
        for value in row
        if value > min_tfidf
    ]
    return pd.DataFrame(pairs, columns=['cuisine', 'tfidf'])
# Build the tf-idf frame from every recipe whose cuisine is not "test", derive a
# smoothed variant, and reshape both to long (cuisine, tfidf) form for plotting.
# NOTE(review): make_tfidfs / smooth_tfidfs are not defined in this notebook
# (presumably from feature_helpers); the meaning of the .6 argument and whether
# smooth_tfidfs modifies its input should be confirmed there — the statement order
# below is kept as-is for that reason.
tfidfs = make_tfidfs(recipes.query('cuisine != "test"'))
smoothed_tfidfs = smooth_tfidfs(tfidfs, .6)
reshaped_tfidfs = reshape_tfidfs(tfidfs)
reshaped_smoothed_tfidfs = reshape_tfidfs(smoothed_tfidfs)

In [None]:
# Histogram (KDE curve disabled) of every retained raw tf-idf value, all cuisines pooled.
fig, ax = plt.subplots(figsize=(16, 8))
# Draw onto the axes created above; distplot hands that same axes back.
all_tfidfs_plot = sns.distplot(reshaped_tfidfs['tfidf'], kde=False, ax=ax)
# x ticks every .02 over the fixed range starting at 0 and stopping before .46
ax.set(xticks=np.arange(0, .46, .02));

In [None]:
# Histogram (KDE curve disabled) of every retained smoothed tf-idf value, all cuisines pooled.
fig, ax = plt.subplots(figsize=(16, 8))
# The tick range follows the data: it runs from 0 up to just past the largest value.
max_tfidf = reshaped_smoothed_tfidfs['tfidf'].max()
# Draw onto the axes created above; distplot hands that same axes back.
all_smoothed_tfidfs_plot = sns.distplot(reshaped_smoothed_tfidfs['tfidf'], kde=False, ax=ax)
ax.set(xticks=np.arange(0, max_tfidf + .05, .1));

In [None]:
# Strip plot of the raw tf-idf values per cuisine, cuisines ordered by descending
# unique_count.
# `metrics` is derived from the unfiltered recipes, whereas the tf-idf frames are built
# without the "test" cuisine. Passing the metrics index straight to `order` would make
# seaborn reserve an empty column for any cuisine that has no tf-idf rows, so keep only
# the cuisines actually present in the plotted data (relative order is preserved).
cuisines_by_unique_count = metrics.unique_count.sort_values(ascending=False).index
cuisine_tfidfs_order = cuisines_by_unique_count[cuisines_by_unique_count.isin(reshaped_tfidfs.cuisine)]
cuisine_tfidfs_plot = sns.catplot(y='tfidf', x='cuisine', data=reshaped_tfidfs, kind='strip', height=7, aspect=2.5,
                                  order=cuisine_tfidfs_order)
# y ticks every .05 over the fixed range starting at 0 and stopping before .45
cuisine_tfidfs_plot.set(yticks=np.arange(0, .45, .05));

In [None]:
# Strip plot of the smoothed tf-idf values per cuisine, cuisines ordered by descending
# unique_count; the y tick range follows the largest smoothed value.
max_tfidf = reshaped_smoothed_tfidfs.tfidf.max()
# `metrics` is derived from the unfiltered recipes, whereas the tf-idf frames are built
# without the "test" cuisine. Passing the metrics index straight to `order` would make
# seaborn reserve an empty column for any cuisine that has no tf-idf rows, so keep only
# the cuisines actually present in the plotted data (relative order is preserved).
cuisines_by_unique_count = metrics.unique_count.sort_values(ascending=False).index
smoothed_cuisine_tfidfs_order = cuisines_by_unique_count[
    cuisines_by_unique_count.isin(reshaped_smoothed_tfidfs.cuisine)
]
smoothed_cuisine_tfidfs_plot = sns.catplot(y='tfidf', x='cuisine', data=reshaped_smoothed_tfidfs, kind='strip',
                                           height=7, aspect=2.5, order=smoothed_cuisine_tfidfs_order)
smoothed_cuisine_tfidfs_plot.set(yticks=np.arange(0, max_tfidf + .1, .05));