In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from notebooks.helpers.models.embedding_model import PredictionModel
from generate_embeddings import get_wine_dataframe, get_food_dataframe
# from notebooks.helpers.prep.view_embeddings import view_embeddings_of_ingredient
from generate_pairings import compute_embedding_food_ingredients, get_the_closest_embedding, get_food_embedding_dict, get_descriptor_frequencies, get_production_wines, generate_pairing_for_ingredients, normalize_production_wines
import networkx as nx
from pyvis.network import Network
from pathlib import Path

from app.notebooks.helpers.bot.kg_generation import create_wine_triplets, create_variety_descriptor_triplets, create_food_triplets

%matplotlib inline


In [None]:
# Stack the frames returned by the three triplet builders row-wise into a single edge table.
KG = pd.concat(
    [
        create_variety_descriptor_triplets(),
        create_wine_triplets(),
        create_food_triplets(),
    ],
    axis=0,
)

In [None]:
# KG = pd.DataFrame({'head': triplets['heads'], "tail": triplets['tails'], 'edges': triplets['edges']
# })
# G=nx.from_pandas_edgelist(KG, "head", "tail", edge_key = 'labels', create_using=nx.MultiDiGraph())

# Build a directed graph from the triplet table: one edge per row, running from the
# 'head' node to the 'tail' node, with the 'edges' value stored as the edge's 'label'.
# NOTE(review): nx.DiGraph holds a single edge per ordered node pair, so a later row
# with the same head/tail replaces the earlier row's label.
G = nx.DiGraph()
for head_node, tail_node, relation in zip(KG['head'], KG['tail'], KG['edges']):
    G.add_edge(head_node, tail_node, label=relation)

In [None]:
# for edge in G.edges(data=True):
#     print(edge)

In [None]:
# Static overview of the graph. The layout seed is fixed so the picture is reproducible.
plt.figure(figsize=(20, 20))
pos = nx.spring_layout(G, k=1.5, seed=42)
# Edge labels are collected but only consumed by the disabled edge-label call below.
labels = nx.get_edge_attributes(G, 'label')
nx.draw(
    G,
    pos,
    font_size=8,
    node_size=200,
    node_color='lightblue',
    edge_color='gray',
    alpha=0.6,
)
# nx.draw_networkx_edge_labels(G, pos, font_size=3, label_pos=0.3, verticalalignment='baseline')
plt.title('Knowledge Graph')
plt.show()

In [None]:
# Interactive pyvis view of the same graph, written to nx.html.
net = Network(
    notebook=True,
    cdn_resources="remote",
    height="750px",
    width="100%",
    bgcolor="#222222",
    font_color="white",
    select_menu=True,
    filter_menu=True,
)
# Expose only the physics section of the configuration panel.
net.show_buttons(filter_="physics")
net.from_nx(G)
net.show("nx.html")

In [None]:
# net.set_options()

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from notebooks.helpers.prep.view_embeddings import reduce_ingredients_dimension, plot_pca_vectors_2d, reduce_embedding_dimensions
from generate_pairings import get_production_wines, nparray_str_to_list

In [None]:
# Load the pickled object stored at wine_average_embeddings.pkl and the production wine table.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
wine_average_embeddings_path = Path("./app/notebooks/helpers/models/wine_average_embeddings.pkl")
wine_average_embeddings = pickle.loads(wine_average_embeddings_path.read_bytes())

wines_df = get_production_wines()

In [None]:
# wines_df.loc[wines_df.index.str.contains('Ries') == True]
wines_df.index[::24]

In [None]:
def determine_variety(instance, varieties=('chardonnay',)):
    """Return the first known variety mentioned in a wine label.

    Matching is a case-insensitive substring test: the label is lower-cased
    once and each candidate is looked up in it, in the order given.

    Parameters
    ----------
    instance : str
        Wine label to inspect. Any non-string value (e.g. None or NaN) is
        treated as "no match".
    varieties : iterable of str, optional
        Lower-case variety names to search for. Defaults to ``('chardonnay',)``,
        which is the only variety the original implementation recognised.

    Returns
    -------
    str or float
        The first entry of ``varieties`` found in the label, or ``np.nan``
        when nothing matches.
    """
    if not isinstance(instance, str):
        # Non-text labels cannot mention a variety; report "missing" instead of raising.
        return np.nan

    lowered = instance.lower()
    for variety in varieties:
        if variety in lowered:
            return variety

    return np.nan

In [None]:
# Tag every wine with the variety detected in its index label (NaN when none is found).
wines_df['variety'] = [determine_variety(wine_label) for wine_label in wines_df.index]

In [None]:
# wines_df['country'].iloc[0]

In [None]:
# Keep only wines whose variety was recognised and derive the country from the text
# after the last comma of the index label. Built as one chain (no inplace mutation)
# so the cell produces a fresh frame each time it runs.
wines_df = (
    wines_df
    .dropna(subset=['variety'])
    .assign(country=lambda frame: [label.split(',')[-1] for label in frame.index])
)
# The country names carry a leading space because split(',') keeps the blank that
# follows the comma. .copy() makes the filtered result an independent frame so later
# column assignments do not raise SettingWithCopyWarning.
wines_df = wines_df[wines_df['country'].isin([' France', ' USA', ' Chile', ' New Zealand'])].copy()

In [None]:
# Build one grouping label per wine from its variety and country.
# NOTE(review): the country values kept by the filter above start with a space, so the
# resulting labels contain two consecutive spaces (e.g. 'chardonnay  France').
wines_df['variety & country'] = wines_df['variety'] + ' ' + wines_df['country']

In [None]:
# Keep only the columns needed downstream. .copy() yields an independent frame, so
# assigning into one of its columns afterwards cannot trigger SettingWithCopyWarning.
wines_df = wines_df[['aroma', 'variety & country']].copy()

In [None]:
# Convert every 'aroma' entry with nparray_str_to_list, element by element.
# NOTE(review): the column is overwritten in place, so re-running this cell passes
# already-converted values to the converter again — confirm it tolerates that.
wines_df['aroma'] = wines_df['aroma'].map(nparray_str_to_list)

In [None]:
wines_df['variety & country']

In [None]:
# One empty bucket per distinct label, ordered by first appearance in the frame.
variety_country = {label: [] for label in wines_df['variety & country'].unique()}

In [None]:
variety_country.keys()

In [None]:
# Drop each wine's aroma entry into the bucket of its label.
# NOTE(review): buckets are appended to, so re-running this cell without rebuilding
# variety_country duplicates every entry.
for label, aroma_entry in wines_df[['variety & country', 'aroma']].itertuples(index=False, name=None):
    variety_country[label].append(aroma_entry)

In [None]:
from numpy import concatenate


# Put every bucket's entries into one array so all labels are reduced together.
embeddings_to_reduce = np.stack(np.concatenate(list(variety_country.values())))

reduced_embeddings = reduce_embedding_dimensions(embeddings_to_reduce, 2)

# Give each label back its own slice of the reduced result; slices are taken in the
# same order in which the buckets were concatenated.
# NOTE(review): variety_country is overwritten with the reduced points, so re-running
# this cell alone would feed already-reduced data back into the reduction.
offset = 0
for label, bucket in variety_country.items():
    bucket_size = len(bucket)
    variety_country[label] = reduced_embeddings[offset : offset + bucket_size]
    offset += bucket_size

# Scatter the two reduced components, one series per label.
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot()
for label, points in variety_country.items():
    first_component = [point[0] for point in points]
    second_component = [point[1] for point in points]
    # The original chose the marker with a conditional whose two branches were both "x".
    ax.scatter(first_component, second_component, s=20, label=label, marker="x")
ax.set_title("PCA on Chardonnay Wines from various countries")
ax.legend()
ax.set_xlabel("1st Component")
ax.set_ylabel("2nd Component")
plt.show()


In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from notebooks.helpers.prep.view_embeddings import reduce_ingredients_dimension, plot_pca_vectors_2d, reduce_embedding_dimensions
from generate_pairings import get_production_wines, nparray_str_to_list, get_food_taste_distances_info

In [2]:
def normalize(df, cols_to_normalize):
    """Min-max scale the given columns of ``df`` to the range [0, 1], in place.

    Each listed column is replaced by ``(x - min) / (max - min)``, where min
    and max are taken over that column (missing values are skipped by pandas
    and stay missing in the result).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It is mutated and also returned, so existing callers
        that rely on either behaviour keep working.
    cols_to_normalize : iterable of str
        Names of the columns to rescale; other columns are left untouched.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for feature_name in cols_to_normalize:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # A constant column has no spread; dividing would give 0/0 = NaN.
            # Map it to 0.0 instead (missing entries remain missing).
            df[feature_name] = (column - min_value).astype(float)
        else:
            # Vectorised equivalent of applying the formula element by element.
            df[feature_name] = (column - min_value) / value_range
    return df

In [30]:
food_average_distances, food_tastes_distances = get_food_taste_distances_info()

In [31]:
# One distance column per taste, with rows labelled by the keys of the 'aroma' mapping.
# NOTE(review): values are matched to the index by position, which assumes every
# per-taste mapping iterates in the same key order as 'aroma' — confirm.
food_distances = pd.DataFrame(
    {
        f'distances_{taste_name}': food_tastes_distances[taste_name].values()
        for taste_name in ('piquant', 'sweet', 'acid', 'bitter', 'weight', 'fat', 'salt')
    },
    index=food_tastes_distances['aroma'].keys(),
)

In [33]:
# Rescale every distance column with normalize(); the listed columns are independent
# of each other, so their order does not affect the result.
food_distances = normalize(
    food_distances,
    cols_to_normalize=[
        'distances_piquant',
        'distances_sweet',
        'distances_acid',
        'distances_bitter',
        'distances_weight',
        'distances_fat',
        'distances_salt',
    ],
)

In [34]:
food_distances.head()

Unnamed: 0,distances_piquant,distances_sweet,distances_acid,distances_bitter,distances_weight,distances_fat,distances_salt
acorn,0.777222,0.683392,0.777929,0.867736,0.811885,0.809319,0.797112
acorn_squash,0.415016,0.33778,0.257909,0.551918,0.361879,0.336663,0.329133
adobo_sauce,0.501391,0.534494,0.657404,0.509118,0.704316,0.658078,0.711564
adobo_seasoning,0.546041,0.656928,0.766309,0.615732,0.723874,0.713944,0.85443
agave,0.787685,0.778019,0.852231,0.787985,0.851994,0.873779,0.814476


In [54]:
# NOTE(review): `wines_df` is rebound here to the food-distance table, not wine data;
# the name is kept because the next cell reads it.
wines_df = food_distances
# Column whose distribution is inspected below.
taste = 'distances_bitter'
# Mean of the selected distance column.
wines_df[taste].mean()

0.5979802462770322

In [55]:
# For each half-open bin (lower, upper], print how many rows of the selected column
# fall inside (True) versus outside (False).
# NOTE(review): the first bin excludes 0 itself, so rows equal to 0 land in no bin.
for lower, upper in ((0, 0.5), (0.5, 0.6), (0.6, 0.75), (0.75, 1)):
    in_bin = (wines_df[taste] > lower) & (wines_df[taste] <= upper)
    print(wines_df.value_counts(in_bin))
# print(wines_df.value_counts(wines_df['salt'].apply(lambda x: x > 0.8 and x <= 1 )))

distances_bitter
False    1392
True      589
Name: count, dtype: int64
distances_bitter
False    1505
True      476
Name: count, dtype: int64
distances_bitter
False    1507
True      474
Name: count, dtype: int64
distances_bitter
False    1540
True      441
Name: count, dtype: int64


In [10]:
# Number of non-null entries in the weight-distance column.
# (The original line repeated the column expression with no operator, which is a syntax error.)
print(food_distances['distances_weight'].count())

1981
