In [None]:
import numpy as np
import pandas as pd
import logging
import ipywidgets as widgets
import matplotlib.pyplot as plt

In [None]:
import tables

In [None]:
import sompy
from sompy.sompy import SOMFactory

In [None]:
from tfprop_sompy.jupyter_integration.cluster_inspector import sort_materials_by_cluster, cluster_tabs

In [None]:
from tfprop_sompy.tfprop_vis import render_posmap_to_axes, kmeans_clust, show_posmap, ViewTFP

In [None]:
# Raise the root logger's threshold to WARNING so that routine INFO/DEBUG
# chatter stays out of the notebook output.
root_logger = logging.getLogger()
root_logger.setLevel(logging.WARNING)

In [None]:
## CONSTANTS
# HDF5 file from which the stored codebook matrix, map size, material data,
# property names and material families are read further down.
CODEBOOK_FILE = 'som_codemat_22props_modified_19-04-30.h5'
# Alternative codebook file, kept commented out for reference:
# CODEBOOK_FILE = 'som_codemat_23props_19-03-01.h5'
# Number of clusters passed to kmeans_clust (also the default for create_posmap).
KM_CLUSTERS = 12

In [None]:
# The numeric tables are read from the HDF5 store through pandas.
stored_cb_matrix = pd.read_hdf(CODEBOOK_FILE, 'sm_codebook_matrix')
stored_mapsize = pd.read_hdf(CODEBOOK_FILE, 'sm_codebook_mapsize').values
mats_df = pd.read_hdf(CODEBOOK_FILE, 'sm_data')

# FIXME:
# The string-valued series are pulled out with the "pytables" library directly,
# as a workaround: pandas does not read these object series out of the h5 file.
stored_columns = stored_matfamilies = None
with tables.open_file(CODEBOOK_FILE, "r") as store:
    # The nodes normally hand back byte strings; decode every entry to a
    # unicode string so it is ready for presentation.
    stored_columns = [
        raw_name.decode('utf-8')
        for raw_name in store.root.sm_codebook_columns.property_names.read()
    ]
    stored_matfamilies = [
        raw_family.decode('utf-8')
        for raw_family in store.root.sm_codebook_matfamilies.material_families.read()
    ]

In [None]:
# Attach the decoded material-family labels as a new column (mutates mats_df in place).
# NOTE(review): assumes stored_matfamilies has one entry per row of mats_df, in the same order — confirm.
mats_df["Material_family"] = stored_matfamilies

In [None]:
# Build a SOM over the stored property columns of the material table, using
# the map size that was saved alongside the codebook.
sm = SOMFactory.build(
    mats_df[stored_columns].values,
    mapsize=tuple(stored_mapsize),
    normalization="var",
    initialization="pca",
    component_names=stored_columns,
)

In [None]:
# Replace the freshly built SOM's codebook matrix with the one stored in the
# HDF5 file, so the map is restored from disk rather than trained here.
sm.codebook.matrix = stored_cb_matrix.values

In [None]:
def create_posmap(mysom: sompy.sompy.SOM, num_clusters: int=KM_CLUSTERS):
    """Cluster a SOM with k-means and plot its clustered positioning map.

    Parameters
    ----------
    mysom
        The SOM to cluster and plot.
    num_clusters
        Number of k-means clusters; defaults to ``KM_CLUSTERS``.

    Returns
    -------
    None
        Plotting is delegated to ``show_posmap``.
    """
    cluster_labels = kmeans_clust(mysom, n_clusters=num_clusters)

    # Plot the positioning map with clustered groups.
    # The original body passed `mats_name_df`, a name that is never defined in
    # this notebook, so every call raised NameError. `mats_df` is the material
    # table loaded from CODEBOOK_FILE; it is passed for both dataframe
    # arguments, mirroring how `umat.show(sm, my_dataframe, my_dataframe, ...)`
    # is called elsewhere in this notebook.
    # NOTE(review): confirm show_posmap accepts the same frame for both slots.
    show_posmap(mysom, mats_df, mats_df,
                num_clusters, cluster_labels,
                show_data=False, labels=False)

In [None]:
# k-means labels over the restored SOM; reused by the heatmap, cluster-tab and
# cluster-graph cells further down.
cl_labels = kmeans_clust(sm, KM_CLUSTERS)

In [None]:
# Unpacked into ViewTFP's first two positional arguments
# (presumably figure width and height — TODO confirm).
heatmap_size = (20, 20)
# Passed as col_sz to viewTFP.show in the heatmap cells.
heatmap_col_sz = 4
# NOTE(review): not referenced anywhere else in the visible notebook.
gauss_alpha = None

cmap = plt.get_cmap('RdYlBu_r')  # set color map
# One shared view object; the heatmap cells below set knee_value on it and call show().
viewTFP = ViewTFP(*heatmap_size, '', stdev_colorscale_coeff=1., text_size=14)

In [None]:
# Everything printed or plotted inside the `with my_out:` block is captured by
# the Output widget; the widget is then shown as the cell's result.
my_out = widgets.Output()

# No scaling
viewTFP.knee_value = 0.0
with my_out:
    print("Linear scaling")
    # No `normalizer` argument is passed here, unlike the "Log scaling" cell.
    viewTFP.show(sm, cl_labels, col_sz=heatmap_col_sz,
                         which_dim='all', desnormalize=True, col_norm='mean',
                         cmap=cmap, isOutHtmap=False)
my_out

In [None]:
# Fresh Output widget to capture this cell's printed label and plot.
my_out = widgets.Output()
cmap = plt.get_cmap('RdYlBu_r')  # set color map

# Same knee_value as the "Linear scaling" cell. What differs here is
# normalizer="log"; isOutHtmap is not passed, so ViewTFP.show's default applies.
viewTFP.knee_value = 0.0
with my_out:
    print("Log scaling")
    viewTFP.show(sm, cl_labels, col_sz=heatmap_col_sz,
                         which_dim='all', desnormalize=True, col_norm='mean',
                         cmap=cmap, normalizer="log")
my_out

In [None]:
# viewTFP2 = ViewTFP(*(7, 7), '', stdev_colorscale_coeff=1,text_size=14)
# for i, p in enumerate(stored_columns):
#     viewTFP2.show(sm, cl_labels, col_sz=1,
#                      which_dim=i, desnormalize=True, col_norm='mean',
#                      cmap=cmap, normalizer="log", isOutHtmap=False)

In [None]:
# from sompy.visualization.mapview import View2D

# my_out = widgets.Output()
# cmap = plt.get_cmap('RdYlBu_r')  # set color map

# view2d = View2D(*heatmap_size, '', stdev_colorscale_coeff=1., text_size=14)
# # No scaling
# viewTFP.knee_value = 0.0
# with my_out:
#     print("Log scaling")
#     viewTFP.show(sm, cl_labels, col_sz=heatmap_col_sz,
#                          which_dim='all', desnormalize=True, col_norm='mean',
#                          cmap=cmap, normalizer="log")
# my_out

In [None]:
%matplotlib inline
# Alias only: my_dataframe and mats_df refer to the same DataFrame object.
my_dataframe = mats_df
# Reused below by cluster_tabs and later by the clusters_list[3] inspection cell.
clusters_list = sort_materials_by_cluster(sm, my_dataframe, cl_labels)

# This makes it so it will display the full lists
# (these are global pandas display options and stay in effect for later cells)
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)

# This should be the last statement of the cell, to make it display
# That, or assign the return value to a variable, and have that variable be the final expression in a cell
cluster_tabs(sm, my_dataframe, clusters_list, cl_labels)

In [None]:
from tfprop_sompy.tfprop_vis import UMatrixTFP

# Unpacked into UMatrixTFP's first two positional arguments
# (presumably figure width and height — TODO confirm).
umat_size = (50, 50)

umat = UMatrixTFP(*umat_size, 'U-matrix')

# The material table is passed for both dataframe arguments.
umat.show(sm, my_dataframe, my_dataframe, '', cmap=cmap)
# Trailing None so the cell does not echo umat.show's return value.
None

In [None]:
# Run cells below this one manually
# The assertion always fails on purpose: the resulting AssertionError halts a
# "Run All" at this point, so the exploratory cells below only run when
# executed by hand.
assert False

In [None]:
from tfprop_sompy.jupyter_integration.cluster_inspector import make_cluster_graph
from tfprop_sompy.tfprop_vis import dataframe_to_coords, render_points_to_axes

In [None]:
# fig/ax are reused by the following cells: points are rendered onto `ax`,
# then `fig` is displayed again.
fig, ax = make_cluster_graph(sm, cl_labels)

In [None]:
# Replace mats_list_copper with an appropriate list of materials you want to look at
# The list is defined here so this cell no longer raises NameError when the
# notebook is run top to bottom (the original only defined mats_list_copper in
# a much later cell). The pattern is the same one that later cell uses: index
# labels containing "Copper"/"copper", or "Cu" followed by a character other than "s".
mats_list_copper = list(my_dataframe.filter(regex="([Cc]opper|Cu[^s])", axis="index").index)
render_points_to_axes(ax, dataframe_to_coords(sm, my_dataframe.loc[mats_list_copper]))

In [None]:
# Display the cluster graph figure again, after points were rendered onto its axes.
fig

In [None]:
# Short alias for the dataframe_to_coords function itself; nothing is called here.
# NOTE(review): `crds` is not used anywhere else in the visible notebook.
crds = dataframe_to_coords

In [None]:
# Scratch check: echo the type of the material table.
type(my_dataframe)

In [None]:
# Rows whose index label appears in clusters_list[3], i.e. the entry at
# position 3 of what sort_materials_by_cluster returned.
my_dataframe.filter(clusters_list[3], axis='index')

In [None]:
# Development helper: re-execute cluster_inspector after editing its source,
# then re-import the two helpers so these names point at the reloaded versions.
from importlib import reload
from tfprop_sompy.jupyter_integration import cluster_inspector
reload(cluster_inspector)
from tfprop_sompy.jupyter_integration.cluster_inspector import sort_materials_by_cluster, cluster_tabs
# Trailing None so the cell shows no output.
None

In [None]:
# Development helper: re-execute tfprop_vis after editing its source.
# `reload` is imported here too, so this cell does not depend on another
# reload cell having been run first; cells in this section are run manually
# and not necessarily in order.
# Note: names bound earlier with `from tfprop_sompy.tfprop_vis import ...`
# keep referring to the pre-reload objects until they are imported again.
from importlib import reload
from tfprop_sompy import tfprop_vis
reload(tfprop_vis)
# Trailing None so the cell shows no output.
None

In [None]:
# Development helper: re-execute tfprop_sompy.utils.data after editing its
# source, then re-import the two radius helpers from the reloaded module.
from importlib import reload
from tfprop_sompy.utils import data
reload(data)
from tfprop_sompy.utils.data import calculate_SOM_radius, calculate_euclidean_radius
# Trailing None so the cell shows no output.
None

In [None]:
# Project every material's stored-property row onto the SOM and echo the result
# (presumably best-matching-unit indices, since the cadmium cell below feeds
# the same call into sm.bmu_ind_to_xy — TODO confirm).
sm.project_data(my_dataframe[stored_columns].values)

In [None]:
# Sanity check: projecting sm.data_raw gives element-wise the same result as
# projecting the stored property columns of my_dataframe.
np.all(sm.project_data(sm.data_raw) == sm.project_data(my_dataframe[stored_columns].values))

In [None]:
# Scratch check: (rows, columns) of the property matrix that is fed to the SOM.
my_dataframe[stored_columns].values.shape

In [None]:
# Rows whose index label contains "Cadmium" or "cadmium".
my_dataframe.filter(regex="[Cc]admium", axis="index")

In [None]:
# Rows whose Material_family is exactly 'Metal (ferrous)', selected through the
# index labels that groupby collected for that family.
my_dataframe.loc[my_dataframe.groupby("Material_family").groups['Metal (ferrous)']]

In [None]:
# Scratch check: what an empty pandas Index looks like (the next cell starts from one).
pd.Index([])

In [None]:
# Select every material whose family name starts with "Metal".
# The labels are gathered family by family (in the groupby's group order) and
# the Index is built once, instead of re-creating it from scratch for every
# matching family. With no matching family the result is an empty Index, as before.
grouped_df = my_dataframe.groupby("Material_family")
grouped_index = pd.Index([
    label
    for matgroup, indices in grouped_df.groups.items()
    if matgroup.startswith("Metal")
    for label in indices
])

my_dataframe.loc[grouped_index]

In [None]:
# Map coordinates of the SOM units that the cadmium materials project onto.
# The pattern is "[Cc]admium", matching the other cadmium selections in this
# notebook; the previous lowercase-only "cadmium" skipped capitalised labels.
sm.bmu_ind_to_xy(sm.project_data(my_dataframe.filter(regex="[Cc]admium", axis="index")[stored_columns].values))

In [None]:
# Index labels containing "Copper" or "copper".
# NOTE(review): `mats_list` is not used elsewhere in the visible notebook;
# mats_list_copper below uses a broader pattern.
mats_list = list(my_dataframe.filter(regex="[Cc]opper", axis="index").index)

In [None]:
from tfprop_sompy.utils.data import calculate_SOM_radius, calculate_euclidean_radius

In [None]:
# Index labels containing "Copper"/"copper", or "Cu" followed by any character
# other than "s" (so a label that merely ends in "Cu" is not matched by that branch).
mats_list_copper = list(my_dataframe.filter(regex="([Cc]opper|Cu[^s])", axis="index").index)

In [None]:
# Index labels containing "Steel" or "steel".
mats_list_steel = list(my_dataframe.filter(regex="[Ss]teel", axis="index").index)

In [None]:
# Index labels containing "Cadmium" or "cadmium".
mats_list_cadmium = list(my_dataframe.filter(regex="[Cc]admium", axis="index").index)

In [None]:
# Deliberately mixed selection (copper labels followed by steel labels), used
# below as a contrast to the single-material lists.
# A label matched by both patterns would appear twice in this list.
mats_list_unlike = mats_list_copper + mats_list_steel

In [None]:
# Compare, for each material selection, the radius measured on the SOM with the
# radius measured over the stored property columns.
# This replaces eight copy-pasted single-call cells with one data-driven cell.
# The calls happen in the same order as before: for each selection, the SOM
# radius first and the Euclidean radius second.
# NOTE(review): assumes each helper returns a single value per call, so that it
# fits in one table cell — confirm against tfprop_sompy.utils.data.
material_groups = {
    "copper": mats_list_copper,
    "steel": mats_list_steel,
    "copper + steel": mats_list_unlike,
    "cadmium": mats_list_cadmium,
}

radius_summary = pd.DataFrame(
    [
        {
            "materials": group_name,
            "SOM radius": calculate_SOM_radius(my_dataframe, group_mats, sm),
            "Euclidean radius": calculate_euclidean_radius(my_dataframe, group_mats, stored_columns),
        }
        for group_name, group_mats in material_groups.items()
    ]
).set_index("materials")

# Last expression of the cell, so the table is displayed.
radius_summary

In [None]:
# Scratch check: echo the SOM's codebook matrix (assigned from stored_cb_matrix.values earlier).
sm.codebook.matrix

In [None]:
# Scratch check: dimensions of the codebook matrix.
sm.codebook.matrix.shape

In [None]:
# Scratch check: peeks at the SOM's private `_data` attribute.
# NOTE(review): presumably the normalised copy of the training data (the SOM was
# built with normalization="var") — confirm; sm.data_raw is the public raw view.
sm._data