## Package Import, Output Specification

In [1]:
### Todo ###
############
    # Different colors for predicted d states? rows on ptable?
    # Drop-down for plotting different axes?
    # DataRangeSlider for atomic number of transition metal
    # clickable, selectable for transition metals?

import numpy as np
import pandas as pd
from bokeh.plotting import *
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, CDSView, CustomJS, Slider, Button, CheckboxButtonGroup,\
                         DataTable, TableColumn, NumberFormatter, Select
from bokeh.models.filters import Filter, GroupFilter
from bokeh import events
from bokeh.io import curdoc, show
from bokeh.models.tools import HoverTool
from bokeh.palettes import Colorblind4
from bokeh.models.glyphs import Text

# output_notebook()
# output_file("giis_of_icsd_tmc.html", title='GIIs of ICSD TMCs')

## Data Import, Preparation, Filtering

In [2]:
### Import data
df = pd.read_csv('data/features_icsd_tmetal-compounds.csv')

### Filter out heteroanion, hetero-transition-metal compounds
df = df[df.heteroanion == False]
df['heterotm'] = [tm1 != tm2 for tm1, tm2 in zip(df.tm1, df.tm2)]
df = df[df.heterotm == False]
    # The following are only relevant to hetero transition metal compounds, which we filter out
    # df['mean_cn'] = [cn1 + cn2 for cn1, cn2 in zip(df.cn1, df.cn2)]
    # df['mean_n'] = [n1 + n2 for n1, n2 in zip(df.n1, df.n2)]
    # df['mean_ionic_r'] = [ir1 + ir2 for ir1, ir2 in zip(df.ionic_r_1, df.ionic_r_2)]
df['tm_row'] = [n + 1 for n in df.n1]
    
### Select only useful columns
df = df[['formula', 'sg_sym', 'sg_num',
         'anions', 'tm1', 
         'mm_dist', 'delta', 'normed_dist', 'ionic_r_1',
         'oxi1', 'd_state1', 'pred_d1', 
         'mn1',  'gii', 'n_elems', 'cn1']].dropna()
df.rename({'tm1':'tm', 'ionic_r_1':'ionic_r', 'oxi1':'oxi', 'n1':'n', 
           'd_state1':'n_d_elec', 'pred_d1':'pred_d'})

### Debug
print(f'Anions present: {set(df.anions)}')
print(f'\nTransition metals present: {set(df.tm1)}')

### Partitioning data
anion_set = ['N','O','S','Se']
anion_names = ['Nitrides', 'Oxides','Sulfides', 'Selenides']
markers = ['x','circle', 'plus', 'triangle']
colors = ['blue', 'red', 'green', 'dimgrey'] #Colorblind4

df = df[np.isin(df.anions, anion_set)]

### Convert to CDS
source = ColumnDataSource(data=df)
df.keys()

Anions present: {'Cl', 'Se', 'Sb', 'O', 'P', 'F', 'N', 'S'}

Transition metals present: {'Nb', 'Ta', 'Sc', 'Ir', 'Zn', 'Re', 'Rh', 'Pt', 'Cu', 'Tc', 'Cr', 'Zr', 'Hg', 'Cd', 'Co', 'Os', 'Mn', 'Au', 'Ru', 'Ti', 'Hf', 'Fe', 'Pd', 'Mo', 'W', 'Y', 'V', 'Ni', 'Ag'}


Index(['formula', 'sg_sym', 'sg_num', 'anions', 'tm1', 'mm_dist', 'delta',
       'normed_dist', 'ionic_r_1', 'oxi1', 'd_state1', 'pred_d1', 'mn1', 'gii',
       'n_elems', 'cn1'],
      dtype='object')

## Plotting

In [5]:
def select_compounds():
    """Returns the data filtered by the global filters (derived from the widgets)"""
#     genre_val = genre.value
#     director_val = director.value.strip()
#     cast_val = cast.value.strip()
#     selected = df[
#         (movies.Reviews >= reviews.value) &
#         (movies.BoxOffice >= (boxoffice.value * 1e6)) &
#         (movies.Year >= min_year.value) &
#         (movies.Year <= max_year.value) &
#         (movies.Oscars >= oscars.value)
#     ]
#     if (genre_val != "All"):
#         selected = selected[selected.Genre.str.contains(genre_val)==True]
#     if (director_val != ""):
#         selected = selected[selected.Director.str.contains(director_val)==True]
#     if (cast_val != ""):
#         selected = selected[selected.Cast.str.contains(cast_val)==True]
    selected = df
    return selected


def update():
    """Updates the underlying data to reflect the filter and axes selections"""
    df = select_compounds()
    x_name = axis_map[x_axis.value]
    y_name = axis_map[y_axis.value]
    scatter_plot.xaxis.axis_label = x_axis.value
    scatter_plot.yaxis.axis_label = y_axis.value
    source.data = dict(
        x=df[x_name],
        y=df[y_name])
#         color=df["color"],
#         title=df["Title"])

def format_scatter_plot(scatter_plot):
    scatter_plot.title.align = 'center'
    scatter_plot.line([-999,999],[0,0], color='black')
    scatter_plot.line([0,0],[-999,999], color='black')
    ### Shaded area
    scatter_plot.patch([-999,-999,999,999],[-999,0.2,0.2,-999],alpha=0.2, line_width=0, 
            legend_label='GII \u2264 0.2', muted_alpha=0)
    ### Legend
    scatter_plot.legend.background_fill_alpha = 0.8
    scatter_plot.legend.click_policy="hide"
    ### Fonts
    scatter_plot.xaxis.axis_label_text_font_style = "normal"
    scatter_plot.yaxis.axis_label_text_font_style = "normal"
    scatter_plot.title.text_font_size = f'{int(STD_FONT_SIZE*1.2)}pt'
    scatter_plot.xaxis.axis_label_text_font_size = f'{STD_FONT_SIZE}pt'
    scatter_plot.yaxis.axis_label_text_font_size = f'{STD_FONT_SIZE}pt'
    scatter_plot.xaxis.major_label_text_font_size = f'{int(STD_FONT_SIZE*0.8)}pt'
    scatter_plot.yaxis.major_label_text_font_size = f'{int(STD_FONT_SIZE*0.8)}pt'
    scatter_plot.legend.label_text_font_size = f'{STD_FONT_SIZE}pt'
    
    
### Specify global presets
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select,lasso_select"
SIZING_MODE = 'stretch_both'
STD_FONT_SIZE = 16




### Creating configurable axes
axis_map = {'Global Instability Index': 'gii',
            'Metal-Metal Distance (\u212B)': 'mm_dist',
            'Normalized M-M Distance': 'normed_dist',
            'M-M Distance - Alloy Bond Length' : 'delta',
            'TM Coordination #' : 'cn',
            'TM Periodic Row #' :'tm_row',
            'Spacegroup #' : 'sg_num'
}

### Creating controls
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="Metal-Metal Distance (\u212B)")
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="Global Instability Index")
controls = [x_axis, y_axis]
for control in controls:
    control.on_change('value', lambda attr, old, new: update())

### Figure, title
scatter_plot = figure(tools=TOOLS, x_range=(0, 9), y_range=(-0.01, 2), 
           title="Metal-Metal Distance vs. GII", sizing_mode=SIZING_MODE)
scatter_plot.scatter('x', 'y', source=source, fill_alpha=0.01, line_alpha=0.5)#, 
#                      legend_label=label, color='color', marker=marker, size=9, line_width=1.5)


### Run
update()
format_scatter_plot(scatter_plot)
control_row = row(*controls)
p = column(scatter_plot, control_row)
show(p)

# curdoc().add_root(p)
# curdoc().title = "TM Compounds Data Explorer"

RuntimeError: Models must be owned by only a single document, ColumnDataSource(id='1001', ...) is already in a doc

In [None]:
### Specify presets
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select,lasso_select"
SIZING_MODE = 'stretch_both'


### Figure, title
scatter_plot = figure(tools=TOOLS, width=1550, height=850, x_range=(0, 9), y_range=(-0.01, 2), 
           title="Metal-Metal Distance vs. GII", sizing_mode=SIZING_MODE)
scatter_plot.title.align = 'center'

### Points
for anion, label, color, marker in zip(anion_set, anion_names, colors, markers):
    view = CDSView(source=source, filters=[GroupFilter(column_name='anions', group=anion)])
    scatter_plot.scatter('mm_dist', 'gii', source=source, view=view, fill_alpha=0.01, line_alpha=0.5, 
                legend_label=label, color=color, marker=marker, size=9, muted_alpha=0, line_width=1.5)

### Axes lines, labels
scatter_plot.line([-999,999],[0,0], color='black')
scatter_plot.line([0,0],[-999,999], color='black')
scatter_plot.xaxis.axis_label = "Metal-Metal Distance (\u212B)"
scatter_plot.yaxis.axis_label = "Global Instability Index"
scatter_plot.xaxis.axis_label_text_font_style = "normal"
scatter_plot.yaxis.axis_label_text_font_style = "normal"

### Shaded area
scatter_plot.patch([-999,-999,999,999],[-999,0.2,0.2,-999],alpha=0.2, line_width=0, 
        legend_label='GII \u2264 0.2', muted_alpha=0)

### Legend
scatter_plot.legend.background_fill_alpha = 0.8
scatter_plot.legend.click_policy="hide"

### Fonts
STD_FONT_SIZE = 16
scatter_plot.title.text_font_size = f'{int(STD_FONT_SIZE*1.2)}pt'
scatter_plot.xaxis.axis_label_text_font_size = f'{STD_FONT_SIZE}pt'
scatter_plot.yaxis.axis_label_text_font_size = f'{STD_FONT_SIZE}pt'
scatter_plot.xaxis.major_label_text_font_size = f'{int(STD_FONT_SIZE*0.8)}pt'
scatter_plot.yaxis.major_label_text_font_size = f'{int(STD_FONT_SIZE*0.8)}pt'
scatter_plot.legend.label_text_font_size = f'{STD_FONT_SIZE}pt'

### Histograms on the edge
# create the horizontal histogram
# hhist, hedges = np.histogram(x, bins=20)
# hzeros = np.zeros(len(hedges)-1)
# hmax = max(hhist)*1.1

### Delta vs GII
# right = figure(tools=TOOLS, width=400, height=450, x_range=(0.4, 3), y_range=left.y_range, 
#            title="Normalized Metal-Metal Distance vs. GII")
# right.title.align = 'center'
# right.circle('x1', 'y', source=source, color='black', fill_alpha=0, line_alpha=0.2)
# right.xaxis.axis_label = "Metal-Metal Distance / Pure Metal "
# right.yaxis.visible = False
# right.xaxis.axis_label_text_font_style = "normal"
# # Shaded area
# right.patch([-9999,-9999,9999,9999],[-0.01,0.2,0.2,-0.01],alpha=0.2, line_width=0)

### Tooltips
hover = HoverTool()
hover.tooltips ="""
    <div>
        <h3><center>@formula</center></h3>
        <div><strong>Spacegroup:    </strong>@sg_sym</div>
        <div><strong>M-M Dist.:    </strong>@mm_dist \u212B</div>
        <div><strong>GII:    </strong>@gii</div>
        <div><strong>Atomic #:      </strong>@n1</div>
        <div><strong>Pred. d state: </strong>@pred_d1</div>
        <div><strong>Mendeleev #:   </strong>@mn1</div>
    </div>
"""
scatter_plot.add_tools(hover)


### SELECTORS
### Plotting anion selector
# anion_labels = list(set(df.anions))
# anion_buttons = CheckboxButtonGroup(labels=anion_labels)
# anion_buttons.js_on_click(CustomJS(code="""
#     console.log('anion_buttons: active=' + this.active, this.toString())
# """))

### Plotting transition metal selector
# tm_labels = list(set(df.tm1).union(set(df.tm2)))
# tm_labels = 
# tm_buttons = CheckboxButtonGroup(labels=tm_labels)
# tm_buttons.js_on_click(CustomJS(code="""
#     console.log('tm_buttons: active=' + this.active, this.toString())
# """))


### Creating layout, showing
# p = gridplot([[scatter_plot]], sizing_mode=SIZING_MODE)#, right]])
# selectors = row(tm_buttons)
# layout = column(p, sizing_mode=SIZING_MODE)#, selectors)
# show(layout)

In [None]:
import pandas as pd
from bokeh.io import curdoc
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, DataRange1d, Select
from bokeh.plotting import figure, show
from bokeh.palettes import inferno

variables = ['sunlight_hours']#, 'Sunrise', 'Sunset']

def get_dataset(src, name, plottype):

    df = pd.DataFrame()
    df['date'] = pd.to_datetime(t)
    df['sun'] = src[name]

    return ColumnDataSource(data=df)

def make_plot(source, title, city):
    plot = figure(x_axis_type="datetime", plot_width=800, tools="", toolbar_location=None)
    plot.title.text = title
    plot.line('date','sun',line_width=2,line_color=clrs[1],legend=city,source=source)

# fixed attributes
plot.xaxis.axis_label = None
plot.yaxis.axis_label = "Sunlight [hours]"
plot.axis.axis_label_text_font_style = "bold"
#plot.x_range = DataRange1d(range_padding=0.0)
plot.grid.grid_line_alpha = 0.3

return plot

def update_plot(attrname, old, new):
    new_city = city_select.value
    plot.title.text = "Sunlight data for " + new_city

    src_update = get_dataset(sunlight, new_city, plottype_select.value)
    source.data.update(src_update.data)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

i_city = 'Toronto'
plottype = 'sunlight_hours'

# make example data
yr = 2018
sites = pd.Series(['Resolute','Edmonton','Toronto'])
provs = pd.Series(['Nunavut','Alberta','Ontario'])
sunlight = pd.DataFrame()
sunlight['Toronto']  = pd.Series( [10,11,12,13,12,11,10] )
sunlight['Edmonton'] = pd.Series( [6,8,12,14,11,7,5] )
sunlight['Resolute'] = pd.Series( [4,6,10,16,11,5,2] )

t =  pd.date_range('1-1-' + str(yr),periods=7,freq='m')
N =  len(sites)

clrs = inferno(N)

cities = {}
for i in range(0,N):
    cities.update({sites[i]: {'city': sites[i], 'province': 
provs[i],'sun_hrs':sunlight[sites[i]],}})

city_select = Select(value=i_city, title='City', options=sorted(cities.keys()))
plottype_select = Select(value=plottype, title='Plot type', options=['Sunlight']) #, 'Sunrise', 'Sunset'])

source = get_dataset(sunlight, cities[i_city]['city'], plottype)

plot = make_plot(source, "Sunlight data for ",i_city)# + cities[city]['city'])

city_select.on_change('value', update_plot)
plottype_select.on_change('value', update_plot)

controls = column(city_select, plottype_select)

curdoc().add_root(row(plot, controls))
curdoc().title = "Sunlight"


In [4]:
# Probably should use pymatgen to add atomic number and other stuff to the csv to do this
set(list(zip(df.tm1, df.n1)))
# Looks like n1 is the electron's n (so different rows of periodic table
tm_n = [(tm, n) for tm, n in zip(df.tm1, df.n1)]
tm_n

AttributeError: 'DataFrame' object has no attribute 'n1'