In [1]:
# Laura Chapman
# Script for developing statistical tools for Glue
# As of 7/9/2018 

In [2]:
# Basic code that imports glue and loads in and links the data

import sys
from glue.core.data_factories import load_data
from glue.core import DataCollection
from glue.core.link_helpers import LinkSame
from glue.app.qt.application import GlueApplication
from glue.viewers.image.qt import ImageViewer
from glue_vispy_viewers.volume.volume_viewer import VispyVolumeViewer

image_filename = 'w5.fits'
catalog_filename = 'w5_psc.vot'

#load 2 datasets from files
catalog = load_data(catalog_filename)
image = load_data(image_filename)

dc = DataCollection([catalog,image])

# link positional information
dc.add_link(LinkSame(catalog.id['RAJ2000'], image.id['Right Ascension']))
dc.add_link(LinkSame(catalog.id['DEJ2000'], image.id['Declination']))

#Create subset based on filament mask
ra_state=(image.id['Right Ascension'] > 44) & (image.id['Right Ascension'] < 46)
subset_group=dc.new_subset_group('RA_Selection', ra_state)
subset_group.style.color = '#0000FF'

de_state = image.id['Declination'] > 60
subset_group1 = dc.new_subset_group('DE_Selection', de_state)
subset_group1.style.color = '#FF0000'

j_state = catalog.id['Jmag'] > 14
subset_group2 = dc.new_subset_group('Jmag Selection', j_state)
subset_group2.style.color = '#00FF00'




In [3]:
# format for keys

# subset (data set)component

'''
Notes:

Needs to be able to select multiple rows at a time to add to the table.
This seems to be reliant on detecting the mouse release event, though QTreeView only 
provides built in signals for pressed and clicked. 

When switching from the two tree views, the equivalent rows that were selected in the 
other view should be selected. Haven't figured out how to select rows in qtreeview. 

'''

"\nNotes:\n\nNeeds to be able to select multiple rows at a time to add to the table.\nThis seems to be reliant on detecting the mouse release event, though QTreeView only \nprovides built in signals for pressed and clicked. \n\nWhen switching from the two tree views, the equivalent rows that were selected in the \nother view should be selected. Haven't figured out how to select rows in qtreeview. \n\n"

In [41]:
from PyQt5.QtCore  import QAbstractItemModel, pyqtSignal, QSize, QFile, QIODevice, QModelIndex, Qt, pyqtSlot, QVariant, QItemSelectionModel
from PyQt5.QtWidgets import QSizePolicy, QTreeView, QMessageBox, QRadioButton, QAbstractScrollArea, QSpinBox, QToolButton, QHeaderView, QAbstractItemView, QApplication, QLabel, QTreeView, QComboBox, QCheckBox, QWidget, QPushButton, QHBoxLayout, QFrame, QTableView, QGroupBox, QDialog, QVBoxLayout, QLabel, QGridLayout
from PyQt5 import QtCore, QtWidgets, QtGui
from PyQt5.QtGui import QStandardItemModel, QStandardItem
from decimal import getcontext, Decimal
from IPython.display import display, HTML
from PyQt5.QtGui import *
import sys
from qtpy import compat
from glue.icons.qt import helpers
import pandas as pd
from pandas import DataFrame
import numpy as np


class pandasModel(QtCore.QAbstractTableModel):
    # Set up the data in a form that allows it to be added to qt widget
    def __init__(self, df, dc, parent=None):
        QtCore.QAbstractTableModel.__init__(self, parent)
        self.dc = dc
        self.data_frame = df
        self.subset_labels = []
        
        # Create an array of subset labels
        for i in range(0, len(self.dc.subset_groups)):
            self.subset_labels.append(dc.subset_groups[i].label)
        
        super(pandasModel, self).__init__(parent)      

    def rowCount(self, parent=None):
        return len(self.data_frame.values)

    def columnCount(self, parent=None):
        return self.data_frame.columns.size

    def data(self, index, role=Qt.DisplayRole):
        if index.isValid():
            if role == Qt.BackgroundRole:
                # Get the data index and set the tranparency
                data = str(self.data_frame.values[index.row()][1])
                data_index = np.where(data == np.asarray(self.dc.labels))[0][0]  
                transparency = 60
                
                # If it is a subset find the color and color accordingly
                if self.data_frame.values[index.row()][0] != '--':
                    subset = str(self.data_frame.values[index.row()][0])
                    subset_index = np.where(subset == np.asarray(self.subset_labels))[0][0]
                    color = dc[data_index].subsets[subset_index].style.color
                    q_color = QColor(color)
                    rgb_color = q_color.getRgb()
                    
                    return QBrush(QColor(rgb_color[0], rgb_color[1], rgb_color[2], transparency))
                
                # If it is a dataset find the color and color accordingly 
                else:
                    color = dc[data_index].style.color
                    q_color = QColor(color)
                    rgb_color = q_color.getRgb()

                    return QBrush(QColor(rgb_color[0], rgb_color[1], rgb_color[2], transparency))
                
            elif role == Qt.DisplayRole:
                return QVariant(str(
                    self.data_frame.values[index.row()][index.column()]))
        return QVariant()
    
    def headerData(self, col, orientation, role):
        if orientation == Qt.Horizontal and role == Qt.DisplayRole:
            return QVariant(self.data_frame.columns[col])
        if orientation == Qt.Vertical and role == Qt.DisplayRole:
            return QVariant(self.data_frame.index[col])
        return QVariant()  
    
    def sort(self, column, order):
        colname = self.data_frame.columns.tolist()[column]
        self.layoutAboutToBeChanged.emit()
        self.data_frame.sort_values(colname, ascending= order == QtCore.Qt.AscendingOrder, inplace=True)
        self.data_frame.reset_index(inplace=True, drop=True)
        self.layoutChanged.emit()   
        
        
class StatsGui(QWidget):
    ''' 
    This class accepts a glue data collection object, and builds an interactive window
    to display basic statistics (e.g. mean, median, mode) about each dataset
    '''
    
    def __init__(self,dc):
        
        # Initialize the object as a QWidget
        QWidget.__init__(self)
        
        #Save the datacollection object as an attribute of class StatsGui
        self.dc = dc
        
        #Set the title of the main GUI window
        self.setWindowTitle('Statistics')
        
        # Set up dicts for row indices
        self.subset_dict = dict()
        self.component_dict = dict()
        
        self.selected_dict = dict()
        self.selected_indices = []
        
        # Set up the count for number of components/tree rows
        self.num_rows = 0
        
        # Set up the headings
        self.headings = ('Subset', 'Dataset', 'Component', 'Mean', 'Median', 'Minimum', 'Maximum', 'Sum')
        
        #################Set up the QTableView Widget#############################
        self.table = QTableView(self)
        self.table.setSortingEnabled(True)
        self.table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.table.verticalHeader().setVisible(False)
        
        #Set the table headings   
        self.data_frame = pd.DataFrame(columns=self.headings) 
        self.data_accurate = pd.DataFrame(columns=self.headings)
        self.model = pandasModel(self.data_frame, self.dc)

        self.table.setModel(self.model) 
        
        #Set up tree view and fix it to the top half of the window
        self.treeview = QTreeView(self)

        # Set the default clicking behavior to be row selection
        self.treeview.setSelectionBehavior(QAbstractItemView.SelectRows)
        
        # Set up expand all, collapse all, select all and deselect all buttons
        
        # Layout for expand/collapse/select/deselect
        layout_left_options = QHBoxLayout()
        
        self.expand_data = QToolButton(self)
        self.expand_data.setText("Expand all data and subsets")
        self.expand_data.clicked.connect(self.expandClicked)
        layout_left_options.addWidget(self.expand_data)
        
        self.visible = QToolButton(self)
        self.visible.setText("Select all visible")
        self.visible.clicked.connect(self.visibleClicked)
        # Make uncheckable to start since no components are visible
        self.visible.setCheckable(False)
        layout_left_options.addWidget(self.visible)
        
        # Connect to the treeview to be unclickable when the tree is collapsed
        self.treeview.collapsed.connect(self.uncheckVisible)
        self.treeview.expanded.connect(self.checkVisible)
        
        self.all = QToolButton(self)
        self.all.setText('Select all')
        self.all.clicked.connect(self.allClicked)
        layout_left_options.addWidget(self.all)
        
        self.none = QToolButton(self)
        self.none.setText('Deselect all')
        self.none.clicked.connect(self.noneClicked)
        layout_left_options.addWidget(self.none)
        
        # Set default significant figures to 5
        getcontext().prec = 5
        
        # Set up past selected items
        self.past_selected = []
        
        # Sort by subsets as a default
        self.sortBySubsets()
        self.component_mode = False

        # Set up the combo box for users to choose the number of significant figures in the table
        
        # Set up bottom options layout
        layout_bottom_options = QHBoxLayout()
        
        self.siglabel = QLabel(self)
        self.siglabel.setText('Number of decimals:')
        layout_bottom_options.addWidget(self.siglabel)
        
        self.num_sigs = 3
        
        self.sigfig = QSpinBox(self)
        self.sigfig.setRange(1, 10)
        self.sigfig.setValue(self.num_sigs)
        self.sigfig.valueChanged.connect(self.sigchange)
        layout_bottom_options.addWidget(self.sigfig)
        
        # Allow user to pick scientific notation or nonscientific notation
        self.sci_notation = QRadioButton(self)
        self.sci_notation.setText('Scientific notation')
        self.sci_notation.setChecked(True)
        self.isSci = True
        self.sci_notation.toggled.connect(self.notation)
        
        self.stan_notation = QRadioButton(self)
        self.stan_notation.setText('Standard notation')
        self.sci_notation.toggled.connect(self.notation)
        
        layout_bottom_options.addWidget(self.sci_notation)
        layout_bottom_options.addWidget(self.stan_notation)
        
        # Export to file button
        self.export = QPushButton(self)
        self.export.setText('Export to file')
        self.export.clicked.connect(self.exportToFile)
        layout_bottom_options.addWidget(self.export)
        
        # Set up the toggle button to switch tree sorting modes
        self.switch_mode = QToolButton(self)
        self.switch_mode.setText('Sort tree by components')
        self.switch_mode.clicked.connect(self.switchMode)
        layout_left_options.addWidget(self.switch_mode)
        
        # Add instructions to sort the table
        self.how = QLabel(self)
        self.how.setText('Click each header to sort table')
        layout_left_options.addWidget(self.how)
        
        layout_table = QHBoxLayout()
        layout_table.addWidget(self.table)
        layout_table.stretch(10)

        # Finish nesting all the layouts
        main_layout = QVBoxLayout()
        
        main_layout.addWidget(self.treeview)
        main_layout.addLayout(layout_left_options)
        main_layout.addLayout(layout_table)
        main_layout.addLayout(layout_bottom_options)
        
        self.setLayout(main_layout)
        
        # Set up dict for caching
        self.cache_stash = dict()
        
        # Set up bool for keeping track of component_mode
    
    def myPressedEvent (self, currentQModelIndex):
        ''' 
        Every time the selection in the treeview changes:
        if it is newly selected, add it to the table
        if it is newly deselected, remove it from the table
        ''' 
        
#         if self.component_mode:
#             print(len(self.model_components.persistentIndexList()))
#         else:
#             print(len(self.model_subsets.persistentIndexList()))
            
        # Get the indexes of all the selected components
        self.selected_indices = self.treeview.selectionModel().selectedRows()

        newly_selected = np.setdiff1d(self.selected_indices, self.past_selected)
            
        for index in range (0, len(newly_selected)):
                
            # Check which view mode the tree is in to get the correct indices
            if not self.component_mode:
                if newly_selected[index].parent().parent().parent().row() == -1:
                    # Whole data sets
                    data_i = newly_selected[index].parent().row()
                    comp_i = newly_selected[index].row()
                    subset_i = -1
                else:
                    # Subsets
                    data_i = newly_selected[index].parent().row()
                    comp_i = newly_selected[index].row()
                    subset_i = newly_selected[index].parent().parent().row()
            
            else:
                data_i = newly_selected[index].parent().parent().row()
                comp_i = newly_selected[index].parent().row()
                subset_i = newly_selected[index].row() - 1

            is_subset = (subset_i != -1)

            # Check if its a subset and if so run subset stats
            if is_subset: 
                self.runSubsetStats(subset_i, data_i, comp_i)

            else:
                # Run standard data stats
                self.runDataStats(data_i, comp_i)   
            
        newly_dropped = np.setdiff1d(self.past_selected, self.selected_indices)
            
        for index in range (0, len(newly_dropped)):
                
            # Check which view mode the tree is in to get the correct indices
            if not self.component_mode:
                data_i = newly_dropped[index].parent().row()
                comp_i = newly_dropped[index].row()
                subset_i = newly_dropped[index].parent().parent().row()
            
            else:
                data_i = newly_dropped[index].parent().parent().row()
                comp_i = newly_dropped[index].parent().row()
                subset_i = newly_dropped[index].row() - 1
            
            is_subset = newly_dropped[index].parent().parent().parent().row() == 1 or (self.switch_mode.text() == 'Sort tree by subsets' and subset_i != -1)

            if is_subset:
                try:
                    # Get the indices that match the component, dataset, and subset requirements
                    idx_c = np.where(self.data_frame['Component'] == self.dc[data_i].components[comp_i].label)
                    idx_d = np.where(self.data_frame['Dataset'] == self.dc[data_i].label)
                    idx_s = np.where(self.data_frame['Subset'] == self.dc[data_i].subsets[subset_i].label)
                    idx1 = np.intersect1d(idx_c, idx_d)
                    idx2 = np.intersect1d(idx1, idx_s)

                    self.data_frame = self.data_frame.drop(idx2)
                except:
                    pass

            else:
                try:
                # Find the index in the table of the unchecked element, if it's in the table

                    # Find the matching component and dataset indices and intersect them to get the unique index
                    idx_c = np.where(self.data_frame['Component'] == self.dc[data_i].components[comp_i].label)
                    idx_d = np.where(self.data_frame['Dataset'] == self.dc[data_i].label)
                    idx_s = np.where(self.data_frame['Subset'] == '--')
                    idx1 = np.intersect1d(idx_c, idx_d)
                    idx2 = np.intersect1d(idx1, idx_s)

                    self.data_frame = self.data_frame.drop(idx2)
                except:
                    pass
        
        # Update the past selected indices
        self.past_selected = self.selected_indices
        
        model = pandasModel(self.data_frame, self.dc)
        
        self.table.setModel(model)
       
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)  
    
    def runDataStats (self, data_i, comp_i):
        '''
        Runs statistics for the component comp_i of data set data_i
        '''

        subset_label = "--"
        data_label = dc[data_i].label   
        comp_label = self.dc[data_i].components[comp_i].label # add to the name array to build the table
        
        # Build the cache key
        cache_key = subset_label + data_label + comp_label
        
        # See if the values have already been cached
        try:
            column_data = self.cache_stash[cache_key]
        
        except:         
        # Find the stat values
        # Save the data in the cache 
            mean_val = self.dc[data_i].compute_statistic('mean', self.dc[data_i].components[comp_i])
            median_val = self.dc[data_i].compute_statistic('median', self.dc[data_i].components[comp_i])     
            min_val = self.dc[data_i].compute_statistic('minimum', self.dc[data_i].components[comp_i])     
            max_val = self.dc[data_i].compute_statistic('maximum', self.dc[data_i].components[comp_i])    
            sum_val = self.dc[data_i].compute_statistic('sum', self.dc[data_i].components[comp_i])

            column_data = np.asarray([[subset_label], [data_label], [comp_label], [mean_val], [median_val], [min_val], [max_val], [sum_val]]).transpose()
            
            self.cache_stash[cache_key] = column_data
        
        # Save the accurate data in self.data_accurate
        column_df = pd.DataFrame(column_data, columns=self.headings)
        self.data_accurate = self.data_accurate.append(column_df, ignore_index=True)        

        if self.isSci:
            # Format in scientific notation
            string = "%." + str(self.num_sigs) + 'E'
        else:
            # Format in standard notation
            string = "%." + str(self.num_sigs) + 'F'             

        mean_val = string % Decimal(column_data[0][3])
        median_val = string % Decimal(column_data[0][4])
        min_val = string % Decimal(column_data[0][5])
        max_val = string % Decimal(column_data[0][6])
        sum_val = string % Decimal(column_data[0][7])            

            # Create the column data array and append it to the data frame
        column_data = np.asarray([[subset_label], [data_label], [comp_label], [mean_val], [median_val], [min_val], [max_val], [sum_val]]).transpose()
        column_df = pd.DataFrame(column_data, columns=self.headings)
        self.data_frame = self.data_frame.append(column_df, ignore_index=True)
    
    def runSubsetStats (self, subset_i, data_i, comp_i):
        '''
        Runs statistics for the subset subset_i with respect to the component comp_i of data set data_i
        '''

        subset_label = dc[data_i].subsets[subset_i].label
        data_label = dc[data_i].label   
        comp_label = self.dc[data_i].components[comp_i].label # add to the name array to build the table
        
        # Build the cache key
        cache_key = subset_label + data_label + comp_label
        
        # See if the statistics are already in the cache
        try:
            column_data = self.cache_stash[cache_key]
        
        # Find the stats if not in the cache
        # Save in the cache
        
        except:
            mean_val = self.dc[data_i].compute_statistic('mean', self.dc[data_i].subsets[subset_i].components[comp_i], subset_state=self.dc[data_i].subsets[subset_i].subset_state)
            median_val = self.dc[data_i].compute_statistic('median', self.dc[data_i].subsets[subset_i].components[comp_i], subset_state=self.dc.subset_groups[subset_i].subset_state)       
            min_val = self.dc[data_i].compute_statistic('minimum', self.dc[data_i].subsets[subset_i].components[comp_i], subset_state=self.dc.subset_groups[subset_i].subset_state)       
            max_val = self.dc[data_i].compute_statistic('maximum', self.dc[data_i].subsets[subset_i].components[comp_i], subset_state=self.dc.subset_groups[subset_i].subset_state)      
            sum_val = self.dc[data_i].compute_statistic('sum', self.dc[data_i].subsets[subset_i].components[comp_i], subset_state=self.dc.subset_groups[subset_i].subset_state) 

            column_data = np.asarray([[subset_label], [data_label], [comp_label], [mean_val], [median_val], [min_val], [max_val], [sum_val]]).transpose()

            self.cache_stash[cache_key] = column_data
        
        # Save the data in self.data_accurate
        column_df = pd.DataFrame(column_data, columns=self.headings)
        self.data_accurate = self.data_accurate.append(column_df, ignore_index=True)        
        
        if self.isSci:
            # Format in scientific notation
            string = "%." + str(self.num_sigs) + 'E'
        else:
            # Format in standard notation
            string = "%." + str(self.num_sigs) + 'F'            
            
        mean_val = string % Decimal(column_data[0][3])
        median_val = string % Decimal(column_data[0][4])
        min_val = string % Decimal(column_data[0][5])
        max_val = string % Decimal(column_data[0][6])
        sum_val = string % Decimal(column_data[0][7])
        
        # Create the column data array and append it to the data frame
        column_data = np.asarray([[subset_label], [data_label], [comp_label], [mean_val], [median_val], [min_val], [max_val], [sum_val]]).transpose()
        column_df = pd.DataFrame(column_data, columns=self.headings)
        self.data_frame = self.data_frame.append(column_df, ignore_index=True)    
    
    def sigchange(self, i):
        # Set the number of significant figures according to what the user selects
        getcontext().prec = i
        self.num_sigs = i
        
        # Retrospectively change the number of significant figures in the table
        
        data_labels = self.data_frame['Dataset']
        comp_labels = self.data_frame['Component']
        subset_labels = self.data_frame['Subset']
        
        mean_vals = []
        median_vals = []
        min_vals = []
        max_vals = []
        sum_vals = []
        
        if self.isSci:
            # Build a string that will format numbers in scientific notation
            string = "%." + str(self.num_sigs) + 'E'
        else:
            # Build a string that will format numbers in standard notation
            string = "%." + str(self.num_sigs) + 'F'    
    
        # Get the values from the self.data_accurate array and append them
        for i in range (0, len(self.data_frame)):
            # Traverse through the data_frame, which represents the data in the table
            # Get the name of the component, dataset, and subset of each row  
            component = self.data_frame['Component'][i]
            dataset = self.data_frame['Dataset'][i]
            subset = self.data_frame['Subset'][i]
               
            # Find the index of data_accurate that corresponds to the data
            idx_c = np.where(component == self.data_accurate['Component'])
            idx_d = np.where(dataset == self.data_accurate['Dataset'])
            idx_s = np.where(subset == self.data_accurate['Subset'])
            idx1 = np.intersect1d(idx_c, idx_d)
            idx2 = np.intersect1d(idx1, idx_s)[0] 
                
            # Append the values to the stat arrays, formatted with the string built above
            mean_vals.append(string % Decimal(self.data_accurate['Mean'][idx2]))
            median_vals.append(string % Decimal(self.data_accurate['Median'][idx2]))
            min_vals.append(string % Decimal(self.data_accurate['Minimum'][idx2]))
            max_vals.append(string % Decimal(self.data_accurate['Maximum'][idx2]))
            sum_vals.append(string % Decimal(self.data_accurate['Sum'][idx2]))               
           
        # Build the column_data
        column_data = np.asarray([subset_labels, data_labels, comp_labels, mean_vals, median_vals, min_vals, max_vals, sum_vals]).transpose()
        
        # Update the self.data_frame
        self.data_frame = pd.DataFrame(column_data, columns=self.headings)
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)        
        
    def expandClicked(self):
        if self.expand_data.text() == "Expand all data and subsets":
            self.treeview.expandAll()
            self.expand_data.setText("Collapse all data and subsets")
        else:
            self.treeview.collapseAll()
            self.expand_data.setText("Expand all data and subsets")

    def visibleClicked(self):
        # Select all visible components
        
        original_idx = self.treeview.selectionModel().selectedRows()

        self.treeview.selectAll()
        
        # Get all the currently selected rows
        end_idx=self.treeview.selectionModel().selectedRows()
        
        # Check to see if any new rows were selected
        new_rows = np.intersect1d(original_idx, end_idx)
        if len(new_rows) == len(original_idx):
            text = "No new rows are visible."
 
            # Initialize a widget for the message box
            message_widget = QWidget()
 
            # Show a message box with above text, Yes, and Cancel with Cancel default selected
            result = QMessageBox.warning(message_widget, 'Message', text, QMessageBox.Close, QMessageBox.Close)
        
            # Initialize application
            message_app = QApplication.instance()
            if message_app is None:
                message_app = QApplication(sys.argv)
            else:
                print('QApplication instance already exists: %s' % str(message_app))
 
            # Show window
            message_widget.show() 
            # Exit the function
            return
        
        for index in end_idx:
            if index not in original_idx:
                # Check to see if the clicked item is a subset component or a data component
                # Get correct index for either view
                if not self.component_mode:
                    if index.parent().parent().parent().row() == -1:
                        # Whole data sets
                        data_i = index.parent().row()
                        comp_i = index.row()
                        subset_i = -1
                    else:
                        # Subsets
                        data_i = index.parent().row()
                        comp_i = index.row()
                        subset_i = index.parent().parent().row()          
                else:
                    data_i = index.parent().parent().row()
                    comp_i = index.parent().row()
                    subset_i = index.row() - 1
                
                is_subset = index.parent().parent().parent().row() == 1 or (self.switch_mode.text() == 'Sort tree by subsets' and subset_i != -1)
                
                if is_subset:
                    try:
                        self.runSubsetStats(subset_i, data_i, comp_i)
                    except:
                        print("subset stats could not be run for: subset: ", subset_i, "data: ", data_i, "comp: ", comp_i)
                else:
                    try:
                        self.runDataStats(data_i, comp_i)
                    except:
                        print("data stats could not be run for data: ", data_i, "component: ", comp_i)
        
        # Set the table to display the correct data frame
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False) 
            
    def allClicked(self):
        # Expand and select all components
        # If more than 20 rows will be added, ask user if they'd like to continue or cancel
        
        # Find what rows are already selected
        original_idx = self.treeview.selectionModel().selectedRows()
        
        # Warn user if they are about to add more than 20 rows to the table
        num_to_add = self.num_rows - len(original_idx)
        
        if num_to_add > 20:
                
            text = "Are you sure you want to add " + str(num_to_add) + " rows to the table?"
 
            # Initialize a widget for the message box
            message_widget = QWidget()
 
            # Show a message box with above text, Yes, and Cancel with Cancel default selected
            result = QMessageBox.warning(message_widget, 'Message', text, QMessageBox.Yes | QMessageBox.Cancel, QMessageBox.Cancel)
 
            if result == QMessageBox.Yes:
                # Go ahead and expand and select all
                self.treeview.expandAll()
                
                # Set expand/collapse button to allow user to collapse rows
                self.expand_data.setText("Collapse all data and subsets")           
                self.treeview.selectAll()
                
                # Gather the selected rows (all the rows in the tree)
                end_idx=self.treeview.selectionModel().selectedRows()
                
                # For each one, only take action if it is not in the original set of selected rows
                for index in end_idx:
                    if index not in original_idx:
                        # Check to see if the clicked item is a subset component or a data component
                        # Get correct index for either view
                        if not self.component_mode:
                            if index.parent().parent().parent().row() == -1:
                                # Whole data sets
                                data_i = index.parent().row()
                                comp_i = index.row()
                                subset_i = -1
                            else:
                                # Subsets
                                data_i = index.parent().row()
                                comp_i = index.row()
                                subset_i = index.parent().parent().row()  
                        # Indexing is simpler for sort by components mode
                        else:
                            data_i = index.parent().parent().row()
                            comp_i = index.parent().row()
                            subset_i = index.row() - 1

                        # True if row is from a subset and false if from a full data set
                        is_subset = index.parent().parent().parent().row() == 1 or (self.switch_mode.text() == 'Sort tree by subsets' and subset_i != -1)

                        if is_subset:
                            self.runSubsetStats(subset_i, data_i, comp_i)
                        else:
                            self.runDataStats(data_i, comp_i)

                # Set the table to display the correct data frame
                
                # Build the model from the self.data_frame created in runStats functions
                model = pandasModel(self.data_frame, self.dc)
                # Set the model to the table
                self.table.setModel(model)
                # Allow sorting and show grid
                self.table.setSortingEnabled(True)
                self.table.setShowGrid(False)      
                
            # Initialize application
            message_app = QApplication.instance()
            if message_app is None:
                message_app = QApplication(sys.argv)
            else:
                print('QApplication instance already exists: %s' % str(message_app))
 
            # Show window
            message_widget.show() 

    def noneClicked(self):
        # Clear the selection from the tree
        self.treeview.clearSelection()
        
        # Clear the table
        self.data_frame = pd.DataFrame(columns=self.headings)
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)
        
    def exportToFile(self):
        file_name, fltr = compat.getsavefilename(caption="Choose an output filename")
        
        try:
            self.data_frame.to_csv(str(file_name), index=False)
        except:
            pass
        
    def switchMode(self):
        # if the user clicks to sort by components, change the text to "sort by subsets" and sort tree by components
        if self.switch_mode.text() == 'Sort tree by components':
            self.sortByComponents()
            self.switch_mode.setText('Sort tree by subsets')
        # otherwise the user wants to sort by subsets, change text to "sort by components" and sort tree by subsets
        else:
            self.sortBySubsets()
            self.switch_mode.setText('Sort tree by components')
  
    def sizeHint(self):
        return QSize(600, 800)
    
    def sortBySubsets(self):
        '''
        Sorts the treeview by subsets- Dataset then subset then component.
        What we originally had as the default
        '''
        # Set to not component mode
        self.component_mode = False
        
        # Clear the num_rows
        self.num_rows = 0
        
        # Clear the data_accurate
        self.data_accurate = pd.DataFrame(columns=self.headings)      
        
        # Save the selected rows from the component view
        try:
            selected = dict()
            for i in range(0, len(self.selected_indices)):
                item = self.model_components.itemFromIndex(self.selected_indices[i])
                if item.row() != 0:
                    key = item.text() + " (" + item.parent().parent().text() + ")"+ item.parent().text()
                    selected[key] = item.index()
                else:
                    key = item.text() + item.parent().text()
                    selected[key] = item.index()
                
        except:
            pass
        
        # Clear the selection
        self.treeview.clearSelection()
        
        # Set Expand/collapse button to "expand all"
        self.expand_data.setText("Expand all data and subsets")       
        
        #Allow the user to select multiple rows at a time 
        self.selection_model = QAbstractItemView.MultiSelection
        self.treeview.setSelectionMode(self.selection_model)
        
        # See if the model already exists instead of generating fresh
        try:
            self.treeview.setModel(self.model_subsets)
        
        # If try fails, go ahead and generate the treeview
        except:
            self.model_subsets = QStandardItemModel()
            self.model_subsets.setHorizontalHeaderLabels([''])

            self.treeview.setModel(self.model_subsets)
            self.treeview.setUniformRowHeights(True)

            # populate the tree
            # Make all the datasets be parents, and make it so they are not selectable
            parent_data = QStandardItem('{}'.format('Data'))
            parent_data.setEditable(False)
            parent_data.setSelectable(False)

            for i in range(0, len(self.dc)):
                parent = QStandardItem('{}'.format(self.dc.labels[i]))
                parent.setIcon(helpers.layer_icon(self.dc[i]))
                parent.setEditable(False)
                parent.setSelectable(False)

                # Make all the data components be children, nested under their parent
                for j in range(0,len(self.dc[i].components)):
                    child=QStandardItem('{}'.format(str(self.dc[i].components[j])))
                    child.setEditable(False)
                    
                    # Add to the subset_dict
                    key = self.dc[i].label + self.dc[i].components[j].label + "All data-" + self.dc[i].label
                    self.subset_dict[key] = child.index()
                    
                    parent.appendRow(child)
                    self.num_rows = self.num_rows + 1

                parent_data.appendRow(parent)

                #Add the parents with their children to the QStandardItemModel
            self.model_subsets.appendRow(parent_data)

            parent_subset = QStandardItem('{}'.format('Subsets')) 
            parent_subset.setEditable(False)
            parent_subset.setSelectable(False)

            # Set up the subsets as Subsets > choose subset > choose data set > choose component

            for j in range(0, len(self.dc.subset_groups)):
                grandparent = QStandardItem('{}'.format(self.dc.subset_groups[j].label))
                grandparent.setIcon(helpers.layer_icon(self.dc.subset_groups[j]))

                grandparent.setEditable(False)
                grandparent.setSelectable(False)

                for i in range(0, len(self.dc)):
                    parent = QStandardItem('{}'.format(self.dc.subset_groups[j].label) + ' (' + '{}'.format(self.dc[i].label) + ')')

                    # Set up the circles
                    parent.setIcon(helpers.layer_icon(self.dc.subset_groups[j]))
                    parent.setEditable(False)
                    parent.setSelectable(False)

                    try:
                        self.dc[i].compute_statistic('mean', self.dc[i].subsets[j].components[0], subset_state=self.dc[i].subsets[j].subset_state)

                    except:
                        parent.setForeground(QtGui.QBrush(Qt.gray))

                    for k in range(0, len(self.dc[i].components)):

                        child = QStandardItem('{}'.format(str(self.dc[i].components[k])))
                        child.setEditable(False)
                        
                        # Update the dict to keep track of row indices
                        key = self.dc[i].label + self.dc[i].components[k].label + self.dc[i].subsets[j].label
                        self.subset_dict[key] = child.index()
                        
                        parent.appendRow(child)
                        self.num_rows = self.num_rows + 1

                        # Make gray and unselectable components that aren't defined for a subset
                        try:
                            self.dc[i].compute_statistic('mean', self.dc[i].subsets[j].components[k], subset_state=self.dc[i].subsets[j].subset_state)

                        except:
#                             print("Glue has raised an Incompatible Attribute error on this component. Let's do this instead.")
                            child.setEditable(False)
                            child.setSelectable(False)
                            child.setForeground(QtGui.QBrush(Qt.gray))

                    grandparent.appendRow(parent) 
                parent_subset.appendRow(grandparent)
            self.model_subsets.appendRow(parent_subset)
        
            # Fill out the dict now that the indices are connected to the QStandardItemModel
            
            # Full datasets
            for i in range(0, parent_data.rowCount()):
                for j in range(0, parent_data.child(i).rowCount()):
                    key = "All data (" + parent_data.child(i).text() + ")"+ parent_data.child(i).child(j).text()
                    self.subset_dict[key] = parent_data.child(i).child(j).index()
            
            # Subsets
            for i in range(0, parent_subset.rowCount()):
                for j in range(0, parent_subset.child(i).rowCount()):
                    for k in range(0, parent_subset.child(i).child(j).rowCount()):
                        key = parent_subset.child(i).child(j).text() + parent_subset.child(i).child(j).child(k).text()
                        self.subset_dict[key] = parent_subset.child(i).child(j).child(k).index()
        
        self.treeview.setUniformRowHeights(True)
        
        # Make the table update whenever the selection in the tree is changed
        selection_model = QItemSelectionModel(self.model_subsets)
        self.treeview.setSelectionModel(selection_model)
        selection_model.selectionChanged.connect(self.myPressedEvent)

        # Clear the table 
        self.data_frame = pd.DataFrame(columns=self.headings)
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)
        
        # Select rows that should be selected
        
        sel_mod = self.treeview.selectionModel()
        
        for i in range(0, len(selected)):
            key = list(selected.keys())[i]
            index = self.subset_dict[key]
            self.treeview.setCurrentIndex(index)
    
        self.treeview.setSelectionModel(sel_mod)
        
        # Update the past_selected and selected_indices
        self.past_selected = self.treeview.selectionModel().selectedRows()
        self.selected_indices = self.treeview.selectionModel().selectedRows()
    
    def sortByComponents(self):
        '''
        Sorts the treeview by components- Dataset then component then subsets
        '''  
        # Set component_mode to true
        self.component_mode = True
        
        # Clear the num_rows
        self.num_rows = 0
        
        # Clear the data_accurate
        self.data_accurate = pd.DataFrame(columns=self.headings)
        
        # Save the selected rows from the subset view if applicable
        try:
            selected = dict()

            for i in range(0, len(self.selected_indices)):
                item = self.model_subsets.itemFromIndex(self.selected_indices[i])
                if item.parent().parent().text() == "Data":
                    key =  "All data (" + item.parent().text() + ")" + item.text()
                    selected[key] = item.index()
                else:
                    key = item.parent().text() + item.text()
                    selected[key] = item.index()
        except:
            pass
        
        # Clear the selection
        self.treeview.clearSelection()
        
        # Set Expand/collapse button to "expand all"
        self.expand_data.setText("Expand all data and subsets")
        
        self.selection_model = QAbstractItemView.MultiSelection
        self.treeview.setSelectionMode(self.selection_model)
        
        # See if the model already exists
        try:
            self.treeview.setModel(self.model_components)
            
        except: 
        
            self.model_components = QStandardItemModel()
            self.model_components.setHorizontalHeaderLabels([''])

            self.treeview.setModel(self.model_components)
            self.treeview.setUniformRowHeights(True)
    
            # Populate the tree
            # Make all the datasets be parents, and make it so they are not selectable
        
            for i in range(0,len(dc)):
                grandparent = QStandardItem('{}'.format(self.dc.labels[i]))
                grandparent.setIcon(helpers.layer_icon(self.dc[i]))
                grandparent.setEditable(False)
                grandparent.setSelectable(False)
            
                # Make all the data components be children, nested under their parent
                for k in range(0,len(self.dc[i].components)):
                    parent=QStandardItem('{}'.format(str(self.dc[i].components[k])))
                    parent.setEditable(False)
                    parent.setSelectable(False)
                
                    child = QStandardItem('{}'.format('All data (' + self.dc.labels[i] + ')'))
                    child.setIcon(helpers.layer_icon(self.dc[i]))
                    child.setEditable(False)
                    
                    parent.appendRow(child)
                    self.num_rows = self.num_rows + 1
                
                    for j in range(0, len(self.dc.subset_groups)):
                        child = QStandardItem('{}'.format(self.dc.subset_groups[j].label))
                        child.setEditable(False)
                        child.setIcon(helpers.layer_icon(self.dc.subset_groups[j]))
                        
                        try:
                            self.dc[i].compute_statistic('mean', self.dc[i].subsets[j].components[k], subset_state=self.dc[i].subsets[j].subset_state)

                        except:
#                             print("Glue has raised an Incompatible Attribute error on this component. Let's do this instead.")
                            child.setEditable(False)
                            child.setSelectable(False)
                            child.setForeground(QtGui.QBrush(Qt.gray)) 

                        parent.appendRow(child)
                        self.num_rows = self.num_rows + 1
                
                    grandparent.appendRow(parent)
                self.model_components.appendRow(grandparent)
                
                # Fill out the dict now that the indices are connected to the QStandardItemModel
                for i in range(0, grandparent.rowCount()):
                    for j in range(0, grandparent.child(i).rowCount()):
                        if grandparent.child(i).child(j).row() == 0:
                            key = grandparent.child(i).child(j).text() + grandparent.child(i).text()
                            self.component_dict[key] = grandparent.child(i).child(j).index()
                        else:
                            key = grandparent.child(i).child(j).text() + " (" + grandparent.text() + ")" + grandparent.child(i).text()
                            self.component_dict[key] = grandparent.child(i).child(j).index()
            
        self.treeview.setUniformRowHeights(True)
        
        # Make the table update whenever the tree selection is changed
        selection_model = QItemSelectionModel(self.model_components)
        self.treeview.setSelectionModel(selection_model)
        selection_model.selectionChanged.connect(self.myPressedEvent)
 
        # Clear the table 
        self.data_frame = pd.DataFrame(columns=self.headings)
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)

        # Select the rows that should be selected

        sel_mod = self.treeview.selectionModel()
    
        for i in range(0, len(selected)):
            key = list(selected.keys())[i]
            index = self.component_dict[key]
            self.treeview.setCurrentIndex(index)
    
        self.treeview.setSelectionModel(sel_mod)
        
        # Update the past_selected and selected_indices
        self.past_selected = self.treeview.selectionModel().selectedRows() 
        self.selected_indices = self.treeview.selectionModel().selectedRows()
            
    def notation(self):
        # Changes the data from scientific to standard notation and vice versa
        
        data_labels = self.data_frame['Dataset']
        comp_labels = self.data_frame['Component']
        subset_labels = self.data_frame['Subset']
        
        mean_vals = []
        median_vals = []
        min_vals = []
        max_vals = []
        sum_vals = []
        
        if self.stan_notation.isChecked():
            self.isSci = False
            # Build string to format in standard notation
            string = "%." + str(self.num_sigs) + 'F'
        else:
            self.isSci = True
            # Build string to format in scientific notation
            string = "%." + str(self.num_sigs) + 'E'    
            
        for i in range(0, len(self.data_frame)):
            # Traverse through the dataframe and get the names of the component, dataset, and subset
            component = self.data_frame['Component'][i]
            dataset = self.data_frame['Dataset'][i]
            subset = self.data_frame['Subset'][i]
                
            # Pull the correct index of the data in data_accurate
            idx_c = np.where(component == self.data_accurate['Component'])
            idx_d = np.where(dataset == self.data_accurate['Dataset'])
            idx_s = np.where(subset == self.data_accurate['Subset'])
            idx1 = np.intersect1d(idx_c, idx_d)
            idx2 = np.intersect1d(idx1, idx_s)[0] 
                
            # Format the data in data_accurate
            mean_vals.append(string % Decimal(self.data_accurate['Mean'][idx2]))
            median_vals.append(string % Decimal(self.data_accurate['Median'][idx2]))
            min_vals.append(string % Decimal(self.data_accurate['Minimum'][idx2]))
            max_vals.append(string % Decimal(self.data_accurate['Maximum'][idx2]))
            sum_vals.append(string % Decimal(self.data_accurate['Sum'][idx2])) 
           
        # Build the column_data and update the data_frame
        column_data = np.asarray([subset_labels, data_labels, comp_labels, mean_vals, median_vals, min_vals, max_vals, sum_vals]).transpose()
        self.data_frame = pd.DataFrame(column_data, columns=self.headings)
        model = pandasModel(self.data_frame, self.dc)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)
        
    def uncheckVisible(self):
        self.visible.setCheckable(False)
#         self.visible.setForegroundRole(QColor('grey'))
#         if self.component_mode:
# #             print(len(self.model_components.persistentIndexList()))
#         else:
# #             print(len(self.model_subsets.persistentIndexList()))
            
    def checkVisible(self):
        self.visible.setCheckable(True)
#         if self.component_mode:
# #             print(len(self.model_components.persistentIndexList()))
#         else:
# #             print(len(self.model_subsets.persistentIndexList()))
            
app = QApplication.instance()
if app is None:
    app = QApplication(sys.argv)
else:
    print('QApplication instance already exists: %s' % str(app))
ex = StatsGui(dc)
ex.show()
sys.exit(app.exec_())


QApplication instance already exists: <PyQt5.QtWidgets.QApplication object at 0x1518a0e168>
QApplication instance already exists: <PyQt5.QtWidgets.QApplication object at 0x1518a0e168>


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Ask at meeting about select all- what the functionality should be
# Ideas:
# Expand all and select all- slow and overwhelming
# Select all currently expanded- what I had before- doesn't do anything when pressed before expansion
# Grayed out until an item is expanded and then has above functionality
# Other ideas?

In [14]:
help(QToolButton)

Help on class QToolButton in module PyQt5.QtWidgets:

class QToolButton(QAbstractButton)
 |  QToolButton(parent: QWidget = None)
 |  
 |  Method resolution order:
 |      QToolButton
 |      QAbstractButton
 |      QWidget
 |      PyQt5.QtCore.QObject
 |      sip.wrapper
 |      PyQt5.QtGui.QPaintDevice
 |      sip.simplewrapper
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  actionEvent(...)
 |      actionEvent(self, QActionEvent)
 |  
 |  arrowType(...)
 |      arrowType(self) -> Qt.ArrowType
 |  
 |  autoRaise(...)
 |      autoRaise(self) -> bool
 |  
 |  changeEvent(...)
 |      changeEvent(self, QEvent)
 |  
 |  defaultAction(...)
 |      defaultAction(self) -> QAction
 |  
 |  enterEvent(...)
 |      enterEvent(self, QEvent)
 |  
 |  event(...)
 |      event(self, QEvent) -> bool
 |  
 |  hitButton(...)
 |      hitButton(self, QPoint) -> bool
 |  
 |  initStyleOption(...)
 |      initStyleOption(self, QStyleOptionToolButton)
 |  
 |  leaveEvent(...)
 |      leaveE

In [15]:
help(QPalette.ColorRole)

Help on class ColorRole in module PyQt5.QtGui:

class ColorRole(builtins.int)
 |  int(x=0) -> integer
 |  int(x, base=10) -> integer
 |  
 |  Convert a number or string to an integer, or return 0 if no arguments
 |  are given.  If x is a number, return x.__int__().  For floating point
 |  numbers, this truncates towards zero.
 |  
 |  If x is not a number or if base is given, then x must be a string,
 |  bytes, or bytearray instance representing an integer literal in the
 |  given base.  The literal can be preceded by '+' or '-' and be surrounded
 |  by whitespace.  The base defaults to 10.  Valid bases are 0 and 2-36.
 |  Base 0 means to interpret the base from the string as an integer literal.
 |  >>> int('0b100', base=0)
 |  4
 |  
 |  Method resolution order:
 |      ColorRole
 |      builtins.int
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __reduce__ = _pickle_enum(...)
 |  
 |  ----------------------------------------------------------------------
 |  Data des

In [18]:
QColor('gray')

<PyQt5.QtGui.QColor at 0x151db7de48>

In [29]:
help(QStandardItemModel)

Help on class QStandardItemModel in module PyQt5.QtGui:

class QStandardItemModel(PyQt5.QtCore.QAbstractItemModel)
 |  QStandardItemModel(parent: QObject = None)
 |  QStandardItemModel(int, int, parent: QObject = None)
 |  
 |  Method resolution order:
 |      QStandardItemModel
 |      PyQt5.QtCore.QAbstractItemModel
 |      PyQt5.QtCore.QObject
 |      sip.wrapper
 |      sip.simplewrapper
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  appendColumn(...)
 |      appendColumn(self, Iterable[QStandardItem])
 |  
 |  appendRow(...)
 |      appendRow(self, object)
 |      appendRow(self, QStandardItem)
 |  
 |  clear(...)
 |      clear(self)
 |  
 |  columnCount(...)
 |      columnCount(self, parent: QModelIndex = QModelIndex()) -> int
 |  
 |  data(...)
 |      data(self, QModelIndex, role: int = Qt.DisplayRole) -> Any
 |  
 |  dropMimeData(...)
 |      dropMimeData(self, QMimeData, Qt.DropAction, int, int, QModelIndex) -> bool
 |  
 |  findItems(...)
 |      findItems(s