In [1]:
# Laura Chapman
# Script for developing statistical tools for Glue
# Computes statistics for subsets as well as entire data using compute_statistic
# Changed from astropy tables to pandas dataframe

# Format data well in a popup using qt
# Color code by subset

In [2]:
# Basic code that imports glue and loads in and links the data

import sys
from glue.core.data_factories import load_data
from glue.core import DataCollection
from glue.core.link_helpers import LinkSame
from glue.app.qt.application import GlueApplication
from glue.viewers.image.qt import ImageViewer
from glue_vispy_viewers.volume.volume_viewer import VispyVolumeViewer

image_filename='w5.fits'
catalog_filename='w5_psc.vot'

#load 2 datasets from files
catalog = load_data(catalog_filename)
image = load_data(image_filename)

dc = DataCollection([catalog,image])

# link positional information
dc.add_link(LinkSame(catalog.id['RAJ2000'], image.id['Right Ascension']))
dc.add_link(LinkSame(catalog.id['DEJ2000'], image.id['Declination']))

#Create subset based on filament mask
ra_state=(image.id['Right Ascension'] > 44) & (image.id['Right Ascension'] < 46)
subset_group=dc.new_subset_group('RA_Selection',ra_state)
subset_group.style.color = '#0000FF'





In [3]:
import glue.utils.array as gua
import glue.core.data as gcd
import pandas as pd
from pandas import DataFrame
import numpy as np

In [5]:
# Defines a subset and runs statistics using compute_statistic for the subset
# Saves data using a pandas dataframe called my_pandas_data

# Data that subset is pulled from
data = dc[0]

# Define state and subset
state1 = data.id['Jmag'] > 14
subset1 = data.new_subset(state1, label='Jmag > 14')

# Same arrays as with full data
mean_array = []
median_array = []
min_array = []
max_array = []
sum_array = []
name_array = []
tables = []

headings = ('mean', 'median', 'minimum', 'maximum', 'sum')

print(data.label, 'subset1')
print() 
for j in range (0, len(data.components)):
    name = data.components[j].label # Get the name of each component
    name_array.append(name) # add to the name array to build the table
    mean_array.append(data.compute_statistic('mean', subset1.components[j], subset_state=subset1.subset_state))
    median_array.append(data.compute_statistic('median', subset1.components[j], subset_state=subset1.subset_state))       
    min_array.append(data.compute_statistic('minimum', subset1.components[j], subset_state=subset1.subset_state))       
    max_array.append(data.compute_statistic('maximum', subset1.components[j], subset_state=subset1.subset_state))      
    sum_array.append(data.compute_statistic('sum', subset1.components[j], subset_state=subset1.subset_state))        
   
column_data = np.asarray([mean_array, median_array, min_array, max_array, sum_array]).transpose()

my_pandas_data = pd.DataFrame(column_data, index=name_array, columns=headings)

w5_psc subset1



In [265]:
from PyQt5.QtCore import QAbstractItemModel, QFile, QIODevice, QModelIndex, Qt
from PyQt5.QtWidgets import QApplication, QTreeView, QAbstractItemView
from PyQt5.QtWidgets import QApplication, QLabel, QTreeView, QComboBox, QWidget, QPushButton, QHBoxLayout, QFrame, QTableView,QGroupBox, QDialog, QVBoxLayout, QLabel,QGridLayout
from PyQt5 import QtCore, QtWidgets, QtGui
from PyQt5.QtGui import QStandardItemModel, QStandardItem
from PyQt5.QtCore import QItemSelectionModel
from PyQt5.QtCore import pyqtSlot, QVariant
from decimal import getcontext, Decimal

import sys

class pandasModel(QtCore.QAbstractTableModel):
    # Set up the data in a form that allows it to be added to qt widget
    def __init__(self, df, parent=None):
        QtCore.QAbstractTableModel.__init__(self, parent)
        self.data_frame = df
        super(pandasModel, self).__init__(parent)      

    def rowCount(self, parent=None):
        return len(self.data_frame.values)

    def columnCount(self, parent=None):
        return self.data_frame.columns.size

    def data(self, index, role=Qt.DisplayRole):
        if index.isValid():
            if role == Qt.DisplayRole:
                return QVariant(str(
                    self.data_frame.values[index.row()][index.column()]))
        return QVariant()
    
    def headerData(self, col, orientation, role):
        if orientation == Qt.Horizontal and role == Qt.DisplayRole:
            return QVariant(self.data_frame.columns[col])
        if orientation == Qt.Vertical and role == Qt.DisplayRole:
            return QVariant(self.data_frame.index[col])
        return QVariant()  

class StatsGui(QWidget):
    ''' 
    This class accepts a glue data collection object, and builds an interactive window
    to display basic statistics (e.g. mean, median, mode) about each dataset
    '''
    def __init__(self,dc):
        
        # Initialize the object as a QWidget
        QWidget.__init__(self)
    
        #Save the datacollection object as an attribute of class StatsGui
        self.dc=dc

        #Fix the size of the main GUI window (for now)
        self.setMinimumSize(800, 600)
        self.setMaximumSize(800, 600)
        
        #Set the title of the main GUI window
        self.setWindowTitle('Statistics')
        
        #Set up tree view and fix it to the top half of the window
        self.treeview = QTreeView(self)
        self.treeview.setGeometry(50, 25, 400, 200) 
        
        #Every time a selection is made in the tree, perform the functionality in 'myPressedEvent'
        self.treeview.pressed.connect(self.myPressedEvent)

        #Set the default clicking behavior to be row selection
        self.treeview.setSelectionBehavior(QAbstractItemView.SelectRows)
        model = QStandardItemModel()
        model.setHorizontalHeaderLabels([''])

        self.treeview.setModel(model)
        self.treeview.setUniformRowHeights(True)
        
        #Allow the user to select multiple rows at a time 
        self.treeview.setSelectionMode(QAbstractItemView.MultiSelection)
    
        # populate the tree
        # Make all the datasets be parents, and make it so they are not selectable
        for i in range(0,len(dc)):
            parent = QStandardItem('{}'.format(self.dc.labels[i]))
            parent.setEditable(False)
            parent.setSelectable(False)
            
            # Make all the data components be children, nested under their parent
            for j in range(0,len(self.dc[i].components)):
                child=QStandardItem('{}'.format(str(self.dc[i].components[j])))
                child.setEditable(False)
                parent.appendRow(child)
            
            #Add the parents with their children to the QStandardItemModel
            model.appendRow(parent)

        # Set up the combo box for users to choose the number of significant figures in the table
        self.sigfig = QComboBox(self)
        self.sigfig.addItems(["Default = 5"])
        self.sigfig.addItems(list('12345678'))
        self.sigfig.model().item(0).setEnabled(False)
        self.sigfig.move(500, 50)
        self.sigfig.currentIndexChanged.connect(self.sigchange)
        
        self.siglabel = QLabel(self)
        self.siglabel.setText('Number of significant figures:')
        self.siglabel.move(500, 25)
        
        # Set up select all and deselect all buttons
        self.all = QPushButton('Select all')
        self.all.clicked.connect(self.allClicked)
        self.all.move(500, 100)
        
        # Set default significant figures to 5
        getcontext().prec = 5
            
        #################Set up the QTableView Widget#############################
        self.table = QTableView(self)
        self.table.setAlternatingRowColors(True)
        
        #Move the table widget to the bottom of the GUI window
        self.table.setGeometry(50, 250, 700, 300) 
        
        #Set the table headings -- I have added Dataset and Component Columns. 
        self.headings = ('Dataset', 'Component','Mean', 'Median', 'Minimum', 'Maximum', 'Sum')   
        self.data_frame = pd.DataFrame(columns=self.headings) 
        self.model = pandasModel(self.data_frame)

        self.table.setModel(self.model)
        
    def myPressedEvent (self, currentQModelIndex):
        ''' 
        Every time a row (or rows) in the tree view is clicked:
        if it is selected, add it to the table
        if it is deselected, remove it from the table
        ''' 
        data_i = currentQModelIndex.parent().row()
        comp_i = currentQModelIndex.row()

        # Get the indexes of all the selected components
        self.indexes=self.treeview.selectionModel().selectedRows()

        # If nothing is selected clear the stats from the table
        if len(self.indexes) == 0:
            self.data_frame = pd.DataFrame(columns=self.headings)          
        
        # See if the current index is in the selected components
        if currentQModelIndex in self.indexes:
            # If the current index is selected, call runStats to append it to the data_frame
            self.runStats(currentQModelIndex.parent().row(), currentQModelIndex.row())
        else:
            # Find the index of the unchecked element in the table and remove that row
            try:
                # Find the index in the table of the unchecked element, if it's in the table
                
                # Find the matching component and dataset indices and intersect them to get the unique index
                idx_c = np.where(self.data_frame['Component'] == self.dc[data_i].components[comp_i].label)
                idx_d = np.where(self.data_frame['Dataset'] == self.dc[data_i].label)
                idx = np.intersect1d(idx_c, idx_d)
                self.data_frame = self.data_frame.drop(idx)
            except:
                pass
        
        model = pandasModel(self.data_frame)
        self.table.setModel(model)
        self.table.setSortingEnabled(True)
        self.table.setShowGrid(False)  
    
    def runStats (self, data_i, comp_i):
        '''
        Runs statistics for the component comp_i of data set data_i
        '''
        
        data_label = dc[data_i].label   
        comp_label = self.dc[data_i].components[comp_i].label # add to the name array to build the table
        mean_val = Decimal(float(self.dc[data_i].compute_statistic('mean', self.dc[data_i].components[comp_i]))) * Decimal(1)
        median_val = Decimal(float(self.dc[data_i].compute_statistic('median', self.dc[data_i].components[comp_i]))) * Decimal(1)       
        min_val = Decimal(float(self.dc[data_i].compute_statistic('minimum', self.dc[data_i].components[comp_i]))) * Decimal(1)     
        max_val = Decimal(float(self.dc[data_i].compute_statistic('maximum', self.dc[data_i].components[comp_i]))) * Decimal(1)    
        sum_val = Decimal(float(self.dc[data_i].compute_statistic('sum', self.dc[data_i].components[comp_i]))) * Decimal(1)
        
        # Create the column data array and append it to the data frame
        column_data = np.asarray([[data_label],[comp_label], [mean_val], [median_val], [min_val], [max_val], [sum_val]]).transpose()
        column_df = pd.DataFrame(column_data, columns = self.headings)
        self.data_frame = self.data_frame.append(column_df, ignore_index = True)
    
    def sigchange(self, i):
        # Set the number of significant figures according to what the user selects
        getcontext().prec = i   
        
    def allClicked(self):
        # Select all components of the treeview
        # Currently does not work
        self.treeview.selectAll()
        
    
app = QApplication.instance()
if app is None:
    app = QApplication(sys.argv)
else:
    print('QApplication instance already exists: %s' % str(app))
ex = StatsGui(dc)
ex.show()
sys.exit(app.exec_())

QApplication instance already exists: <PyQt5.QtWidgets.QApplication object at 0x15148ee798>


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [106]:
'''Things to implement
add the name of the data set to the component
sort by maximum, minimum etc
make it scroll if there are too many buttons
have a drop down menu to select components that you want to turn into buttons
have a button that says select all or deselect all
show subsets as a button
have a list of components and select from a list instead of buttons
complete working insert row and remove row functionality when user clicks buttons
add in data subsets
color code table by data/subset?
export to file button
advanced mode that allows user to pick what stats to calculate'''

'Things to implement\nadd the name of the data set to the component\nsort by maximum, minimum etc\nmake it scroll if there are too many buttons\nhave a drop down menu to select components that you want to turn into buttons\nhave a button that says select all or deselect all\nshow subsets as a button\nhave a list of components and select from a list instead of buttons\ncomplete working insert row and remove row functionality when user clicks buttons\nadd in data subsets\ncolor code table by data/subset?\nexport to file button\nadvanced mode that allows user to pick what stats to calculate'

In [264]:
help(QTableView)

Help on class QTableView in module PyQt5.QtWidgets:

class QTableView(QAbstractItemView)
 |  QTableView(parent: QWidget = None)
 |  
 |  Method resolution order:
 |      QTableView
 |      QAbstractItemView
 |      QAbstractScrollArea
 |      QFrame
 |      QWidget
 |      PyQt5.QtCore.QObject
 |      sip.wrapper
 |      PyQt5.QtGui.QPaintDevice
 |      sip.simplewrapper
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  clearSpans(...)
 |      clearSpans(self)
 |  
 |  columnAt(...)
 |      columnAt(self, int) -> int
 |  
 |  columnCountChanged(...)
 |      columnCountChanged(self, int, int)
 |  
 |  columnMoved(...)
 |      columnMoved(self, int, int, int)
 |  
 |  columnResized(...)
 |      columnResized(self, int, int, int)
 |  
 |  columnSpan(...)
 |      columnSpan(self, int, int) -> int
 |  
 |  columnViewportPosition(...)
 |      columnViewportPosition(self, int) -> int
 |  
 |  columnWidth(...)
 |      columnWidth(self, int) -> int
 |  
 |  currentChanged(...)
 | 