# PCA to Stellar Spectral Classification


From Deeming 1963 applied to late stars

He discussed the specific case of late-type giants. He used 84 G and K class III giants. 




First load all the relevant packages

In [1]:
#Bokeh plotting
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import Span, Label, Arrow, NormalHead
from bokeh.models import HoverTool, tools, ColumnDataSource, CustomJS, Slider, BoxAnnotation
from bokeh.layouts import  column, row
from bokeh.palettes import Category20_18
import re
import glob, os
from astropy.io import fits
import urllib
#from urllib import urlretrieve
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import PCA

#Garbage collector
import gc

from pyraf import iraf
output_notebook()
iraf.noao()
iraf.noao.onedspec()
iraf.dataio()


No graphics display available for this session.
Graphics tasks that attempt to plot to an interactive screen will fail.
For help, search "PyRAF FAQ 5.13" or contact "help@stsci.edu".



Defined the classes. Lines and spec. Add a new attribute with the general type of star. 

In [2]:
#Emission Lines
class line(object):
    """Line with the name of the line, line center and the region around it. 
    This region around it will be use to fit it using IRAF function. It has to be a list of numbers"""
    def __init__(self,name,linecenter,regiontofit):
        self.name = name
        self.linecenter = linecenter
        self.regiontofit = regiontofit
        
def find_nearest(array,value):
    idx = (np.abs(array-value)).argmin()
    return idx, array[idx]

def isDigit(x):
    try:
        float(x)
        return True
    except ValueError:
        return False
    
def gaussian(x, mu, sig,core):
    """Gaussian"""
    return core*np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))

        
class spec2(object):
    """Spectra created from the url. Probably in the future better with the fiber
    , plate and MJD"""
    def __init__(self,url):
        self.url = url
        self.name = url.split('/')[-1]
        self.wave, self.flux, self.model, self.type, self.typegeneral = self.get_type()
       
            
    def get_type(self, verbose=False):
        """Get the type, flux, wavelenght and model from the SDSS TABLE. 
        It downloadst he fit if it doesnt exist. Had to do it like that becase it would waint until the download
        to call the other method"""
        #Download if it doesnt't exists. 
        if not os.path.isfile(self.name):
            file_name = self.name
            u = urllib.urlopen(self.url)
            f = open(file_name, 'wb')
            meta = u.info()
            file_size = int(meta.getheaders("Content-Length")[0])
            #print "Downloading: %s Bytes: %s" % (file_name, file_size)

            file_size_dl = 0
            block_sz = 8192
            while True:
                buffer = u.read(block_sz)
                if not buffer:
                    break

                file_size_dl += len(buffer)
                f.write(buffer)
                status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                status = status + chr(8)*(len(status)+1)
                if verbose:
                    print status,

            f.close()
            #print('Downloaded '+ self.name)
        
        ob = fits.open(self.name,memmap=False)
        #Get data and save wavelenft in x, and flux on y
        dataob = ob[1].data
        x = 10**(dataob['loglam'])
        y2 = dataob['model']
        y = dataob['flux']
        typee = ob[2].data['SUBCLASS'][0]
        ob.close()
        #Delete
        del ob
        del dataob
        gc.collect()
        
        npattern = re.compile('[O,B,A,F,G,K,WD,M,CV]{1,2}')
        typegeneral = npattern.findall(typee)
        if 'WD' in typegeneral:
            typegeneral = 'WD'
        elif len(typegeneral) == 0:
            typegeneral = 'Other'
        else:
            typegeneral = typegeneral[0]
        #delete to aboid too many oepenf files see
        #http://docs.astropy.org/en/stable/io/fits/appendix/faq.html#i-m-opening-many-fits-files-in-a-loop-and-getting-oserror-too-many-open-files
        return x,y,y2, typee, typegeneral
    
    def get_iraffits(self, modelfit = True):
        """From the SDSS table creates a fits file to work with iraf"""
        #Which one to convert to iraf fts
        if modelfit:
            fluxormodel = self.model
        else:
            fluxormodel = self.flux
        #Name of iraf .txt file and create text file
        
        namenewfits = '{name}.txt'.format(name=self.name)
        with open(namenewfits,'w') as file:
            for x,y in zip(self.wave,fluxormodel):
                file.write('{}\t{}\n'.format(x,y))
                
        #Create fits file from text file interpolation to work wtih IRAF functions
        iraffitsname = 'iraf'+self.name
        if os.path.exists(iraffitsname+'.fits'):
            os.remove(iraffitsname+'.fits')
        iraf.rspectext(namenewfits,iraffitsname,dtype='interp')
        #Remove the text file
        os.remove(namenewfits)
        
    
    def fit_lines(self, dicoflines, errorestimate = True, verbose='No'):
        """Fit lines using gaussian fitport. It populates a diccionary with the lines
         and the fit of the lines. The parameter if a diccionary of lines objects. 
         The class is define before. Verbose can be yes or NO"""
        self.linesdicall = []
        errorparam = []
        if not os.path.exists('iraf'+self.name):
            self.get_iraffits()
            
        #Initialize files log and lines
        ! echo '' > fited.log

        for indexline,linesfit in enumerate(dicoflines):
            regionf = "{} {}".format(linesfit.regiontofit[0],linesfit.regiontofit[1])
            #wavelenght
            xlimns = [find_nearest(self.wave,i)[0] for i in linesfit.regiontofit   ]
            wavex = self.wave[xlimns[0]:xlimns[1]]
            lineszero = linesfit.linecenter
            ! echo '$lineszero' > lines.lines

            filename = 'iraf'+self.name
            #Error estimation iraf: http://stsdas.stsci.edu/cgi-bin/gethelp.cgi?fitprofs
            if errorestimate:        
                iraf.fitprofs(filename,pos='lines.lines', reg=regionf ,
                              fitbackground= 'yes', 
                              logfile='fited.log'
                              ,nerrsample='100',sigma0='4',invgain='4',verbose=verbose)
            else:
                iraf.fitprofs(filename,pos='lines.lines', reg=regionf ,
                              fitbackground= 'yes', 
                              logfile='fited.log', verbose=verbose)

            #Plotting the gaussian
            #Find in log file of fitprofs.  
            npattern = re.compile('[-\d.]+')
            npattern2 = re.compile('[-\d.E?]+') #Gets the exponentioals

            gparameters=[]
            with open('fited.log','r') as file:
                for lines in file:

                    if '(' not in lines:
                        temp = npattern2.findall(lines)
                        if 'INDEF' in lines:
                            gparameters.append(7*[0])
                    else:
                        errorparam.append(npattern2.findall(lines))

                    if len(temp) == 7 and all(isDigit(i) for i in temp) and 'INDEF' not in lines:
                        gparameters.append(temp)

            #gaussian
            if len(gparameters) > 0:
                gparamfinal = [ float(i) for i in gparameters[-1] ]
                centerg, contg, fluxg, eqwg, coreg, fwhmg, fwhml = gparamfinal
                yg = gaussian(wavex,centerg,fwhmg/2.3538,coreg) + contg


            if errorestimate == True:
                errorparamfinal = [ float(i) for i in errorparam[-1] ]
                #print(gparamfinal)
                #print(errorparamfinal)


                linesdic = {'linename':linesfit.name,
                             'center':centerg,
                              'EW': eqwg,
                              'EWerror':errorparamfinal[3],
                            'fluxg':fluxg,
                            'coregaus':coreg,
                            'fwgmgaus':fwhmg,
                            'contgaus':contg,
                            'gaussian':{'x':wavex,'y':yg}
                            }
            else:
                 linesdic = {'linename':linesfit.name,
                             'center':centerg,
                              'EW': eqwg,
                              'EWerror':0,
                            'fluxg':fluxg,
                            'coregaus':coreg,
                            'fwgmgaus':fwhmg,
                            'contgaus':contg,
                            'gaussian':{'x':wavex,'y':yg}
                            }
                
                
                
            self.linesdicall.append(linesdic)

        
        


In [3]:
%%time
diclines = {line('P15',8547,[8520,8560]),line('P14',8600,[8600-20,8600+20]),
           line('Halpha',6562,[6550,6575]),line('P16',8504,[8504-20,8504+20]),
            line('P13',8667,[8667-20,8667+20]),line('P12',8752,[8752-20,8752+20]),
           line('P11',8865,[8865-20,8865+20]),line('P10',9017,[9017-20,9017+20])}




dicofspectra = [];
filestoread = ['../listG1V.txt',
              '../listG2.txt','../listG4V.txt','../listG5III+.txt',
               '../listG8V.txt','../listG9IV.txt','../listK1III.txt',
               '../listK2III.txt','../listK3III.txt','../listK4III.txt',
               '../listK5III.txt','../listK7.txt']

#filestoread = glob.glob('../list*.txt')

limitonlines = 10
print('Reading {} files and {} lines per file'.format(len(filestoread),limitonlines))

for index,files in enumerate(filestoread):
    with open(files,'r') as f:
        linereads = f.readlines()
        for i in range(min(limitonlines, len(linereads))):
            lineread = linereads[i]
            #print(lineread)
            url = lineread.split(',')[1]
            #Strip Return a copy of the string with leading and trailing characters removed. 
            url = url.strip()
            url = url.replace('segue1','sdss')
            #print(url)
            tempspec = spec2(url)
            dicofspectra.append(tempspec)
            gc.collect()
            #print(index)
            
            
#Plot parameters
colors = 100*Category20_18 #+ Viridis8 + Dark2_8 + Paired8 + Set1_4
#Plot range
xr = (5000,8900)
yr = (0,40)


#Tool to get wavelength
hover2 = HoverTool(
        tooltips=[
            ("(x,y)", "($x{1}, $y)"),
        ]
    )
#Add the tool
#Start index of color
index = 0
fit = True
plot = False

if plot:
        #Create the Bokeh Figure
    pl =  figure(x_axis_label='Angstrom', y_axis_label='Y',title="Click on the desired stellar template to overplot", x_range=xr, y_range=yr
                  ,active_drag='pan', active_scroll='wheel_zoom',
                  plot_width=900, plot_height=1000
                 )

    pl.add_tools(hover2)





# Loop over spec object list:

for spectra in dicofspectra:

    # Plot the 
    if plot:
        t = pl.line(x=spectra.wave,y=spectra.model,color=colors[index], line_alpha=1.0, 
                line_width=4,legend=spectra.type+str(index),muted_alpha=0.,muted_color=colors[index])

    if fit:
        try:
            spectra.fit_lines(diclines, verbose='No', errorestimate=False)
        except:
            pass
        for i in spectra.linesdicall:
            if plot:
                pl.line(i['gaussian']['x'],i['gaussian']['y'],color='red', line_alpha=1.,
                    line_width=5,legend=tryspec.name,muted_alpha=0.,muted_color=colors[index])

    index = index +1

if plot:    
    pl.legend.location = "top_left"
    pl.legend.click_policy="mute"

    show(pl)  

            

Reading 12 files and 10 lines per file




CPU times: user 2min 29s, sys: 38.1 s, total: 3min 7s
Wall time: 5min 47s


Now that we got all the spectra we can see all the types of stars and how many of them in the diccionary of spectra we created in the cell above

In [4]:
typess = []
for star in dicofspectra:
    typess.append(star.type)
np.unique(typess,return_counts=True)

(array(['G1V (95128)', 'G2', 'G4V (32923)', 'G5III+... (157910)',
        'G8V (101501)', 'G9IV (100030)', 'K1III (18322)', 'K2III (115136)',
        'K3III (101673)', 'K4III (136726)', 'K5III (111335)', 'K7'],
       dtype='|S18'), array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]))

The lines we fitted

In [5]:
for index,i in enumerate(diclines):
    print(index,i.name)

(0, 'P10')
(1, 'P15')
(2, 'P14')
(3, 'Halpha')
(4, 'P16')
(5, 'P13')
(6, 'P12')
(7, 'P11')


Now we created a matrix with n rows and k columns. Where n is the number of fitted lines, and k the number of stars we have in the diccionary. 

This will be called matrix, we also create a list of the starclass. I should make sure, but I think in each loop the dicofspectra order does not change. 

In [6]:
matrix = []
#Loop over each line in the diccionart of lines we created. 
for linesfit in diclines:
    print(linesfit.name)
    listofeq = []
    starclass = []
    #Loop over all the stars in the diccionary of spectra.
    #For each star if we were able to fit a line get the EW
    #Of the line
    for indexs,stars in enumerate(dicofspectra):
        if hasattr(stars,'linesdicall'):
            lines = stars.linesdicall
            #We find the line and ew of the lne and append
            for line in lines:
                if line['linename'] == linesfit.name:
                    eq = line['EW']
                    listofeq.append(eq)
                    starclass.append(stars.type)
                    
    matrix.append(listofeq)
        #    print(stars.typegeneral)

P10
P15
P14
Halpha
P16
P13
P12
P11


In [7]:
mat = np.array(matrix)
mat.shape

(8, 120)

We can decompose using the [numpy SVD](https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.svd.html) function:

In [8]:
U, D, Vtranspose = np.linalg.svd(mat, full_matrices=False)
print('Shape of U: {}'.format(U.shape))
print('Shape of V Transpose: {}'.format(Vtranspose.shape))
print('Shape of D: {}'.format(D.shape))

Shape of U: (8, 8)
Shape of V Transpose: (8, 120)
Shape of D: (8,)


We only care about the two main singular values so we do:


In [9]:
dim = 2
Unew = U[:,0:dim]
Dnew= D[0:dim]
Vtranspose_new = Vtranspose[0:dim,:]

print('Shape of new U: {}'.format(Unew.shape))
print('Shape of new V Transpose: {}'.format(Vtranspose_new.shape))
print('Shape of new D: {}'.format(Dnew.shape))

print('Singular values: {}'.format(Dnew[0:dim]))

Shape of new U: (8, 2)
Shape of new V Transpose: (2, 120)
Shape of new D: (2,)
Singular values: [ 118448.44344506   24518.51213346]


To make sure it works we can also do it via Trucanted of Scikit-learn. Lets check the singular values and the Percentage of variance explained by each of the selected components. The fit and fit_transform expect a matrix in the form **(n_samples, n_features)** so lets transpose the matrix and call the TruncatedSVD.

In [10]:
sklearn_svd = TruncatedSVD(n_components=2,algorithm='arpack')
matsvd = mat.T
svdfit = sklearn_svd.fit(matsvd)
fitmat = svdfit.fit_transform(matsvd)

print('Singular values: {}'.format(svdfit.singular_values_))
print('Variance ratio: {}'.format(svdfit.explained_variance_ratio_))

Singular values: [ 118448.44344506   24518.51213346]
Variance ratio: [ 0.8808827   0.03662017]


So the first component accounts for almost 90% of the variance of the data.  Let's see the arrays we get

In [11]:
print('SVD compoments (n_components, n_features): {}'.format(svdfit.components_.shape))

SVD compoments (n_components, n_features): (2, 8)


In [12]:
print('fit model to matrix returns (n_samples, n_components): {}'.format(fitmat))

fit model to matrix returns (n_samples, n_components): [[  2.09153658e-01   3.62483645e+00]
 [  1.27339359e-01   2.21618854e+00]
 [  2.34568067e-01   3.99900696e+00]
 [  2.18067204e-01   3.88359334e+00]
 [  9.69681518e-02   1.66905152e+00]
 [  1.96534337e-01   3.63353716e+00]
 [  2.41719280e-01   4.42623580e+00]
 [  9.71706776e-02   1.69529121e+00]
 [  1.32871719e-01   2.33492286e+00]
 [  1.32023983e-01   2.28317220e+00]
 [  1.23036971e-01   1.10719024e+00]
 [  1.39075862e-01   1.23406434e+00]
 [  7.33323691e+00   9.19153741e+03]
 [  1.02964534e-01   8.97023579e-01]
 [  1.80737008e-01   1.56643084e+00]
 [  1.91560953e-01   1.69119959e+00]
 [  1.74103539e-01   1.65402440e+00]
 [  1.51191679e-01   1.32549293e+00]
 [  1.75989787e-01   1.52545259e+00]
 [  1.98448869e-01   1.71000424e+00]
 [  5.73685109e+01   6.78102210e+02]
 [  2.45483470e+00   3.27276615e+01]
 [  3.60853561e+00   6.22064741e+02]
 [  4.10652501e-01   1.69348553e+00]
 [  8.23381430e-01   1.92182823e+00]
 [  1.35193130e+00  

# Plot

Create first a diccionary of type of star and color. Then from the list of star class created with the matrix can create a list of color

In [13]:
coldic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    coldic[str(typestar)] = colors[index]
coldic
label = starclass
colorsvd = [ coldic[i] for i in starclass ]


dd = {}
dd['x'] = fitmat[:,0]
dd['y'] = fitmat[:,1]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)


p = figure()


p.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(p)

There are many outliers so we can change the zoom and see if we can learn somthing

In [14]:
coldic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    coldic[str(typestar)] = colors[index]
coldic
label = starclass
colorsvd = [ coldic[i] for i in starclass ]


dd = {}
dd['x'] = fitmat[:,0]
dd['y'] = fitmat[:,1]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)

#Plot range
xr = (-2,4)
yr = (-2,5.6)



p = figure(x_axis_label='Star', y_axis_label='Y', x_range=xr, y_range=yr
                  ,active_drag='pan', active_scroll='wheel_zoom',
                  plot_width=900, plot_height=900
                 )



p.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(p)

## The first compoment and star type:
    

In [15]:
indexstardic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    indexstardic[str(typestar)] = index

label = starclass
indexsvd = [ indexstardic[i] for i in starclass ]


dd = {}
dd['x'] = indexsvd
dd['y'] = fitmat[:,0]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)


#Plot range
xr = (-1,10.5)
yr = (-2,5.6)



pone = figure(x_axis_label='Star', y_axis_label='Y', x_range=xr, y_range=yr
                  ,active_drag='pan', active_scroll='wheel_zoom',
                  plot_width=900, plot_height=900
                 )



pone.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(pone)

# Plotting SVD

We get the same thing differing by a negative sign.

In [16]:
coldic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    coldic[str(typestar)] = colors[index]
coldic
label = starclass
colorsvd = [ coldic[i] for i in starclass ]


dd = {}
dd['x'] = -Vtranspose[0]
dd['y'] = -Vtranspose[1]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)


p = figure()


p.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(p)

In [17]:
indexstardic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    indexstardic[str(typestar)] = index

label = starclass
indexsvd = [ indexstardic[i] for i in starclass ]


dd = {}
dd['x'] = indexsvd
dd['y'] = -Vtranspose[0]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)






pone = figure()


pone.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(pone)

# PCA

The fit and the fit transform expect a array with shape (n_samples, n_features). So can transpose the matrix to apply the dimensionalty reduction using the PCA decomposition form scikit-learn

In [18]:
matpca = mat.T

print('Shape matpca (n_samples, n_features): {}'.format(matpca.shape))

pca = PCA(n_components=2, svd_solver='full')
pca.fit(matpca)
# plot principal components
mat_pca = pca.transform(matpca)

Shape matpca (n_samples, n_features): (120, 8)


In [19]:
coldic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    coldic[str(typestar)] = colors[index]
coldic
label = starclass
colorsvd = [ coldic[i] for i in starclass ]


dd = {}
dd['x'] = mat_pca[:,0]
dd['y'] = mat_pca[:,1]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)


p = figure()


p.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(p)

# Only one component

In [20]:
indexstardic ={}
for index,typestar in enumerate(np.unique([i.type for i in dicofspectra])):
    indexstardic[str(typestar)] = index

label = starclass
indexsvd = [ indexstardic[i] for i in starclass ]


dd = {}
dd['x'] = indexsvd
dd['y'] = mat_pca[:,0]
dd['labels'] = label
dd['colors'] = colorsvd
source = ColumnDataSource(dd)


pone = figure()


pone.circle(x='x', y='y', color='colors', legend = 'labels',source=source , size =5)
show(pone)

# Covariance Matrix

To double check I can get the PCA by building the covariance matrix, or even the correlation matrix, and then get the eigevalues. 

In [21]:
print('Shape covariance Matrix: {}'.format(pca.get_covariance().shape))

Shape covariance Matrix: (8, 8)


In [22]:
pca.get_covariance()

array([[  1.79667055e+06,   9.41028388e+04,   2.76422091e+05,
         -1.75669157e+03,   1.52861232e+05,  -4.67621908e+04,
         -1.95294473e+02,   4.71916020e-01],
       [  9.41028388e+04,   2.58376827e+06,  -3.39451297e+05,
         -1.14484408e+04,   1.28894105e+06,  -3.82619055e+05,
         -1.39795120e+03,   2.44178212e+00],
       [  2.76422091e+05,  -3.39451297e+05,   1.15219962e+08,
         -1.41388142e+05,  -1.65402941e+05,  -4.47071582e+05,
         -1.03854021e+04,   6.49741668e+01],
       [ -1.75669157e+03,  -1.14484408e+04,  -1.41388142e+05,
          1.78505759e+06,  -1.89604108e+04,   6.26887779e+03,
          3.42024367e+01,  -1.20200816e-01],
       [  1.52861232e+05,   1.28894105e+06,  -1.65402941e+05,
         -1.89604108e+04,   3.86513900e+06,  -6.19245595e+05,
         -2.29202089e+03,   4.16142244e+00],
       [ -4.67621908e+04,  -3.82619055e+05,  -4.47071582e+05,
          6.26887779e+03,  -6.19245595e+05,   1.97119619e+06,
          7.28465847e+02,  -1.5

Getting in from the mean and transposing

If I don't tranpose the matrix just use the .components instarf of the transform

In [23]:
matpca2 = mat

pca = PCA(n_components=2, svd_solver='full')
pca.fit(matpca2)
# plot principal components
mat_pca2 = pca.transform(matpca2)

In [24]:
pca.components_.shape

(2, 120)

# To Do

Deeming talks about a non linear PCA. Maybe try to explore this with the Kernel PCA of Scikit-learn

In [25]:
corrmatrix = np.array([[1,0.285,0.546,0.463,0.497],
                          [0.285,1,0.7,0.603,0.812],
                          [0.546,0.7,1,0.558,0.761],
                          [0.463,0.603,0.558,1,0.615],
                      [0.497,0.812,0.761,0.615,1]])
corrmatrix

array([[ 1.   ,  0.285,  0.546,  0.463,  0.497],
       [ 0.285,  1.   ,  0.7  ,  0.603,  0.812],
       [ 0.546,  0.7  ,  1.   ,  0.558,  0.761],
       [ 0.463,  0.603,  0.558,  1.   ,  0.615],
       [ 0.497,  0.812,  0.761,  0.615,  1.   ]])

In [26]:
U, D, Vtranspose = np.linalg.svd(corrmatrix, full_matrices='False')

In [27]:
D

array([ 3.37132909,  0.75205936,  0.47964116,  0.24572139,  0.151249  ])

In [28]:
V.T[0]

NameError: name 'V' is not defined

Match the Talbe IV and V of the paper. 

## Table VI

In [None]:
np.dot(V.T[0]**2, np.diag(D))

In [None]:
D[0]**2/(np.sum(D**2))

In [None]:
D[0]**2/(np.sum(D**2))

In [29]:
coldic

{'G1V (95128)': '#1f77b4',
 'G2': '#aec7e8',
 'G4V (32923)': '#ff7f0e',
 'G5III+... (157910)': '#ffbb78',
 'G8V (101501)': '#2ca02c',
 'G9IV (100030)': '#98df8a',
 'K1III (18322)': '#d62728',
 'K2III (115136)': '#ff9896',
 'K3III (101673)': '#9467bd',
 'K4III (136726)': '#c5b0d5',
 'K5III (111335)': '#8c564b',
 'K7': '#c49c94'}