ANALISI DEL DATASET SULLE STELLE 

In [1]:
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import widgets
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Read the star catalogue from the CSV file that sits next to the notebook.
DATA_FILE = "Stars_colorfixed.csv"
dataset = pd.read_csv(DATA_FILE)

# Last expression of the cell: rendered as a table preview.
dataset

Unnamed: 0,Temperature,L,R,A_M,Color,Spectral_Class,Type
0,3068,0.002400,0.1700,16.12,Red,M,Red Dwarf
1,3042,0.000500,0.1542,16.60,Red,M,Red Dwarf
2,2600,0.000300,0.1020,18.70,Red,M,Red Dwarf
3,2800,0.000200,0.1600,16.65,Red,M,Red Dwarf
4,1939,0.000138,0.1030,20.06,Red,M,Red Dwarf
...,...,...,...,...,...,...,...
235,38940,374830.000000,1356.0000,-9.93,Blue,O,Hyper Giants
236,30839,834042.000000,1194.0000,-10.63,Blue,O,Hyper Giants
237,8829,537493.000000,1423.0000,-10.73,White,A,Hyper Giants
238,9235,404940.000000,1112.0000,-11.23,White,A,Hyper Giants


Viene stampata una parte del dataset per mostrarne la struttura. Possiamo vedere che all'interno del dataset sono contenute 7 colonne, ciascuna corrispondente ad un diverso attributo.

Gli attributi sono rispettivamente:

1. Temperatura, viene espressa in Kelvin
2. L, è la luminosità della stella cioè la quantità di energia elettromagnetica che viene emessa dalla stella stessa per unità di tempo, ovvero la sua potenza. Viene espressa in relazione alla luminosità del Sole
3. R, è il raggio della stella. Viene espresso in relazione al raggio del sole
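4. A_M, magnitudine assoluta: misura la luminosità intrinseca della stella, cioè la magnitudine apparente che la stella avrebbe se fosse osservata da una distanza standard di 10 parsec (a differenza della magnitudine apparente, non dipende quindi dalla distanza reale dalla Terra)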
5. Colore, indica il colore dello spettro della stella
6. Classe spettrale a cui appartengono le stelle
7. Tipo di stella, viene assegnato considerando i parametri di colore, spettro di emissione ed albedo. Nel dataset è riportato come etichetta testuale; a ciascuna etichetta corrisponde un numero che indica:
- 0 Red Dwarf
- 1 Brown Dwarf
- 2 White Dwarf
- 3 Main Sequence
- 4 Super Giants
- 5 Hyper Giants

In [3]:
# Number of stars in each spectral class, most frequent first.
dataset.Spectral_Class.value_counts()

M    111
B     46
O     40
A     19
F     17
K      6
G      1
Name: Spectral_Class, dtype: int64

In [4]:
# Number of stars for each star type.
dataset.Type.value_counts()

Main Sequence    40
Brown Dwarf      40
Red Dwarf        40
Super Giants     40
White Dwarf      40
Hyper Giants     40
Name: Type, dtype: int64

In [5]:
# Number of stars for each colour label, most frequent first.
dataset.Color.value_counts()

Red           112
Blue           56
Blue-White     41
White          21
Yellowish       6
Orange          4
Name: Color, dtype: int64

In [6]:
# Scatter plot of L against Temperature for every star; marker size follows R.
# Each axis is pinned to the min/max of the values currently held in `dataset`.
temperature_domain = (min(dataset['Temperature']), max(dataset['Temperature']))
luminosity_domain = (min(dataset['L']), max(dataset['L']))

# NOTE(review): 'K' is not one of the type characters listed in the d3-format
# documentation -- confirm the intended tick format for both axes.
temperature_axis = alt.X(
    'Temperature',
    scale=alt.Scale(domain=temperature_domain),
    axis=alt.Axis(format='K', title='Temperature: K'),
)
luminosity_axis = alt.Y(
    'L',
    scale=alt.Scale(domain=luminosity_domain),
    axis=alt.Axis(format='K', title='Brightness'),
)
radius_size = alt.Size('R', legend=alt.Legend(title='Dimension'))

# Last expression of the cell: the chart is rendered as the cell output.
alt.Chart(dataset).mark_point().encode(
    x=temperature_axis,
    y=luminosity_axis,
    size=radius_size,
)

Nel grafico sull'asse delle ascisse viene rappresentata la temperatura della stella mentre su quello delle ordinate viene rappresentata la luminosità (il parametro L). Per rappresentare la dimensione della stella sono stati utilizzati i dati del raggio. Il raggio è in relazione alla dimensione della stella: maggiore è il raggio e maggiore è la dimensione della bolla. 

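Da questo grafico non è possibile ricavare molte informazioni: la luminosità varia su molti ordini di grandezza (da circa 0,0001 a oltre 800000 volte quella del Sole), quindi su una scala lineare la maggior parte dei punti risulta schiacciata vicino allo zero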

In [7]:
# applying normalization formula to our numerical data
def normalize_colums(column, frame=None):
    """Min-max scale one column of a DataFrame, writing the result back in place.

    Each value ``v`` is replaced by ``(v - min) / (max - min)`` so the column
    ends up in the range [0, 1].

    Parameters
    ----------
    column : label of the column to rescale.
    frame : DataFrame to modify. When omitted, the module-level ``dataset``
        is used, which is what the original one-argument call did.

    Returns
    -------
    None. The column inside ``frame`` is overwritten.
    """
    if frame is None:
        frame = dataset

    values = frame[column]
    lowest = values.min()
    span = values.max() - lowest

    if span == 0:
        # Constant column: the plain formula would compute 0/0 and turn every
        # row into NaN. Map the values to 0.0 instead (missing values stay NaN).
        frame[column] = (values - lowest).astype(float)
    else:
        frame[column] = (values - lowest) / span

In [8]:
# Columns that are rescaled by normalize_colums.
numerical_columns = [
    'Temperature',
    'L',
    'R',
    'A_M',
]

# normalize_colums overwrites each column inside `dataset`, so after this loop
# the raw values are no longer available in `dataset`.
for column in numerical_columns:
    normalize_colums(column)

# Last expression of the cell: rendered as a table preview.
dataset

Unnamed: 0,Temperature,L,R,A_M,Color,Spectral_Class,Type
0,0.029663,2.731275e-09,0.000083,0.876798,Red,M,Red Dwarf
1,0.028980,4.944550e-10,0.000075,0.891807,Red,M,Red Dwarf
2,0.017367,2.590003e-10,0.000048,0.957473,Red,M,Red Dwarf
3,0.022622,1.412729e-10,0.000078,0.893371,Red,M,Red Dwarf
4,0.000000,6.828189e-11,0.000049,1.000000,Red,M,Red Dwarf
...,...,...,...,...,...,...,...
235,0.972150,4.412776e-01,0.695919,0.062226,Blue,O,Hyper Giants
236,0.759307,9.818959e-01,0.612777,0.040338,Blue,O,Hyper Giants
237,0.181025,6.327765e-01,0.730304,0.037211,White,A,Hyper Giants
238,0.191692,4.767253e-01,0.570694,0.021576,White,A,Hyper Giants


Le colonne numeriche del dataset (Temperature, L, R, A_M) sono state normalizzate con la formula min-max, che riporta ogni valore nell'intervallo [0, 1], per poter plottare nuovamente i dati come fatto in precedenza

In [9]:
# Same scatter plot as before (L against Temperature, marker size from R),
# drawn from the values currently held in `dataset`.
temperature_domain = (min(dataset['Temperature']), max(dataset['Temperature']))
luminosity_domain = (min(dataset['L']), max(dataset['L']))

# NOTE(review): 'K' is not one of the type characters listed in the d3-format
# documentation -- confirm the intended tick format for both axes.
temperature_axis = alt.X(
    'Temperature',
    scale=alt.Scale(domain=temperature_domain),
    axis=alt.Axis(format='K', title='Temperature'),
)
luminosity_axis = alt.Y(
    'L',
    scale=alt.Scale(domain=luminosity_domain),
    axis=alt.Axis(format='K', title='Brightness'),
)
radius_size = alt.Size('R', legend=alt.Legend(title='Dimension'))

# Last expression of the cell: the chart is rendered as the cell output.
alt.Chart(dataset).mark_point().encode(
    x=temperature_axis,
    y=luminosity_axis,
    size=radius_size,
)

In [10]:
# Keep only the stars whose spectral class is 'O'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'O']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class O')

In [11]:
# Keep only the stars whose spectral class is 'B'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'B']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class B')

In [12]:
# Keep only the stars whose spectral class is 'A'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'A']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class A')

In [13]:
# Keep only the stars whose spectral class is 'F'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'F']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class F')

In [14]:
# Keep only the stars whose spectral class is 'G'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'G']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class G')

In [15]:
# Keep only the stars whose spectral class is 'K'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'K']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class K')

In [16]:
# Keep only the stars whose spectral class is 'M'.
# (The former stand-alone `data` line was removed: it was not the last
# expression of the cell, so it never produced any output.)
data = dataset[dataset['Spectral_Class'] == 'M']

# Axis domains are taken from the full `dataset`, not from the subset, so that
# all per-class charts share the same axes. The title names the class because
# the charts are otherwise indistinguishable.
alt.Chart(data).mark_point().encode(

    x = alt.X('Temperature',
         scale = alt.Scale(domain = (min(dataset.Temperature), max(dataset.Temperature))),
         axis = alt.Axis(format='K', title='Temperature')
         ),

    y = alt.Y('L',
         scale = alt.Scale(domain = (min(dataset.L), max(dataset.L))),
         axis = alt.Axis(format='K', title='Brightness')
         ),

    size = alt.Size('R', legend = alt.Legend(title = 'Dimension'))
).properties(title='Spectral class M')

In [17]:
# Categorical columns used as the dimensions of the parallel-categories diagram.
categorical_dimensions = ['Type', 'Color', 'Spectral_Class']

dimensions = [
    {'values': dataset[name], 'label': name}
    for name in categorical_dimensions
]

# Build colorscale: one colour index per row, all rows start at 0 ('gray');
# index 1 maps to 'firebrick'.
color = np.zeros(len(dataset), dtype='uint8')
colorscale = [[0, 'gray'], [1, 'firebrick']]

# Scatter trace: Temperature on x, L on y.
scatter = go.Scatter(
    x=dataset['Temperature'],
    y=dataset['L'],
    mode='markers',
    marker=dict(color='gray'),
    selected=dict(marker=dict(color='firebrick')),
    unselected=dict(marker=dict(opacity=0.3)),
)

# Parallel-categories trace, placed in the lower 40% of the figure.
parcats = go.Parcats(
    domain=dict(y=[0, 0.4]),
    dimensions=dimensions,
    line=dict(
        colorscale=colorscale,
        cmin=0,
        cmax=1,
        color=color,
        shape='hspline',
    ),
)

# Build figure containing a scatterplot and a parallel diagram
fig = go.FigureWidget(data=[scatter, parcats])

# The scatter's y axis takes the upper part of the figure (0.6 to 1).
fig.update_layout(
    height=800,
    xaxis=dict(title='Temperature'),
    yaxis=dict(title='L', domain=[0.6, 1]),
    dragmode='lasso',
    hovermode='closest',
)

# Update color callback
def update_color(trace, points, state):
    """Highlight the chosen points in both traces of ``fig``.

    Only ``points.point_inds`` is read; ``trace`` and ``state`` are accepted
    but not used. The indices are set as the selected points of the first
    trace, and the line colour array of the second trace is rebuilt with 1 at
    those indices and 0 everywhere else.
    """
    chosen = points.point_inds

    # Update scatter selection
    fig.data[0].selectedpoints = chosen

    # Update parcats colors: start from all zeros, then flag the chosen rows
    line_colors = np.zeros(len(dataset), dtype='uint8')
    line_colors[chosen] = 1
    fig.data[1].line.color = line_colors

# The figure holds two traces: the scatter first, the parcats diagram second.
scatter_trace = fig.data[0]
parcats_trace = fig.data[1]

# The same handler is registered for a selection on the scatter...
scatter_trace.on_selection(update_color)
# ...and for a click on the parcats diagram.
parcats_trace.on_click(update_color)

# Last expression of the cell: the widget is rendered as the cell output.
fig

FigureWidget({
    'data': [{'marker': {'color': 'gray'},
              'mode': 'markers',
              'sele…