In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import warnings

In [2]:
# Read the star dataset CSV into a DataFrame and show it as the cell output.
csv_path = '../input/star-dataset/6 class csv.csv'
data = pd.read_csv(csv_path)
data

In [3]:
# Summarise the frame: column names, dtypes and non-null counts per column.
data.info()

# Observation from the summary: no null values, but some columns hold
# non-numerical values.

In [4]:
# Correlate the numeric columns only. The frame also contains non-numerical
# columns, and on pandas >= 2.0 `DataFrame.corr()` raises instead of silently
# skipping them; selecting the numeric dtypes first works on every version.
corr = data.select_dtypes(include='number').corr()
sns.heatmap(corr, annot=True)

# There is a significant correlation, both positive and negative,
# between star type and the other numerical categories.

# Because of this, we will take out the non-numerical categories and
# just feed the model the numerical ones.

# For example, it seems that as star type increases, magnitude decreases.
# Let us see how that is.

In [5]:
# One box plot per numerical feature, with the rows grouped by star type.
group_col = 'Star type'
data.boxplot(column='Temperature (K)', by=group_col)
data.boxplot(column='Luminosity(L/Lo)', by=group_col)
data.boxplot(column='Radius(R/Ro)', by=group_col)
data.boxplot(column='Absolute magnitude(Mv)', by=group_col)

# These box plots give a visual view of the correlation between
# star type and the other numerical categories.

In [6]:
# Mean of every numeric feature per star type. `numeric_only=True` skips the
# non-numerical columns, which pandas >= 2.0 refuses to average (TypeError)
# instead of silently dropping them as older versions did.
data.groupby('Star type').mean(numeric_only=True)

In [7]:
# Create X and y data and convert to numpy arrays to feed to the model.

# X: every column except the target and the two non-numerical columns.
X = np.asarray(data.drop(columns=["Star type", "Star color", "Spectral Class"]))

# y: the class label as an (n_samples, 1) float column vector.
# `np.float` was only an alias for the builtin float and was removed in
# NumPy 1.24 (AttributeError); np.float64 yields the same dtype everywhere.
y = np.asarray(data["Star type"], dtype=np.float64).reshape(-1, 1)

In [8]:
# Split and scale the data for better model performance.

# A fixed random_state makes the train/test partition identical on every
# run, so the reported accuracy and F1 score can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no information from the test split leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Build the neural net
# 'always' makes every warning print each time it is triggered.
warnings.filterwarnings('always')

# Seed TensorFlow so weight initialisation and dropout masks repeat across runs.
tf.random.set_seed(42)

model = keras.models.Sequential([
    # `shape` describes ONE sample and excludes the batch axis. Passing the
    # full X_train.shape (n_samples, n_features) declared a 2-D input per
    # sample that did not match the (batch, n_features) arrays fed to the
    # model; the correct per-sample shape is (n_features,).
    keras.layers.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(200, activation='relu'),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(150, activation='relu'),
    keras.layers.Dropout(0.1),
    # Six output units with softmax: one probability per class.
    keras.layers.Dense(6, activation='softmax')
])
# Sparse categorical cross-entropy takes the integer class ids in y directly
# (no one-hot encoding needed).
model.compile(optimizer='adam', loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [10]:
# Train on the scaled training split: 50 epochs, mini-batches of 16 samples
model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    batch_size=16,
)

In [11]:
# Score the trained model on the held-out test split (loss and accuracy)
model.evaluate(x=X_test, y=y_test)

In [12]:
# Prepare predictions to get f1 score

# One row of class probabilities (softmax output) per test sample.
y_predict = model.predict(X_test)

# Most probable class per row, computed in one vectorized call instead of a
# Python loop, then reshaped to a column vector.
y_pred = np.argmax(y_predict, axis=1).reshape(-1, 1)

In [13]:
# Evaluate the model with the F1 score, using the 'weighted' average
# across classes
f1_score(y_true=y_test, y_pred=y_pred, average='weighted')

In [14]:
# Print the model's layer-by-layer summary.
model.summary()