In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print the paths one per line in the order os.walk yields them.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Importing libraries and data

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow import feature_column

In [None]:
# Map the verbose CSV headers to the short column names used by the rest of
# the notebook.
column_aliases = {
    "Absolute magnitude(Mv)": "A_M",
    "Luminosity(L/Lo)": "L",
    "Temperature (K)": "Temperature",
    "Radius(R/Ro)": "R",
    "Star color": "Color",
    "Spectral Class": "class",
}
# Read the dataset and apply the aliases in one chained expression.
df = pd.read_csv("/kaggle/input/star-dataset/6 class csv.csv").rename(columns=column_aliases)
df.head()

In [None]:
# Display pandas' default summary statistics for the loaded DataFrame.
df.describe()

# 2. Data visualization & EDA

### a. Distribution of the temperature

In [None]:
# `sns.displot` is a figure-level function: it always creates its own figure,
# so a preceding `plt.figure(figsize=...)` only leaves an empty figure behind
# and does not size the plot. The size is therefore requested through
# `height`/`aspect` (10 x 10 inches, matching the originally intended figsize).
temperature_grid = sns.displot(df['Temperature'], height=10, aspect=1)
# The CSV header for this column is "Temperature (K)", so label the axis with the unit.
temperature_grid.set_axis_labels('Temperature (K)', 'Count')
plt.show()

### b. Relation between star color and temperature

In [None]:
# Wide canvas so every star-color category fits on the x axis; the boxen plot
# is drawn on the explicitly created axes and shows temperature per color.
figure, ax = plt.subplots(figsize=(20, 10))
sns.boxenplot(data=df, x='Color', y='Temperature', ax=ax)

### c. Relation between Spectral class and Luminosity

In [None]:
# Luminosity distribution per spectral class, drawn on explicitly created
# axes with the 'winter' color palette.
figure, ax = plt.subplots(figsize=(20, 10))
sns.boxenplot(data=df, x='class', y='L', palette='winter', ax=ax)

# 3. Data pre-processing

In [None]:
# Hold out 20% of the rows for validation. A fixed seed makes the split (and
# therefore every downstream metric) reproducible across "Restart & Run All".
SPLIT_SEED = 42
train, val = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Turn a DataFrame into a batched, prefetched ``tf.data.Dataset``.

  The 'Star type' column is split off as the label; every remaining column is
  passed as a named feature. The caller's DataFrame is not modified because
  the work is done on a copy.

  Args:
    dataframe: Source DataFrame; must contain a 'Star type' column.
    shuffle: When true, shuffle with a buffer as large as the number of rows.
    batch_size: Number of rows per batch.

  Returns:
    A dataset of ``(features_dict, labels)`` batches with prefetching enabled.
  """
  features = dataframe.copy()
  targets = features.pop('Star type')
  dataset = tf.data.Dataset.from_tensor_slices((dict(features), targets))
  if shuffle:
    # Buffer covers the whole frame, so the shuffle spans every row.
    dataset = dataset.shuffle(buffer_size=len(features))
  return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
# Tiny batches so that a single batch is easy to inspect by eye.
preview_batch_size = 5
train_ds = df_to_dataset(train, batch_size=preview_batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=preview_batch_size)

In [None]:
# Print one batch: first the feature dictionary, then the label tensor.
for batch_features, batch_labels in train_ds.take(1):
    print(batch_features)
    print(batch_labels)

# 4. Feature engineering

In [None]:
# NOTE: the last line of this cell rebinds the name `feature_column` (imported
# above as the TensorFlow module) to the list of columns consumed by the model.
# The columns are therefore built through `tf.feature_column` so that
# re-running this cell does not fail with
# "AttributeError: 'list' object has no attribute 'numeric_column'".

# Temperature, luminosity and radius: numeric inputs bucketized at the given boundaries.
t = tf.feature_column.numeric_column('Temperature')
temperature = tf.feature_column.bucketized_column(t, boundaries=[3500, 5000, 6000, 7500, 11000, 25000])
l = tf.feature_column.numeric_column('L')
luminosity = tf.feature_column.bucketized_column(l, boundaries=[0.4, 1.2, 6, 80, 20000, 1400000])
r = tf.feature_column.numeric_column('R')
radius = tf.feature_column.bucketized_column(r, boundaries=[0.4, 9, 11, 13, 25, 70, 150])

# Absolute magnitude is fed to the model as a plain numeric column.
am = tf.feature_column.numeric_column('A_M')

# Categorical columns: one-hot encoded over the values observed in the data.
color = tf.feature_column.categorical_column_with_vocabulary_list("Color", df["Color"].unique())
color_one_hot = tf.feature_column.indicator_column(color)
Spectral = tf.feature_column.categorical_column_with_vocabulary_list("class", df["class"].unique())
Spectral_one_hot = tf.feature_column.indicator_column(Spectral)

# Name kept as-is because the training cell passes `feature_column` to DenseFeatures.
feature_column = [temperature, luminosity, radius, am, color_one_hot, Spectral_one_hot]

# 5. Training

In [None]:
# Rebuild the datasets with the batch size used for training; validation data
# is not shuffled.
train_ds = df_to_dataset(train, batch_size=64)
val_ds = df_to_dataset(val, batch_size=64, shuffle=False)

# Feed-forward classifier over the engineered feature columns.
# The hidden layers need a non-linear activation: without one, stacked Dense
# layers collapse into a single linear map and the network is no more
# expressive than a softmax regression.
model = tf.keras.Sequential([
    tf.keras.layers.DenseFeatures(feature_column),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    # Six output units with softmax: one probability per class.
    tf.keras.layers.Dense(6, activation="softmax"),
])

# Sparse categorical cross-entropy: the labels are passed as a single
# 'Star type' column, not as one-hot vectors.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=50)

In [None]:
def plot_history_metric(history, metric):
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain the keys ``metric`` and ``'val_' + metric``.
        metric: Name of the metric to plot, e.g. ``'accuracy'`` or ``'loss'``.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    # The second curve comes from `validation_data`, so label it as validation
    # rather than "test".
    ax.plot(history.history[f'val_{metric}'], label='validation')
    ax.set(title=f'model {metric}', xlabel='epoch', ylabel=metric)
    ax.legend(loc='upper left')
    plt.show()


# One figure per tracked metric, in the same order as before: accuracy, then loss.
for metric in ('accuracy', 'loss'):
    plot_history_metric(history, metric)