In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the raw star table from the CSV file in the working directory.
df = pd.read_csv('classes.csv')

In [3]:
# Summarize column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Temperature (K)         240 non-null    int64  
 1   Luminosity(L/Lo)        240 non-null    float64
 2   Radius(R/Ro)            240 non-null    float64
 3   Absolute magnitude(Mv)  240 non-null    float64
 4   Star type               240 non-null    int64  
 5   Star color              240 non-null    object 
 6   Spectral Class          240 non-null    object 
dtypes: float64(3), int64(2), object(2)
memory usage: 13.3+ KB


In [4]:
# Integer code for every spectral class letter; the position in the list is
# the code that replaces the letter in the "Spectral Class" column.
spectral_class_codes = {
    letter: code
    for code, letter in enumerate(['M', 'A', 'B', 'F', 'O', 'K', 'G'])
}
df.replace({'Spectral Class': spectral_class_codes}, inplace=True)

In [5]:
# Raw "Star color" labels grouped by the integer code they are replaced with.
# The source spellings are inconsistent (case, hyphens, trailing spaces), so
# every variant is listed verbatim.
star_color_groups = {
    0: ['White', 'white', 'Whitish'],
    1: ['Red', 'Orange', 'Orange-Red'],
    2: ['Blue', 'Blue '],
    3: ['yellow-white', 'Yellowish White', 'yellowish',
        'White-Yellow', 'Yellowish', 'Pale yellow orange'],
    4: ['Blue-white', 'Blue White', 'Blue white',
        'Blue-White', 'Blue white '],
}

# Invert the grouping into the label -> code mapping expected by `replace`.
star_color_codes = {
    label: code
    for code, labels in star_color_groups.items()
    for label in labels
}
df.replace({'Star color': star_color_codes}, inplace=True)

In [6]:
# Replace these three columns with their natural logarithm.
# NOTE: this overwrites the columns in place, so running the cell twice
# applies the logarithm twice.
log_columns = ["Temperature (K)", "Luminosity(L/Lo)", "Radius(R/Ro)"]
for column in log_columns:
    df[column] = np.log(df[column])

In [7]:
# Min-max scaler used to normalize the feature columns (default settings).
scaler = MinMaxScaler()

In [8]:
# Fit the scaler ONCE on every feature column. MinMaxScaler rescales each
# column independently, so the resulting values are the same as fitting column
# by column, but the fitted scaler now remembers the min/max of all six
# features (instead of only the last column it saw) and can be reused to
# transform new samples.
# NOTE(review): the scaler is fitted before the train/test split, so test-set
# statistics leak into the features — consider fitting on training rows only.
feature_cols = [
    "Temperature (K)",
    "Luminosity(L/Lo)",
    "Radius(R/Ro)",
    "Absolute magnitude(Mv)",
    "Star color",
    "Spectral Class",
]
scaled_features = scaler.fit_transform(df[feature_cols].to_numpy(dtype=float))

# Write the columns back one at a time so each one is replaced by a float column.
for col_idx, col_name in enumerate(feature_cols):
    df[col_name] = scaled_features[:, col_idx]

In [9]:
# Preview the first rows after encoding and scaling.
df.head()

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,0.151602,0.147329,0.243442,0.876798,0,0.25,0.0
1,0.14879,0.079381,0.235546,0.891807,0,0.25,0.0
2,0.096917,0.057254,0.202094,0.957473,0,0.25,0.0
3,0.121402,0.039691,0.238535,0.893371,0,0.25,0.0
4,0.0,0.023617,0.202884,1.0,0,0.25,0.0


In [10]:
# Separate the label column from the feature columns.
target_column = "Star type"
y = df[target_column]
x = df.drop(columns=[target_column])

In [11]:
# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the split (and the reported accuracy) can be
# reproduced; stratify=y keeps the class proportions equal in both parts, so
# every star type is represented in the training set.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [12]:
# Pull the underlying numpy arrays out of the pandas objects first ...
train_features, test_features = x_train.to_numpy(), x_test.to_numpy()
train_labels, test_labels = y_train.to_numpy(), y_test.to_numpy()

# ... then wrap them as tensors: float32 features for the Linear layers,
# int64 class indices for CrossEntropyLoss.
x_train_tensor = torch.from_numpy(train_features).float()
x_test_tensor = torch.from_numpy(test_features).float()
y_train_tensor = torch.from_numpy(train_labels).long()
y_test_tensor = torch.from_numpy(test_labels).long()

In [13]:
class CNN(nn.Module):
    """Fully connected classifier with dropout before every linear layer.

    Despite the name, the network contains no convolutions: it is a stack of
    ``Dropout -> Linear -> ReLU`` groups followed by a final
    ``Dropout -> Linear`` layer that produces the raw class scores (logits).

    Args:
        input_d: Number of input features.
        output_d: Number of output classes (size of the logit vector).
        layer_s: Sequence with the width of each hidden layer, in order.
            Any length is accepted; an empty sequence yields a single
            ``Dropout -> Linear`` layer.
        dropout: Dropout probability used for every ``nn.Dropout``.
    """

    def __init__(self, input_d, output_d, layer_s, dropout):
        super().__init__()

        # Widths of consecutive activations: input, then every hidden layer.
        widths = [input_d, *layer_s]

        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.extend([
                nn.Dropout(dropout),
                nn.Linear(in_features, out_features),
                nn.ReLU(),
            ])

        # Output head: no activation, the loss function consumes raw logits.
        layers.extend([
            nn.Dropout(dropout),
            nn.Linear(widths[-1], output_d),
        ])

        # Attribute name kept as `myn` so state_dict keys stay unchanged.
        self.myn = nn.Sequential(*layers)

    def forward(self, input_tensor):
        """Return the logits for a batch of feature rows."""
        return self.myn(input_tensor)

In [19]:
# Network dimensions are derived from the training tensors.
n_features = x_train_tensor.shape[1]
n_classes = len(torch.unique(y_train_tensor))
hidden_sizes = [1024, 1024, 512, 256, 128]
dropout_rate = 0.2

model = CNN(n_features, n_classes, hidden_sizes, dropout_rate)

In [20]:
# Training configuration.
epoches = 1000          # number of passes over the training tensor
loss_vals_train = []    # one entry is appended per training step

# Multi-class objective on raw logits, optimized with Adam.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [24]:
# Full-batch training: one optimizer step per epoch on the training tensor.
# Enable training mode explicitly so Dropout is active even when this cell is
# re-run after the model has been switched to eval mode.
model.train()
for epoch in range(epoches):
    y_pred = model(x_train_tensor)
    # NOTE(review): the last 15 training rows are left out of the loss —
    # presumably an ad-hoc hold-out; confirm this is intended.
    loss = loss_func(y_pred[:-15], y_train_tensor[:-15])
    # Record a plain Python float: appending the tensor itself would keep a
    # reference to its autograd history for every epoch.
    loss_vals_train.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [25]:
# Switch to evaluation mode so the Dropout layers are disabled for inference.
model.eval()

CNN(
  (myn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=6, out_features=1024, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=1024, out_features=1024, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=1024, out_features=512, bias=True)
    (8): ReLU()
    (9): Dropout(p=0.2, inplace=False)
    (10): Linear(in_features=512, out_features=256, bias=True)
    (11): ReLU()
    (12): Dropout(p=0.2, inplace=False)
    (13): Linear(in_features=256, out_features=128, bias=True)
    (14): ReLU()
    (15): Dropout(p=0.2, inplace=False)
    (16): Linear(in_features=128, out_features=6, bias=True)
  )
)

In [26]:
# Evaluate on the held-out test set.
# eval() disables Dropout (so the cell is correct even when run on its own)
# and no_grad() skips building an autograd graph that inference does not need.
model.eval()
with torch.no_grad():
    test_logits = model(x_test_tensor)

# Predicted class = index of the largest logit in each row.
y_pred = test_logits.argmax(dim=1).numpy()
accuracy_score(y_test_tensor.numpy(), y_pred)

1.0