In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, plot_confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier


In [None]:
# Read the star catalogue CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
stars = pd.read_csv('../input/star-type-classification/Stars.csv')
stars.head(n=5)

 - Temperature -- K
 - L -- luminosity
 - R -- radius
 - A_M -- Absolute Magnitude
 - Color -- General Color of Spectrum
 - Spectral_Class -- O,B,A,F,G,K,M (stellar spectral classification) - https://en.wikipedia.org/wiki/Stellar_classification
 - Type -- Red Dwarf, Brown Dwarf, White Dwarf, Main Sequence, Super Giants, Hyper Giants

In [None]:
# Column names, dtypes and non-null counts of the loaded frame.
stars.info()

In [None]:
# Summary statistics using pandas' default column selection.
stars.describe()

In [None]:
# Summary (count / unique / top / freq) restricted to object-dtype columns.
stars.describe(include=['object'])

In [None]:
# Number of missing values per column (`isna` is the same check as `isnull`).
stars.isna().sum()

### Consider the data

In [None]:
# Correlation heatmap of the numeric features.
# Select numeric columns explicitly: since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns silently and raises on them instead.
numeric_corr = stars.select_dtypes(include='number').corr()

_, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(numeric_corr, annot=True, linewidths=.5, fmt='.1f', ax=ax)

ax.set_title('Correlation', fontsize=30)
ax.set_xlabel('Features', fontsize=15)
ax.set_ylabel('Features', fontsize=15)

plt.show()

In [None]:
# Pairwise scatter plots of the features, coloured by star type.
# `height` replaces the `size` keyword, which seaborn renamed in 0.9 and
# later removed.
sns.pairplot(stars, hue='Type', height=3)
plt.show()

In [None]:
# Number of stars per colour label.
sns.set(rc={'figure.figsize': (26, 10)})
# Pass the column by keyword: seaborn >= 0.12 treats the first positional
# argument as `data`, not `x`.
ax = sns.countplot(x='Color', data=stars)
ax.set_title('Color of stars')
plt.show()

In [None]:
# Number of stars per spectral class.
sns.set(rc={'figure.figsize': (12, 10)})
# Pass the column by keyword: seaborn >= 0.12 treats the first positional
# argument as `data`, not `x`.
ax = sns.countplot(x='Spectral_Class', data=stars, palette='Set2')
# The column holds stellar spectral classes, so the title says so.
ax.set_title('Spectral classes of stars')
plt.show()

In [None]:
# Number of stars per target class.
sns.set(rc={'figure.figsize': (12, 10)})
# Pass the column by keyword: seaborn >= 0.12 treats the first positional
# argument as `data`, not `x`.
ax = sns.countplot(x='Type', data=stars, palette="Set2")
ax.set_title('Types of stars')
plt.show()

In [None]:
# One line plot per non-target column, with the star type on the x-axis.
feature_columns = stars.columns.drop("Type")
for col in feature_columns:
    plt.figure(figsize=(14, 4))
    sns.lineplot(data=stars, x="Type", y=col)
    plt.title(f'Relation between Type and {col}')
    plt.show()

In [None]:
# Mean temperature for each star type.
type_mean = stars.groupby("Type")["Temperature"].mean()

_, ax = plt.subplots(figsize=(10, 8))
# x/y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=type_mean.index, y=type_mean.values, palette="Set2", ax=ax)
ax.set_title("Relation between Type and Temperature")
ax.set_ylabel("Mean Temperature")
plt.show()

In [None]:
# Mean luminosity for each star type.
l_mean = stars.groupby("Type")["L"].mean()

_, ax = plt.subplots(figsize=(10, 8))
# x/y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=l_mean.index, y=l_mean.values, palette="Set2", ax=ax)
ax.set_title("Relation between Type and Luminosity")
ax.set_ylabel("Mean Luminosity")
plt.show()

In [None]:
# Mean radius for each star type.
r_mean = stars.groupby("Type")["R"].mean()

_, ax = plt.subplots(figsize=(10, 8))
# x/y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=r_mean.index, y=r_mean.values, palette="Set2", ax=ax)
ax.set_title("Relation between Type and Radius")
ax.set_ylabel("Mean Radius")
plt.show()

In [None]:
# Mean absolute magnitude for each star type.
am_mean = stars.groupby("Type")["A_M"].mean()

_, ax = plt.subplots(figsize=(10, 8))
# x/y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=am_mean.index, y=am_mean.values, palette="Set2", ax=ax)
# Title spelling corrected ("Magnitute" -> "Magnitude").
ax.set_title("Relation between Type and Absolute Magnitude")
ax.set_ylabel("Mean Absolute Magnitude")
plt.show()

### Data preparation and modeling

In [None]:
# One-hot encode the categorical columns, dropping the first level of each.
# `stars` is overwritten here, so on a re-run the source columns are already
# gone; only encode those still present so the cell is safe to run twice.
categorical_cols = [col for col in ("Color", "Spectral_Class") if col in stars.columns]
if categorical_cols:
    # pandas >= 2.0 emits bool dummies by default; pin the legacy uint8 dtype
    # so the encoded columns stay numeric for downstream estimators.
    stars = pd.get_dummies(
        data=stars,
        columns=categorical_cols,
        drop_first=True,
        dtype=np.uint8,
    )

In [None]:
# Show the first rows transposed, so each column of `stars` becomes a row.
stars.head().T

In [None]:
# Separate the target column from the feature matrix.
y = stars['Type']
x = stars.drop(columns='Type')

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

### Let's see which models give the best results with their default parameters

In [None]:
!pip install lazypredict
from lazypredict.Supervised import LazyClassifier

In [None]:
# Run LazyClassifier on the train/test split. `fit` returns two objects,
# kept here as `models` and `predictions`.
clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)
models, predictions = clf.fit(x_train, x_test, y_train, y_test)

In [None]:
# Leave the object as the cell's last expression so the notebook renders its
# rich representation instead of the plain-text output of print().
models

In [None]:
from lazypredict.Supervised import LazyRegressor

In [None]:
# Run LazyRegressor on the same train/test split.
# NOTE(review): the target here is `Type`, which the notebook otherwise treats
# as a class label -- confirm that regressing on it is intended.
# NOTE(review): this rebinds `models` and `predictions`, replacing the values
# produced by the LazyClassifier cell.
reg = LazyRegressor(
    verbose=0,
    ignore_warnings=False,
    custom_metric=None,
)
models, predictions = reg.fit(x_train, x_test, y_train, y_test)

In [None]:
# Leave the object as the cell's last expression so the notebook renders its
# rich representation instead of the plain-text output of print().
models