#### Importing Libraries

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing 
# Shared label encoder, created once here and reused by later cells to map
# categorical string values to integer codes.
label = preprocessing.LabelEncoder()

In [None]:
# Read the star catalogue CSV into the working frame and preview its first
# five rows.
df = pd.read_csv(filepath_or_buffer="../input/star-type-classification/Stars.csv")
df.head(n=5)

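#### Data-Description  
Temperature -- temperature in kelvin (K)  
L -- luminosity relative to the Sun (L/Lo)  
R -- radius relative to the Sun (R/Ro)  
AM -- absolute magnitude (Mv)  
Color -- general color of the spectrum  
Spectral_Class -- spectral class of the star (categorical)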

#### Target:  
Type  
from 0 to 5    
Red Dwarf - 0  
Brown Dwarf - 1  
White Dwarf - 2  
Main Sequence - 3  
Super Giants - 4  
Hyper Giants - 5  

#### Math:  
Lo = 3.828 x 10^26 Watts  
(Avg Luminosity of Sun)  
Ro = 6.9551 x 10^8 m  
(Avg Radius of Sun)  

In [None]:
# Show the dtype of every column of the loaded frame.
df.dtypes

In [None]:
# Count how many rows fall into each value of the target column `Type`.
df['Type'].value_counts()

In [None]:
 df.isnull().any()

In [None]:
# Encode the two categorical columns as integer codes so the scikit-learn
# estimators below can consume them.
#
# `Color` gets its own encoder: calling fit_transform on one shared encoder
# for both columns refits it each time, so only the last column's `classes_`
# would survive and the Color codes could no longer be mapped back to their
# names. `label` still ends up fitted on Spectral_Class, exactly as before.
color_encoder = preprocessing.LabelEncoder()
df['Color'] = color_encoder.fit_transform(df['Color'])
df['Spectral_Class'] = label.fit_transform(df['Spectral_Class'])

# Show the distinct integer codes now present in each encoded column.
print(df['Color'].unique())
print(df['Spectral_Class'].unique())

In [None]:
# Separate the target series (`Type`) from the feature matrix, which keeps
# every remaining column.
y = df['Type']
X = df.drop(columns=['Type'])

### Correlation Matrix

In [None]:
# Draw the pairwise correlation of the feature columns as an annotated
# heatmap on a 9x9-inch figure.
fig = plt.figure(figsize=(9, 9))
sns.heatmap(data=X.corr(), annot=True, cmap="YlGnBu")

In [None]:
# Split features and target into train/test partitions with a fixed seed,
# then print the shape of each partition on its own line.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

In [None]:
# Fit a 100-tree random forest on the training split and report its accuracy
# on the held-out test split.
model1 = RandomForestClassifier(n_estimators=100, random_state=0)
model1.fit(X_train, y_train)
RF_pred = model1.predict(X_test)
print('the accuracy', accuracy_score(y_test, RF_pred))

In [None]:
# Confusion matrix of the random-forest predictions on the test split
# (rows: actual labels, columns: predicted labels).
cm = confusion_matrix(y_true=y_test, y_pred=RF_pred)
print(cm)

In [None]:
# Render the confusion matrix `cm` as a colour image with every cell's count
# written at its centre.
plt.figure(figsize=(9,9))
plt.imshow(cm, interpolation='nearest', cmap='Pastel1')
plt.title('Confusion matrix', size = 15)
plt.colorbar()

width, height = cm.shape
# One tick per matrix row/column rather than a hard-coded 6, so the axes
# always match the matrix size. The labels are row/column positions; they
# coincide with the class ids when every class 0..n-1 is present.
tick_marks = np.arange(width)
tick_labels = [str(i) for i in tick_marks]
plt.xticks(tick_marks, tick_labels, rotation=45, size = 5)
plt.yticks(tick_marks, tick_labels, size = 5)
plt.ylabel('Actual label', size = 15)
plt.xlabel('Predicted label', size = 15)

# Loop names are `row`/`col` on purpose: the previous `x`/`y` overwrote the
# notebook-level target Series `y` with an int, which broke any later cell
# that used `y` again.
for row in range(width):
    for col in range(height):
        # imshow puts columns on the x axis and rows on the y axis.
        plt.annotate(str(cm[row][col]), xy=(col, row),
                    horizontalalignment='center',
                    verticalalignment='center')

# Called last so the axis labels and annotations are included in the layout.
plt.tight_layout()

In [None]:
# Fit a single decision tree on the training split and report its accuracy on
# the held-out test split.
DT_clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
DT_pred = DT_clf.predict(X_test)
print('the accuracy', accuracy_score(y_test, DT_pred))

In [None]:
# Confusion matrix of the decision-tree predictions on the test split
# (rows: actual labels, columns: predicted labels).
cm = confusion_matrix(y_true=y_test, y_pred=DT_pred)
print(cm)

In [None]:
# Render the confusion matrix `cm` as a colour image with every cell's count
# written at its centre.
plt.figure(figsize=(9,9))
plt.imshow(cm, interpolation='nearest', cmap='Pastel1')
plt.title('Confusion matrix', size = 15)
plt.colorbar()

width, height = cm.shape
# One tick per matrix row/column rather than a hard-coded 6, so the axes
# always match the matrix size. The labels are row/column positions; they
# coincide with the class ids when every class 0..n-1 is present.
tick_marks = np.arange(width)
tick_labels = [str(i) for i in tick_marks]
plt.xticks(tick_marks, tick_labels, rotation=45, size = 5)
plt.yticks(tick_marks, tick_labels, size = 5)
plt.ylabel('Actual label', size = 15)
plt.xlabel('Predicted label', size = 15)

# Loop names are `row`/`col` on purpose: the previous `x`/`y` overwrote the
# notebook-level target Series `y` with an int, which broke any later cell
# that used `y` again.
for row in range(width):
    for col in range(height):
        # imshow puts columns on the x axis and rows on the y axis.
        plt.annotate(str(cm[row][col]), xy=(col, row),
                    horizontalalignment='center',
                    verticalalignment='center')

# Called last so the axis labels and annotations are included in the layout.
plt.tight_layout()