In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

## Importing The Dataset 

In [None]:
# Load the star catalogue from the Kaggle input directory (comma-separated,
# which is the parser default) and preview the first rows.
starsCsvPath = "../input/star-type-classification/Stars.csv"
dfStarClassification = pd.read_csv(starsCsvPath)
dfStarClassification.head()

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
dfStarClassification.info()

In [None]:
# Store the "Type" label as text; every other column keeps its dtype.
dfStarClassification = dfStarClassification.assign(
    Type=dfStarClassification["Type"].astype(str)
)

In [None]:
# Print the schema again to confirm the dtype of Type after the cast.
dfStarClassification.info()

In [None]:
# List the distinct raw Color labels to spot spelling/casing variants.
dfStarClassification.Color.unique()

In [None]:
def ColoumnArrangement(df,ColoumnName):
    """Return a copy of ``df`` with one text column normalised.

    Every value of ``ColoumnName`` is upper-cased and each space is replaced
    by a hyphen, so spellings such as ``"Blue White"`` and ``"Blue-white"``
    both become ``"BLUE-WHITE"``.

    The column stays at its original position. It used to be re-inserted at
    the hard-coded index 4, which raised ``IndexError`` for frames with fewer
    than five columns and silently moved the column in any other layout.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    ColoumnName : str
        Name of the string column to normalise.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns in the same order.

    Raises
    ------
    KeyError
        If ``ColoumnName`` is not a column of ``df``.
    AttributeError
        If the column does not hold strings (the ``.str`` accessor fails).
    """
    # regex=False: the space is a literal character, not a pattern.
    normalised = df[ColoumnName].str.upper().str.replace(" ", "-", regex=False)
    # Work on a copy so the caller's frame is left untouched.
    result = df.copy()
    result[ColoumnName] = normalised
    return result


In [None]:
# Upper-case the Color labels and replace spaces with hyphens.
dfStarClassification = ColoumnArrangement(dfStarClassification,"Color")

In [None]:
# Preview the first rows after the Color column was rewritten.
dfStarClassification.head()

In [None]:
# List the distinct Color labels that remain after normalisation.
dfStarClassification.Color.unique()

In [None]:
# List the distinct Spectral_Class labels.
dfStarClassification.Spectral_Class.unique()

In [None]:
# Pairwise correlation of the numeric columns only. The frame still holds text
# columns (Color, Spectral_Class and the string-cast Type); pandas >= 2.0 no
# longer drops those silently inside corr() and raises instead, so they are
# filtered out explicitly, which behaves the same on every pandas version.
correlation = dfStarClassification.select_dtypes(include="number").corr()
correlation

In [None]:
# Draw the correlation matrix as a heatmap, labelling both axes with the
# matrix's column names.
featureLabels = correlation.columns
sns.heatmap(correlation, xticklabels=featureLabels, yticklabels=featureLabels)

In [None]:
# Count plot: number of rows for each value of Type.
sns.catplot(x="Type",data= dfStarClassification,kind="count")
plt.show()

In [None]:
# Continuous feature columns; the list is reused later as the set of columns
# to rescale.
continuousColumns = ["Temperature","L","R","A_M"]

# One box plot per continuous feature, grouped by Type, each in its own figure.
for featureName in continuousColumns:
    sns.boxplot(data=dfStarClassification, x="Type", y=featureName)
    plt.show()


## Preprocessing

In [None]:
# Categorical columns that will be expanded into indicator columns.
willOneHotEncode = ["Color","Spectral_Class"]
# Columns that will be rescaled; this is an alias of continuousColumns (same
# list object, not a copy).
willScale  = continuousColumns

In [None]:
# Replace each categorical column with its indicator columns. get_dummies
# appends them after the remaining columns, prefixed "<column>_", in the order
# given by willOneHotEncode, and drops the source columns.
dfStarClassification = pd.get_dummies(dfStarClassification, columns=willOneHotEncode)
dfStarClassification.head()


In [None]:
# Rescale the continuous features with a MinMaxScaler in its default
# configuration and wrap the resulting array in a DataFrame.
# The scaled frame reuses the source frame's index: it is later joined back
# with concat(axis=1), which aligns rows on the index, so a fresh RangeIndex
# would misalign (and NaN-pad) rows if the source index were ever not 0..n-1.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so the held-out rows influence the scaling parameters; consider fitting on
# the training rows only.
minMaxScaler = MinMaxScaler()
scaledColums = pd.DataFrame(
    minMaxScaler.fit_transform(dfStarClassification[willScale]),
    columns=willScale,
    index=dfStarClassification.index,
)
scaledColums.describe()

In [None]:
# Remove the unscaled continuous columns from the working frame.
dfStarClassification = dfStarClassification.drop(columns=willScale)

In [None]:
# Append the scaled columns on the right; concat(axis=1) matches rows by index.
dfStarClassification = pd.concat([dfStarClassification,scaledColums],axis=1)

In [None]:
# Display the frame after encoding and scaling.
dfStarClassification

In [None]:
# Absolute pairwise correlations of the preprocessed features. Type is stored
# as text, which corr() rejects on pandas >= 2.0, so only numeric and boolean
# columns are kept (get_dummies may emit its indicator columns as bool).
correlation = (
    dfStarClassification
    .select_dtypes(include=["number", "bool"])
    .corr()
    .abs()
)
correlation.head()

## Train Test Split 

In [None]:
# Label column (kept as a one-element list, so yTrain / yTest are
# single-column DataFrames) and every remaining column as a feature.
target = ["Type"]
features = dfStarClassification.columns.drop(target)

# Hold out 22% of the rows with a fixed seed, then separate features and label.
train, test = train_test_split(dfStarClassification, random_state=12, test_size=0.22)
xTrain, yTrain = train[features], train[target]
xTest, yTest = test[features], test[target]


## KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Configure and fit the k-nearest-neighbours classifier in one step.
# The configured estimator (n_neighbors=6) used to be thrown away: a second,
# default-constructed KNeighborsClassifier() was fitted and assigned to
# knnModel instead, so the settings below never took effect.
# yTrain is a single-column DataFrame; ravel() hands the estimator a 1-D array.
knnModel = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform').fit(xTrain, yTrain.values.ravel())


In [None]:
# Predict the held-out rows and show the fraction classified correctly.
yPred= knnModel.predict(xTest)
accuracy_score(yTest,yPred)


In [None]:
# Text report of the KNN predictions against the held-out labels.
print(classification_report(yTest,yPred))


## DecisionTree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with the Gini criterion, limited to depth 4. Only fit() is timed.
# random_state is pinned so that ties between equally good splits resolve the
# same way on every run (the tree permutes features at each split).
dTree = DecisionTreeClassifier(criterion="gini", max_depth=4, random_state=12)
# perf_counter() is the clock intended for measuring short intervals;
# time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# yTrain is a single-column DataFrame; pass it as a 1-D array, as the KNN cell does.
dTree.fit(xTrain, yTrain.values.ravel())
end = time.perf_counter()
preddt = dTree.predict(xTest)
print(classification_report(yTest,preddt))
print("Prosesing Time",end-start)

In [None]:
# Decision tree with the entropy criterion, limited to depth 3. Only fit() is timed.
# random_state is pinned so that ties between equally good splits resolve the
# same way on every run (the tree permutes features at each split).
dTree = DecisionTreeClassifier(criterion="entropy", max_depth=3, random_state=12)
# perf_counter() is the clock intended for measuring short intervals;
# time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# yTrain is a single-column DataFrame; pass it as a 1-D array, as the KNN cell does.
dTree.fit(xTrain, yTrain.values.ravel())
end = time.perf_counter()
preddt = dTree.predict(xTest)
print(classification_report(yTest,preddt))
print("Prosesing Time: ",end-start)