# Testing the Accuracy of Decision Tree, K-Nearest Neighbors, and Support Vector Machine Classifiers on Stars, Galaxies, and Quasars

# Machine Learning Prediction and Distribution Model of Stars, Quasars, and Galaxies

In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import warnings

In [2]:
#Getting Models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from tensorflow import keras
from nbloader import Notebook
from sklearn.metrics import confusion_matrix

%matplotlib inline

In [3]:
#Exploratory Data Analysis
import os

# The default is the original author's desktop location, which does not exist
# on other machines. Set the SKYSERVER_CSV environment variable to point at
# your copy of the file instead of editing this cell.
DATA_PATH = os.environ.get('SKYSERVER_CSV', 'C:\\Users\\admin\\Desktop\\Datasets\\Skyserverdata.csv')
df = pd.read_csv(DATA_PATH)
df.head()

FileNotFoundError: [Errno 2] File b'C:\\Users\\admin\\Desktop\\Datasets\\Skyserverdata.csv' does not exist: b'C:\\Users\\admin\\Desktop\\Datasets\\Skyserverdata.csv'

In [None]:
# Dimensions of the loaded table as a (rows, columns) tuple.
df.shape

In [None]:
# Summary statistics of the numeric columns. describe is a method and must be
# called; without the parentheses the cell only echoes the bound method.
df.describe()

In [None]:
#Dropping the not needed identifier columns objid and specobjid
# drop() returns a new frame and leaves df untouched, so the result has to be
# assigned back. errors='ignore' keeps the cell safe to re-run after the
# columns are already gone.
df = df.drop(['objid', 'specobjid'], axis=1, errors='ignore')
df.head()

In [None]:
# Class balance: how many rows carry each value of the 'class' column
class_labels = df['class']
sns.countplot(x=class_labels, palette='coolwarm')

In [None]:
#Wavelength Distribution Plot
# One panel per object class; each panel overlays the kernel density estimate
# of the u, g, r and i columns for the rows of that class.
f, axes = plt.subplots(3, 1, figsize=(16, 10), sharex=True)
c = ['STAR', 'GALAXY', 'QSO']
band_colors = [('u', 'purple'), ('g', 'blue'), ('r', 'green'), ('i', 'red')]

for ax_id in range(3):
    in_class = df['class'] == c[ax_id]
    for band, color in band_colors:
        # kdeplot replaces the deprecated distplot(..., hist=False); NaNs are
        # dropped explicitly because distplot used to do that internally.
        sns.kdeplot(df.loc[in_class, band].dropna(), color=color, ax=axes[ax_id], label=band)
    axes[ax_id].set(xlabel=c[ax_id], ylabel='Intensity')
    # Make the per-band labels visible regardless of the seaborn version.
    axes[ax_id].legend()

# Cosmological Distribution of Galaxies

In [None]:
#3d Model(ra, dec,distance)
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.cosmology import WMAP9 as cosmo

# Take an explicit copy: the columns added below must land in an independent
# frame, not in a slice of df (which raises SettingWithCopyWarning).
dfgal = df.loc[df['class'] == 'GALAXY'].copy()

# Comoving distance of every galaxy from its redshift under the WMAP9
# cosmology. A plain array is passed so astropy does not have to interpret a
# pandas Series; redist stays an astropy Quantity, .value is the bare number.
redist = cosmo.comoving_distance(dfgal['redshift'].values)
dfgal['distance'] = redist.value

dfgal.head()

In [None]:
def cartesian(dist,alpha,delta):
    """Convert spherical positions to Cartesian coordinates.

    dist is the radial distance; alpha and delta are angles in degrees
    (the notebook passes the ra and dec columns). Returns the tuple
    (x, y, z), expressed in the same unit as dist.
    """
    lon = np.deg2rad(alpha)
    lat = np.deg2rad(delta)
    # Projection of the radius onto the plane where delta = 0
    in_plane = dist*np.cos(lat)
    x = in_plane*np.cos(lon)
    y = in_plane*np.sin(lon)
    z = dist*np.sin(lat)
    return x,y,z

# Store the Cartesian position of every galaxy as three new columns
cart = cartesian(dfgal['distance'],dfgal['ra'],dfgal['dec'])
for column, values in zip(('x_coord', 'y_coord', 'z_coord'), cart):
    dfgal[column] = values
dfgal.head()

In [None]:
#Galaxies Distribution
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from astropy.coordinates import Distance
#Get the Coordinates
# redist already carries a length unit from cosmo.comoving_distance, so it is
# passed through unchanged: multiplying it by u.mpc (milliparsec) would turn
# it into an area, which SkyCoord rejects as a distance.
# allow_negative tolerates rows whose redshift is slightly below zero.
coord = SkyCoord(ra=dfgal['ra'].values*u.degree,
                 dec=dfgal['dec'].values*u.degree,
                 distance=Distance(redist, allow_negative=True),
                 frame='icrs')

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of the Cartesian positions computed from ra, dec and distance.
# The axis labels use 'Mpc' because the coordinates derive from redist.value.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dfgal['x_coord'],dfgal['y_coord'],dfgal['z_coord'], s = 0.7)
ax.set_xlabel('X (Mpc)')
ax.set_ylabel('Y (Mpc)')
ax.set_zlabel('Z (Mpc)')
ax.set_title('Galactic Distribution from SDSS',fontsize=18)
plt.show()

# Star Distribution Model

In [None]:
#3d Model(ra, dec,distance)
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.cosmology import WMAP9 as cosmo

# Take an explicit copy: the columns added below must land in an independent
# frame, not in a slice of df (which raises SettingWithCopyWarning).
dfstar = df.loc[df['class'] == 'STAR'].copy()

# Comoving distance from redshift under the WMAP9 cosmology; redist stays an
# astropy Quantity, .value is the bare number stored in the frame.
# NOTE(review): a cosmological redshift-distance relation is presumably not
# meaningful for stars - confirm that this distance is what is intended.
redist = cosmo.comoving_distance(dfstar['redshift'].values)
dfstar['distance'] = redist.value

dfstar.head()

In [None]:
def cartesian(dist,alpha,delta):
    """Return the Cartesian (x, y, z) tuple for spherical inputs.

    alpha and delta are angles in degrees and dist is the radius; the
    outputs share the unit of dist. This repeats the definition given in
    the galaxy section of the notebook.
    """
    ra_rad, dec_rad = np.deg2rad(alpha), np.deg2rad(delta)
    cos_dec = np.cos(dec_rad)
    return (
        dist*cos_dec*np.cos(ra_rad),
        dist*cos_dec*np.sin(ra_rad),
        dist*np.sin(dec_rad),
    )

# Store the Cartesian position of every star as three new columns
cart = cartesian(dfstar['distance'],dfstar['ra'],dfstar['dec'])
for column, values in zip(('x_coord', 'y_coord', 'z_coord'), cart):
    dfstar[column] = values
dfstar.head()

In [None]:
#Star Distribution
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from astropy.coordinates import Distance
#Get the Coordinates
# redist already carries a length unit from cosmo.comoving_distance, so it is
# passed through unchanged: multiplying it by u.mpc (milliparsec) would turn
# it into an area, which SkyCoord rejects as a distance.
# allow_negative tolerates rows whose redshift is below zero.
coord = SkyCoord(ra=dfstar['ra'].values*u.degree,
                 dec=dfstar['dec'].values*u.degree,
                 distance=Distance(redist, allow_negative=True),
                 frame='icrs')

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of the Cartesian positions computed from ra, dec and distance.
# The axis labels use 'Mpc' because the coordinates derive from redist.value.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dfstar['x_coord'],dfstar['y_coord'],dfstar['z_coord'], s = 0.7)
ax.set_xlabel('X (Mpc)')
ax.set_ylabel('Y (Mpc)')
ax.set_zlabel('Z (Mpc)')
ax.set_title('Star Distribution from SDSS',fontsize=18)
plt.show()

# Quasar Distribution Model

In [None]:
#3d Model(ra, dec,distance)
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.cosmology import WMAP9 as cosmo

# Take an explicit copy: the columns added below must land in an independent
# frame, not in a slice of df (which raises SettingWithCopyWarning).
dfqua = df.loc[df['class'] == 'QSO'].copy()

# Comoving distance of every quasar from its redshift under the WMAP9
# cosmology; redist stays an astropy Quantity, .value is the bare number.
redist = cosmo.comoving_distance(dfqua['redshift'].values)
dfqua['distance'] = redist.value

dfqua.head()

In [None]:
def cartesian(dist,alpha,delta):
    """Map (dist, alpha, delta) to the Cartesian tuple (x, y, z).

    Both angles are given in degrees and converted to radians here; the
    returned components are in the unit of dist. This repeats the
    definition used by the earlier sections of the notebook.
    """
    azimuth = np.deg2rad(alpha)
    elevation = np.deg2rad(delta)
    radial_xy = dist*np.cos(elevation)
    components = (
        radial_xy*np.cos(azimuth),
        radial_xy*np.sin(azimuth),
        dist*np.sin(elevation),
    )
    return components

# Store the Cartesian position of every quasar as three new columns
cart = cartesian(dfqua['distance'],dfqua['ra'],dfqua['dec'])
for column, values in zip(('x_coord', 'y_coord', 'z_coord'), cart):
    dfqua[column] = values
dfqua.head()

In [None]:
#Quasar Distribution
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from astropy.coordinates import Distance
#Get the Coordinates
# redist already carries a length unit from cosmo.comoving_distance, so it is
# passed through unchanged: multiplying it by u.mpc (milliparsec) would turn
# it into an area, which SkyCoord rejects as a distance.
# allow_negative tolerates rows whose redshift is slightly below zero.
coord = SkyCoord(ra=dfqua['ra'].values*u.degree,
                 dec=dfqua['dec'].values*u.degree,
                 distance=Distance(redist, allow_negative=True),
                 frame='icrs')

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# This section is about quasars, so plot dfqua; the earlier version scattered
# the star frame again and reused the star title.
# The axis labels use 'Mpc' because the coordinates derive from redist.value.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dfqua['x_coord'],dfqua['y_coord'],dfqua['z_coord'], s = 0.7)
ax.set_xlabel('X (Mpc)')
ax.set_ylabel('Y (Mpc)')
ax.set_zlabel('Z (Mpc)')
ax.set_title('Quasar Distribution from SDSS',fontsize=18)
plt.show()

# Pre-processing Data

In [None]:
# Preview the first five rows before building the feature matrix
df.head(n=5)

In [None]:
#Classify Data
# Features are every column except the target. The objid/specobjid identifier
# columns are excluded as well; errors='ignore' makes that a no-op when they
# have already been removed from df.
x = df.drop(['class', 'objid', 'specobjid'], axis=1, errors='ignore')
y = df['class']

#Training
# Split first, so the held-out rows never contribute to the scaler statistics.
x_train, x_test, y_train, y_test= train_test_split(x,y, test_size=0.3, random_state=128)

#Data Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler(copy=True, with_mean = True, with_std = True)
# Fit on the training rows only, then apply the same transform to the test rows.
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Machine Learning Models

In [None]:
#Decision Tree Classifier
# A shallow tree: depth 3 allows at most 8 leaves, so max_leaf_nodes=15 never
# becomes the binding limit. random_state is fixed (same seed as the
# train/test split) so repeated runs build the same tree.
dtclassifier = DecisionTreeClassifier(max_leaf_nodes=15, max_depth=3, random_state=128)

In [None]:
#Linear Classifiers
# Logistic regression and multinomial naive Bayes, both with library defaults.
# NOTE(review): neither estimator is fitted or scored in the cells visible
# in this notebook - confirm whether they are still needed.
LRClassifier, NBClassifier = LogisticRegression(), MultinomialNB()

In [None]:
#K-Nearest Neighbors
# Each prediction is decided by the three closest training samples.
knn_params = {'n_neighbors': 3}
NeneClassifier = KNeighborsClassifier(**knn_params)

In [None]:
#Support Vector Machine Classifier
# Constructed with the library defaults; no hyper-parameters are tuned here.
SVCModel = SVC()

# Accuracy of Machine Learning Models

In [None]:
#Fitting the Models
# Train the decision tree and the SVM on the training split ...
for estimator in (dtclassifier, SVCModel):
    estimator.fit(x_train, y_train)
# ... and fit k-NN last as the closing expression, so the cell still echoes
# that fitted estimator.
NeneClassifier.fit(x_train, y_train)

In [None]:
#Prediction
# Predict once per model on the held-out split and keep the labels in
# y_preds, y_predsNeNe and y_predsSVC.
y_preds = dtclassifier.predict(x_test)
y_predsNeNe = NeneClassifier.predict(x_test)
y_predsSVC = SVCModel.predict(x_test)

# Accuracy from the stored predictions; this is the same figure that
# estimator.score(x_test, y_test) reports for a classifier.
for caption, predicted in (
    ("Test Accuracy for Decision Tree Classifier: ", y_preds),
    ("Test Accuracy for K-Nearest Neighbors:", y_predsNeNe),
    ("Test Accuracy for Support Vector Classifier: ", y_predsSVC),
):
    print(caption, accuracy_score(y_test, predicted))

In [None]:
#Confusion Matrix

#Decision Tree Classifier
# Pin the label order explicitly so rows and columns always line up with the
# display names, even if a class is missing from y_test or the predictions.
# Rows are the true classes, columns the predicted ones.
class_order = ['GALAXY', 'QSO', 'STAR']
display_names = ['Galaxy', 'Quasar', 'Star']
conf_matrix = confusion_matrix(y_test, y_preds, labels=class_order)
conf_matrix_pd = pd.DataFrame(data = conf_matrix,
                      index = display_names,
                      columns = display_names)
conf_matrix_pd
    

In [None]:
#K-Nearest Neighbors
# Pin the label order explicitly so rows and columns always line up with the
# display names, even if a class is missing from y_test or the predictions.
# Rows are the true classes, columns the predicted ones.
class_order = ['GALAXY', 'QSO', 'STAR']
display_names = ['Galaxy', 'Quasar', 'Star']
conf_matrix = confusion_matrix(y_test, y_predsNeNe, labels=class_order)
conf_matrix_pd = pd.DataFrame(data = conf_matrix,
                      index = display_names,
                      columns = display_names)
conf_matrix_pd

In [None]:
#Support Vector Classifier
# Pin the label order explicitly so rows and columns always line up with the
# display names, even if a class is missing from y_test or the predictions.
# Rows are the true classes, columns the predicted ones.
class_order = ['GALAXY', 'QSO', 'STAR']
display_names = ['Galaxy', 'Quasar', 'Star']
conf_matrix = confusion_matrix(y_test, y_predsSVC, labels=class_order)
conf_matrix_pd = pd.DataFrame(data = conf_matrix,
                      index = display_names,
                      columns = display_names)
conf_matrix_pd

# Prediction of Machine Learning Model

In [None]:
#Reload the raw dataset for the prediction section
import os

# Note that this replaces df with a fresh copy of the CSV, discarding any
# changes made to df by earlier cells.
# The default is the original author's desktop location; set the
# SKYSERVER_CSV environment variable to read the file from elsewhere.
DATA_PATH = os.environ.get('SKYSERVER_CSV', 'C:\\Users\\admin\\Desktop\\Datasets\\Skyserverdata.csv')
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# NOTE(review): pandas does not expose a head_csv attribute, so this line is
# expected to raise AttributeError. Presumably a pd.read_csv(...) call on a
# second dataset was intended - confirm the intended source before fixing.
df2 = pd.head_csv