# Stellar Classification

### Import Packages

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import seaborn as sn
import matplotlib.pyplot as plt

### Read data and check data types

In [2]:
df = pd.read_csv('stellar_data.csv')

In [3]:
df.dtypes

obj_ID         float64
alpha          float64
delta          float64
u              float64
g              float64
r              float64
i              float64
z              float64
run_ID           int64
rerun_ID         int64
cam_col          int64
field_ID         int64
spec_obj_ID    float64
class           object
redshift       float64
plate            int64
MJD              int64
fiber_ID         int64
dtype: object

### Check for null values

In [4]:
df.isnull().values.any()

False

### Separate target variable from the rest of the data

In [5]:
target = df['class'].copy()
data = df.drop(columns=['class']).copy()

In [6]:
target.value_counts()

GALAXY    59445
STAR      21594
QSO       18961
Name: class, dtype: int64

### Separate data into training and testing set

In [7]:
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.3)

### Feature Scaling

In [8]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

### Fit logistic regression model to training set

In [9]:
classifier = LogisticRegression(max_iter=1000, random_state=0)
classifier.fit(x_train, y_train)

LogisticRegression(max_iter=1000, random_state=0)

### Predict the test result

In [10]:
y_pred = classifier.predict(x_test)

### Create the confusion matrix

In [11]:
confusion_matrix = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])
confusion_matrix

Predicted,GALAXY,QSO,STAR
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
GALAXY,17300,258,352
QSO,709,4886,4
STAR,0,10,6481


### Calculate evaluation metrics

In [12]:
metrics.accuracy_score(y_test, y_pred)

0.9555666666666667

In [13]:
report = metrics.classification_report(y_test, y_pred, digits=3, output_dict=True)
report = pd.DataFrame(report).transpose()
report

Unnamed: 0,precision,recall,f1-score,support
GALAXY,0.960631,0.965941,0.963278,17910.0
QSO,0.948002,0.872656,0.90877,5599.0
STAR,0.94793,0.998459,0.972539,6491.0
accuracy,0.955567,0.955567,0.955567,0.955567
macro avg,0.952188,0.945685,0.948196,30000.0
weighted avg,0.955526,0.955567,0.955109,30000.0
