In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

In [2]:
# Loading dataset
dataset = pd.read_csv('NASA.csv')
dataset.head()

Unnamed: 0,id,name,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,orbiting_body,sentry_object,absolute_magnitude,hazardous
0,2162635,162635 (2000 SS164),1.198271,2.679415,13569.249224,54839740.0,Earth,False,16.73,False
1,2277475,277475 (2005 WK4),0.2658,0.594347,73588.726663,61438130.0,Earth,False,20.0,True
2,2512244,512244 (2015 YE18),0.72203,1.614507,114258.692129,49798720.0,Earth,False,17.83,False
3,3596030,(2012 BV13),0.096506,0.215794,24764.303138,25434970.0,Earth,False,22.2,False
4,3667127,(2014 GE35),0.255009,0.570217,42737.733765,46275570.0,Earth,False,20.09,True


In [4]:
# dropping unwanted columns: id, name
dataset = dataset.drop(columns = ['id', 'name'])
dataset = shuffle(dataset)

In [5]:
dataset.shape

(90836, 8)

In [6]:
# splitting dataset into features and target
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [8]:
y.shape

(90836,)

In [9]:
# one hot encoding 'orbiting_body' and 'sentry_object'
column_transform = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [4, 5])], remainder='passthrough')
X = np.array(column_transform.fit_transform(X))

In [10]:
# label encoding the target column
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [11]:
# Splitting dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, train_size=0.8, random_state=1)

In [12]:
# Standardization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Logistic Regression
logisticRegression = LogisticRegression()
logisticRegression.fit(X_train, y_train)

y_pred_lr = logisticRegression.predict(X_test)

logisticRegression_score = accuracy_score(y_test, y_pred_lr)
print(logisticRegression_score)



0.9010347864376926


In [None]:
# Support Vector Classification
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

y_pred_svc = classifier.predict(X_test)
SVC_score = accuracy_score(y_test, y_pred_svc)
print(SVC_score)