In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

In [2]:
# Read the raw table from CSV; the path looks like a Colab Google Drive mount.
DATASET_CSV = '/content/drive/MyDrive/Dataset/Datasets/META_object.csv'
df = pd.read_csv(DATASET_CSV)

In [3]:
# Preview the first five rows of the freshly loaded table.
df.head(n=5)

Unnamed: 0,id,name,abs_magnitude,estm_diam,estm_diam_max,orbiting_body,velocity,distance,Target
0,2162117,162117 (1998 SD15),19.14,0.394962,0.883161,Earth,71745.40105,58143623.32,False
1,2349507,349507 (2008 QY),18.5,0.530341,1.185878,Earth,109949.7571,55801047.82,True
2,2455415,455415 (2003 GA),21.45,0.136319,0.304818,Earth,24865.5068,67206887.72,False
3,3132126,(2002 PB),20.63,0.198863,0.444672,Earth,78890.07681,30396444.12,False
4,3557844,(2011 DW),22.7,0.076658,0.171412,Earth,56036.51948,63118626.51,False


In [4]:
# Summarise column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 338199 entries, 0 to 338198
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   id             338199 non-null  int64  
 1   name           338199 non-null  object 
 2   abs_magnitude  338171 non-null  float64
 3   estm_diam      338171 non-null  float64
 4   estm_diam_max  338171 non-null  float64
 5   orbiting_body  338199 non-null  object 
 6   velocity       338199 non-null  float64
 7   distance       338199 non-null  float64
 8   Target         338199 non-null  bool   
dtypes: bool(1), float64(5), int64(1), object(2)
memory usage: 21.0+ MB


In [5]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
id,0
name,0
abs_magnitude,28
estm_diam,28
estm_diam_max,28
orbiting_body,0
velocity,0
distance,0
Target,0


In [6]:
# Replace every missing value with 0, rebinding `df` rather than mutating in place.
df = df.fillna(value=0)

In [7]:
# Re-check the per-column missing-value counts after filling.
df.isna().sum()

Unnamed: 0,0
id,0
name,0
abs_magnitude,0
estm_diam,0
estm_diam_max,0
orbiting_body,0
velocity,0
distance,0
Target,0


In [8]:
# Show the column labels currently present in the frame.
df.columns

Index(['id', 'name', 'abs_magnitude', 'estm_diam', 'estm_diam_max',
       'orbiting_body', 'velocity', 'distance', 'Target'],
      dtype='object')

In [9]:
# Columns removed from the frame before modelling.
column = ['id', 'name', 'orbiting_body', 'abs_magnitude', 'distance']
df = df.drop(labels=column, axis='columns')

In [10]:
# Preview the first five rows after the column drop.
df.head(n=5)

Unnamed: 0,estm_diam,estm_diam_max,velocity,Target
0,0.394962,0.883161,71745.40105,False
1,0.530341,1.185878,109949.7571,True
2,0.136319,0.304818,24865.5068,False
3,0.198863,0.444672,78890.07681,False
4,0.076658,0.171412,56036.51948,False


In [11]:
# Pairwise correlation between the remaining columns (pandas default method).
# NOTE(review): estm_diam and estm_diam_max come out perfectly correlated, so
# one of them may be redundant as a model input — confirm before modelling.
df.corr()

Unnamed: 0,estm_diam,estm_diam_max,velocity,Target
estm_diam,1.0,1.0,0.222416,0.164855
estm_diam_max,1.0,1.0,0.222416,0.164855
velocity,0.222416,0.222416,1.0,0.187021
Target,0.164855,0.164855,0.187021,1.0


In [12]:
# Report the class balance of the Target column.
n_rows = len(df)
n_false = int((df['Target'] == False).sum())
n_true = int((df['Target'] == True).sum())

print(f"Total number of Target: {n_rows}")
print(f"Number of False: {n_false}")
print(f"Number of True: {n_true}")

Total number of Target: 338199
Number of False: 295037
Number of True: 43162


In [13]:
# Display the label column on its own.
df['Target']

Unnamed: 0,Target
0,False
1,True
2,False
3,False
4,False
...,...
338194,False
338195,False
338196,False
338197,False


In [14]:
# (rows, columns) of the frame after cleaning and column selection.
df.shape

(338199, 4)

In [15]:
from sklearn.preprocessing import LabelEncoder

# The label is the last column of the frame. Encode it as integers:
#   True  -> 1
#   False -> 0
# NOTE(review): the encoder is named `gender_encoder` although it encodes the
# Target column; the name is kept so any other reference keeps working.
gender_encoder = LabelEncoder()
labels = df.iloc[:, -1]
y = gender_encoder.fit_transform(labels)
y

array([0, 1, 0, ..., 0, 0, 0])

In [16]:
# Feature matrix: every column except the last one, which holds the label.
n_features = df.shape[1] - 1
X = df.iloc[:, :n_features]
X.head(n=5)

Unnamed: 0,estm_diam,estm_diam_max,velocity
0,0.394962,0.883161,71745.40105
1,0.530341,1.185878,109949.7571
2,0.136319,0.304818,24865.5068
3,0.198863,0.444672,78890.07681
4,0.076658,0.171412,56036.51948


In [17]:
# Scale the data to be between -1 and 1
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [19]:
from sklearn.svm import SVC
from sklearn import metrics

In [None]:

# Baseline: SVC with its default hyperparameters, scored on the test split.
svc = SVC().fit(X_train, y_train)
y_pred = svc.predict(X_test)
print('Accuracy Score:', metrics.accuracy_score(y_test, y_pred), sep='\n')

Accuracy Score:
0.8729006505026612


In [None]:
# Linear-kernel SVC, scored on the test split.
svc = SVC(kernel='linear').fit(X_train, y_train)
y_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy Score:', test_accuracy, sep='\n')

Accuracy Score:
0.8727528089887641


In [None]:
# RBF-kernel SVC, scored on the test split.
# NOTE(review): 'rbf' is also SVC's default kernel, so this presumably repeats
# the default-hyperparameter run — confirm whether both cells are needed.
svc = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy Score:', test_accuracy, sep='\n')

Accuracy Score:
0.8729006505026612


In [None]:
# Polynomial-kernel SVC, scored on the test split.
svc = SVC(kernel='poly').fit(X_train, y_train)
y_pred = svc.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy Score:', test_accuracy, sep='\n')

In [None]:
from sklearn.model_selection import GridSearchCV

# Parameter ranges to search, written as one dict per kernel.
# SVC ignores `gamma` when kernel='linear', so crossing both kernels with every
# gamma value in a single dict would refit identical linear models once per
# gamma. Splitting the grid keeps `gamma` on the RBF kernel only.
c_grid = [0.1, 1, 10, 100, 1000]
param_grid = [
    {'kernel': ['linear'], 'C': c_grid},
    {'kernel': ['rbf'], 'C': c_grid, 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
]

# Search over a fresh estimator rather than whichever `svc` an earlier cell
# happened to leave behind, so this cell does not depend on execution order.
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)

# fitting the model for grid search
grid.fit(X_train, y_train)

In [None]:
from sklearn.svm import SVC
# Unfitted SVC with default settings; passed to GridSearchCV as the base estimator.
svm_model= SVC()

In [None]:
# Search space for the SVC grid search: one dict per kernel, so each kernel is
# combined only with the hyperparameters listed for it.
# This has to be a list of dicts. In a single dict literal the repeated keys
# ('C', 'kernel', 'gamma') overwrite one another and only the last value of
# each survives, which silently reduces the search to the poly kernel.
c_values = np.arange(0.1, 1, 0.1)
tuned_parameters = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf'], 'C': c_values, 'gamma': [0.01, 0.02, 0.03]},
    {'kernel': ['poly'], 'C': c_values, 'gamma': [0.01, 0.02, 0.03], 'degree': [2, 3, 4]},
]

In [None]:
from sklearn.model_selection import GridSearchCV

# 10-fold cross-validated grid search over tuned_parameters, ranked by accuracy.
model_svm = GridSearchCV(
    estimator=svm_model,
    param_grid=tuned_parameters,
    scoring='accuracy',
    cv=10,
)

In [None]:
# Run the search on the training split, then report the best cross-validated score.
fitted_search = model_svm.fit(X_train, y_train)
print(fitted_search.best_score_)

In [None]:
# Show the hyperparameter combination that achieved the best score in the search.
print(model_svm.best_params_)

In [None]:
# Evaluate the configuration selected by the grid search on the held-out test set.
# GridSearchCV refits the winning estimator on the whole training split by
# default (refit=True), so it is reused here instead of hard-coding values.
# The previous literal combined kernel='linear' with `degree` and `gamma`
# (both ignored by a linear kernel) and a gamma of 0.05 that is not part of
# the searched grid, so it could not correspond to the search result.
svc = model_svm.best_estimator_
y_pred = svc.predict(X_test)
print('Accuracy Score:')
print(metrics.accuracy_score(y_test,y_pred))