# Testing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sympy import Matrix
from IPython.display import display, Math
import pandas as pd

In [2]:
def prec_mat(y,yhat):
    """Return the per-class (column-normalised) confusion matrix for +1/-1 labels.

    Parameters
    ----------
    y : array-like
        True labels; entries equal to 1 form the positive class and entries
        equal to -1 the negative class.
    yhat : array-like
        Predicted labels with the same number of elements as ``y``. Both
        inputs are flattened first, so a column vector and a flat vector
        can be mixed without accidental broadcasting.

    Returns
    -------
    numpy.ndarray
        2x2 float array of fractions laid out as::

            [[correct +1 / n(+1),   incorrect -1 / n(-1)],
             [incorrect +1 / n(+1), correct -1 / n(-1)]]

        Every column sums to 1. A column is NaN when its class does not
        occur in ``y`` (instead of raising ``ZeroDivisionError``).

    Raises
    ------
    ValueError
        If ``y`` and ``yhat`` do not contain the same number of elements.

    Notes
    -----
    A prediction that is neither the sample's true label nor its opposite
    (for example 0, which ``np.sign`` returns for a zero score) is counted
    as incorrect.
    """
    y = np.asarray(y).ravel()
    yhat = np.asarray(yhat).ravel()
    if y.shape != yhat.shape:
        raise ValueError('y and yhat must contain the same number of elements')

    is_true = y == 1
    is_false = y == -1

    y_true = np.count_nonzero(is_true)
    y_false = np.count_nonzero(is_false)

    # Count exact matches; everything else in the class is a miss.
    cor_true = np.count_nonzero(is_true & (yhat == 1))
    cor_false = np.count_nonzero(is_false & (yhat == -1))

    inc_true = y_true - cor_true
    inc_false = y_false - cor_false

    counts = np.array([[cor_true, inc_false], [inc_true, cor_false]], dtype=float)
    totals = np.array([y_true, y_false], dtype=float)

    # Broadcasting divides column j by totals[j]; 0/0 for an absent class -> NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        y_mat = counts / totals

    return y_mat

## Least Squares

In [3]:
# Load the test set and discard the columns this model does not use.
data = pd.read_csv('nasa_test.csv').drop(
    columns=[
        'Relative Velocity km per sec',
        'Miss Dist.(kilometers)',
        'Jupiter Tisserand Invariant',
        'Epoch Osculation',
        'Eccentricity',
        'Inclination',
        'Asc Node Longitude',
        'Orbital Period',
        'Perihelion Arg',
        'Perihelion Time',
        'Mean Anomaly',
        'Mean Motion',
    ]
)

In [4]:
# Inspect the frame left after the column drop (pandas abbreviates long frames with "...").
data

Unnamed: 0,Absolute Magnitude,Est Dia in KM(max),Orbit Uncertainity,Minimum Orbit Intersection,Semi Major Axis,Perihelion Distance,Aphelion Dist,Hazardous
0,20.4,0.494356,1,0.031146,0.855359,0.690580,1.020137,1
1,23.6,0.113250,7,0.086149,1.008238,0.883224,1.133252,-1
2,20.8,0.411188,0,0.044154,1.322797,0.962833,1.682761,1
3,25.3,0.051765,8,0.095969,2.506961,1.100599,3.913323,-1
4,23.4,0.124177,6,0.140935,1.540760,0.999165,2.082354,-1
...,...,...,...,...,...,...,...,...
464,21.7,0.271669,6,0.135162,2.131253,1.149324,3.113181,-1
465,20.4,0.494356,2,0.034161,1.466590,0.108481,2.824699,1
466,25.6,0.045086,7,0.013373,1.126420,0.991296,1.261545,-1
467,19.7,0.682402,3,0.196366,0.710146,0.362727,1.057565,-1


In [5]:
# Labels as an (n, 1) column vector; every other column is a feature.
y = data['Hazardous'].values.reshape(-1, 1)
X_unscaled = data.drop(columns=['Hazardous']).values

# One fixed divisor per feature, positional in the frame's column order
# (presumably derived from the training data -- TODO confirm the source).
X_scale = np.array([
    32.1,         # Absolute Magnitude
    34.83693825,  # Est Dia in KM(max)
    9,            # Orbit Uncertainity
    0.477891,     # Minimum Orbit Intersection
    5.07200846,   # Semi Major Axis
    1.29983165,   # Perihelion Distance
    8.98385155,   # Aphelion Dist
])
X = X_unscaled / X_scale

In [6]:
# Hard-coded weight vector (one entry per scaled feature); presumably fitted elsewhere.
w_LS = np.array([
    -3.16260673,
    -4.34208973,
    -0.32246162,
    -2.34655778,
    -38.23236007,
    5.30305961,
    35.44484863,
])
display(Matrix(w_LS.round(1)))

Matrix([
[ -3.2],
[ -4.3],
[ -0.3],
[ -2.3],
[-38.2],
[  5.3],
[ 35.4]])

In [7]:
# Predict with the sign of the linear score, shaped like y so the comparison is element-wise.
yhat_LS = np.sign(X @ w_LS).reshape(y.shape)
# Misclassification rate: share of samples whose prediction differs from the label.
error_LS = np.count_nonzero(y != yhat_LS) / len(y)
print('The error using least squared is', np.round(100 * error_LS, 1), '%')

The error using least squared is 15.8 %


In [8]:
# Per-class correct/incorrect fractions from prec_mat, displayed as percentages.
mat_LS = prec_mat(y, yhat_LS)
display(Matrix((mat_LS * 100).round(1)))

Matrix([
[  0.0,   0.0],
[100.0, 100.0]])

## Least Squares with SMOTE

### Initial Guess

In [9]:
# Reload the test set and discard the columns this model does not use.
data = pd.read_csv('nasa_test.csv').drop(
    columns=[
        'Relative Velocity km per sec',
        'Miss Dist.(kilometers)',
        'Orbit Uncertainity',
        'Jupiter Tisserand Invariant',
        'Epoch Osculation',
        'Eccentricity',
        'Semi Major Axis',
        'Inclination',
        'Asc Node Longitude',
        'Orbital Period',
        'Perihelion Distance',
        'Perihelion Arg',
        'Aphelion Dist',
        'Perihelion Time',
        'Mean Anomaly',
        'Mean Motion',
    ]
)

In [10]:
# Inspect the frame left after the column drop (pandas abbreviates long frames with "...").
data

Unnamed: 0,Absolute Magnitude,Est Dia in KM(max),Minimum Orbit Intersection,Hazardous
0,20.4,0.494356,0.031146,1
1,23.6,0.113250,0.086149,-1
2,20.8,0.411188,0.044154,1
3,25.3,0.051765,0.095969,-1
4,23.4,0.124177,0.140935,-1
...,...,...,...,...
464,21.7,0.271669,0.135162,-1
465,20.4,0.494356,0.034161,1
466,25.6,0.045086,0.013373,-1
467,19.7,0.682402,0.196366,-1


In [11]:
# Labels as an (n, 1) column vector; every other column is a feature.
y = data['Hazardous'].values.reshape(-1, 1)
X_unscaled = data.drop(columns=['Hazardous']).values

# One fixed divisor per feature, positional in the frame's column order
# (presumably derived from the training data -- TODO confirm the source).
X_scale = np.array([
    32.1,         # Absolute Magnitude
    34.83693825,  # Est Dia in KM(max)
    0.477891,     # Minimum Orbit Intersection
])
X = X_unscaled / X_scale

In [12]:
# Hard-coded weight vector (one entry per scaled feature); presumably fitted elsewhere.
w_LS_S_G = np.array([
    -0.00911348044,
    15.9325905,
    -2.81304124,
])
display(Matrix(w_LS_S_G.round(2)))

Matrix([
[-0.01],
[15.93],
[-2.81]])

In [13]:
# Predict with the sign of the linear score, shaped like y so the comparison is element-wise.
yhat_LS_S_G = np.sign(X @ w_LS_S_G).reshape(y.shape)
# Misclassification rate: share of samples whose prediction differs from the label.
error_LS_S_G = np.count_nonzero(y != yhat_LS_S_G) / len(y)
print('The error using least squared guess with SMOTE is', np.round(100 * error_LS_S_G, 1), '%')

The error using least squared guess with SMOTE is 13.4 %


In [14]:
# Per-class correct/incorrect fractions from prec_mat, displayed as percentages.
mat_LS_S_G = prec_mat(y, yhat_LS_S_G)
display(Matrix((mat_LS_S_G * 100).round(1)))

Matrix([
[73.0, 10.9],
[27.0, 89.1]])

### Ranking Results

In [15]:
# Reload the test set and discard the columns this model does not use.
data = pd.read_csv('nasa_test.csv').drop(
    columns=[
        'Est Dia in KM(max)',
        'Relative Velocity km per sec',
        'Miss Dist.(kilometers)',
        'Orbit Uncertainity',
        'Jupiter Tisserand Invariant',
        'Epoch Osculation',
        'Eccentricity',
        'Inclination',
        'Asc Node Longitude',
        'Orbital Period',
        'Perihelion Arg',
        'Perihelion Time',
        'Mean Anomaly',
        'Mean Motion',
    ]
)

In [16]:
# Inspect the frame left after the column drop (pandas abbreviates long frames with "...").
data

Unnamed: 0,Absolute Magnitude,Minimum Orbit Intersection,Semi Major Axis,Perihelion Distance,Aphelion Dist,Hazardous
0,20.4,0.031146,0.855359,0.690580,1.020137,1
1,23.6,0.086149,1.008238,0.883224,1.133252,-1
2,20.8,0.044154,1.322797,0.962833,1.682761,1
3,25.3,0.095969,2.506961,1.100599,3.913323,-1
4,23.4,0.140935,1.540760,0.999165,2.082354,-1
...,...,...,...,...,...,...
464,21.7,0.135162,2.131253,1.149324,3.113181,-1
465,20.4,0.034161,1.466590,0.108481,2.824699,1
466,25.6,0.013373,1.126420,0.991296,1.261545,-1
467,19.7,0.196366,0.710146,0.362727,1.057565,-1


In [17]:
# Labels as an (n, 1) column vector; every other column is a feature.
y = data['Hazardous'].values.reshape(-1, 1)
X_unscaled = data.drop(columns=['Hazardous']).values

# One fixed divisor per feature, positional in the frame's column order
# (presumably derived from the training data -- TODO confirm the source).
X_scale = np.array([
    32.1,        # Absolute Magnitude
    0.477891,    # Minimum Orbit Intersection
    5.07200846,  # Semi Major Axis
    1.29983165,  # Perihelion Distance
    8.98385155,  # Aphelion Dist
])
X = X_unscaled / X_scale

In [18]:
# Hard-coded weight vector (one entry per scaled feature); presumably fitted elsewhere.
w_LS_S = np.array([
    -5.14142661,
    -3.93357432,
    -74.85054776,
    10.34111043,
    68.84737334,
])
display(Matrix(w_LS_S.round(1)))

Matrix([
[ -5.1],
[ -3.9],
[-74.9],
[ 10.3],
[ 68.8]])

In [19]:
# Predict with the sign of the linear score, shaped like y so the comparison is element-wise.
yhat_LS_S = np.sign(X @ w_LS_S).reshape(y.shape)
# Misclassification rate: share of samples whose prediction differs from the label.
error_LS_S = np.count_nonzero(y != yhat_LS_S) / len(y)
print('The error using least squared with SMOTE is', np.round(100 * error_LS_S, 1), '%')

The error using least squared with SMOTE is 15.8 %


In [20]:
# Per-class correct/incorrect fractions from prec_mat, displayed as percentages.
mat_LS_S = prec_mat(y, yhat_LS_S)
display(Matrix((mat_LS_S * 100).round(1)))

Matrix([
[  0.0,   0.0],
[100.0, 100.0]])

## LASSO SMOTE

In [21]:
# Reload the test set and discard the columns this model does not use
# (a single drop call covers the whole list, including 'Mean Motion').
data = pd.read_csv('nasa_test.csv').drop(
    columns=[
        'Est Dia in KM(max)',
        'Relative Velocity km per sec',
        'Miss Dist.(kilometers)',
        'Orbit Uncertainity',
        'Epoch Osculation',
        'Eccentricity',
        'Semi Major Axis',
        'Inclination',
        'Asc Node Longitude',
        'Perihelion Arg',
        'Perihelion Time',
        'Mean Anomaly',
        'Mean Motion',
    ]
)

In [22]:
# Inspect the frame left after the column drop (pandas abbreviates long frames with "...").
data

Unnamed: 0,Absolute Magnitude,Minimum Orbit Intersection,Jupiter Tisserand Invariant,Orbital Period,Perihelion Distance,Aphelion Dist,Hazardous
0,20.4,0.031146,6.867,288.948855,0.690580,1.020137,1
1,23.6,0.086149,5.954,369.779900,0.883224,1.133252,-1
2,20.8,0.044154,4.884,555.697196,0.962833,1.682761,1
3,25.3,0.095969,3.221,1449.839167,1.100599,3.913323,-1
4,23.4,0.140935,4.367,698.555256,0.999165,2.082354,-1
...,...,...,...,...,...,...,...
464,21.7,0.135162,3.567,1136.451278,1.149324,3.113181,-1
465,20.4,0.034161,3.902,648.726258,0.108481,2.824699,1
466,25.6,0.013373,5.527,436.665711,0.991296,1.261545,-1
467,19.7,0.196366,7.953,218.584650,0.362727,1.057565,-1


In [23]:
# Labels as an (n, 1) column vector; every other column is a feature.
y = data['Hazardous'].values.reshape(-1, 1)
X_unscaled = data.drop(columns=['Hazardous']).values

# One fixed divisor per feature, positional in the frame's column order
# (presumably derived from the training data -- TODO confirm the source).
X_scale = np.array([
    32.1,        # Absolute Magnitude
    0.477891,    # Minimum Orbit Intersection
    9.025,       # Jupiter Tisserand Invariant
    4172.23134,  # Orbital Period
    1.29983165,  # Perihelion Distance
    8.98385155,  # Aphelion Dist
])
X = X_unscaled / X_scale

In [24]:
# Hard-coded weight vector (one entry per scaled feature); presumably fitted elsewhere.
w_LASSO = np.array([
    -4.85650151,
    -3.83907764,
    2.60652119,
    -15.1986243,
    2.2952872,
    14.65899485,
])
display(Matrix(w_LASSO.round(1)))

Matrix([
[ -4.9],
[ -3.8],
[  2.6],
[-15.2],
[  2.3],
[ 14.7]])

In [25]:
# Predict with the sign of the linear score, shaped like y so the comparison is element-wise.
yhat_LASSO = np.sign(X @ w_LASSO).reshape(y.shape)
# Misclassification rate: share of samples whose prediction differs from the label.
error_LASSO = np.count_nonzero(y != yhat_LASSO) / len(y)
print('The error using LASSO is', np.round(100 * error_LASSO, 1), '%')

The error using LASSO is 19.6 %


In [26]:
# Per-class correct/incorrect fractions from prec_mat, displayed as percentages.
mat_LASSO = prec_mat(y, yhat_LASSO)
display(Matrix((mat_LASSO * 100).round(1)))

Matrix([
[100.0, 23.3],
[  0.0, 76.7]])

## Neural Network