# <center> Orbit Classification </center>

## Importing libraries

In [None]:
# Numerical and tabular data handling
import numpy as np
import pandas as pd
# Plotting libraries
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings

# Metrics used to score the classifiers further down
from sklearn.metrics import accuracy_score,classification_report
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline
# Ignore all warnings raised from here on (this also hides any
# convergence/deprecation warnings emitted by the models below)
warnings.filterwarnings('ignore')

## Importing Data

In [None]:
data = pd.read_csv('../input/orbitclassification/classast - pha.csv')

In [None]:
data.head()

In [None]:
data.info()

In [None]:
data['class'].unique()

## Data Description

a (AU) -- Semi-major axis of the orbit in AU <br>
e -- Eccentricity of the orbit <br>
i (deg) -- Inclination of the orbit with respect to the ecliptic plane and the equinox of J2000 (J2000-Ecliptic) in degrees <br>
w (deg) -- Argument of perihelion (J2000-Ecliptic) in degrees <br>
Node (deg) -- Longitude of the ascending node (J2000-Ecliptic) in degrees <br>
M (deg) -- Mean anomaly at epoch in degrees <br>
q (AU) -- Perihelion distance of the orbit in AU <br>
Q (AU) -- Aphelion distance of the orbit in AU <br>
P (yr) -- Orbital period in Julian years <br>
H (mag) -- Absolute V-magnitude <br>
MOID (AU) -- Minimum orbit intersection distance (the minimum distance between the osculating orbits of the NEO and the Earth) <br>
class -- Object classification <br>

## Missing data

In [None]:
# Heatmap of the boolean missing-value mask (True wherever an entry is null).
sns.heatmap(data=data.isna())

## Data Correlation

In [None]:
# Annotated heatmap of the pairwise correlations between the features.
# The frame still contains the `class` label column (presumably text);
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0, so the
# correlation is computed on the numeric columns only.
plt.figure(figsize=(12,8))
sns.heatmap(data.select_dtypes(include='number').corr(),annot=True)

## EDA

In [None]:
data.columns

In [None]:
sns.pairplot(data=data,hue='class')

In [None]:
# 2x2 grid of box plots: distribution of four orbital elements per class.
plt.figure(num=1, figsize=(10, 8))
for slot, feature in enumerate(('a (AU)', 'i (deg)', 'w (deg)', 'Node (deg)'), start=1):
    # Select panel `slot` of the 2x2 layout, then draw that feature's box plot.
    plt.subplot(2, 2, slot)
    sns.boxplot(data=data, x='class', y=feature, palette='CMRmap')

In [None]:
# 2x2 grid of box plots for the next four features, split by class.
# This grid gets its own figure number: the previous grid already uses num=1,
# and re-selecting that figure would draw these plots onto the earlier axes
# whenever the figure is still open (e.g. outside the inline backend).
plt.figure(figsize=(10,8),num=2)
for slot, feature in enumerate(['M (deg)', 'q (AU)', 'Q (AU)', 'P (yr)'], start=1):
    plt.subplot(2,2,slot)
    sns.boxplot(x='class',y=feature,palette='CMRmap',data=data)

## Data Split

In [None]:
# Feature matrix: every column except the target and 'Q (AU)'.
# Q is dropped due to its extremely high correlation with a (AU).
X = data.drop(columns=['class', 'Q (AU)'])
# Target vector: the orbit class label.
Y = data['class']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=104,
)

## Model Creation

In [None]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [None]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
from sklearn.model_selection import cross_val_score

# Choose the number of neighbours for `kno` by 5-fold cross-validation on the
# training split only. Scoring candidate values of k on X_test would leak the
# held-out data into model selection and inflate the test accuracy reported
# for `kno` later on.
# Cap k at roughly the size of one CV training fold (4/5 of the training
# rows), since KNN cannot use more neighbours than it has fitted samples.
max_k = min(199, (len(X_train) * 4) // 5)
cv_accuracy = {
    k: cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        X_train,
        Y_train,
        cv=5,
        scoring='accuracy',
    ).mean()
    for k in range(1, max_k + 1)
}
# max() returns the first maximal key, so ties go to the smallest k.
best_k = max(cv_accuracy, key=cv_accuracy.get)
kno = KNeighborsClassifier(n_neighbors=best_k)

In [None]:
from sklearn.svm import SVC
svc = SVC()

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1,1, 10, 100, 1000,2000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']} 
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Fit every candidate on the training split and record [model, test accuracy].
models = [lr, rfc, knn, kno, svc, grid]
accuracy = []
for model in models:
    model.fit(X_train, Y_train)
    test_predictions = model.predict(X_test)
    accuracy.append([model, accuracy_score(Y_test, test_predictions)])

In [None]:
# Keep the [model, accuracy] pair with the highest accuracy; on ties the
# earliest entry in `accuracy` wins.
temp = max(accuracy, key=lambda pair: pair[1])

### The best method is thus a Random Forest Classifier as it gives us an accuracy of 99.62%

In [None]:
# Report per-class metrics and the overall accuracy of the selected model.
best_model = temp[0]
best_predictions = best_model.predict(X_test)
print(classification_report(Y_test, best_predictions))
print("Accuracy of this model: ", round(accuracy_score(Y_test, best_predictions) * 100, 2), '%')