In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Switch off pandas' chained-assignment check for the whole session, so
# assigning into a frame derived from another frame raises no
# SettingWithCopyWarning.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Load the pre-split train/test tables; the first CSV column becomes the row index.
train_df = pd.read_csv('train.csv', index_col=0)
test_df = pd.read_csv('test.csv', index_col=0)

# Label columns (koi_disposition and koi_pdisposition are the classification
# targets used further down) and columns that are simply kept out of the
# feature matrix (presumably identifiers/metadata, judging by their names).
Y_columns = ['koi_disposition', 'koi_pdisposition', 'koi_score']
misc_columns = ['kepid', 'kepoi_name', 'kepler_name', 'koi_tce_delivname']
non_feature_columns = Y_columns + misc_columns

# Features: every column that is neither a label nor a misc column.
train_X = train_df.drop(columns=non_feature_columns)
test_X = test_df.drop(columns=non_feature_columns)

# Explicit copies: these frames are assigned into column-by-column by the
# label-encoding step, and an independent copy keeps those writes from being
# treated as assignments into a slice of train_df / test_df.
train_Y = train_df[non_feature_columns].copy()
test_Y = test_df[non_feature_columns].copy()

In [3]:
# Integer-encode every object-dtype column of train_Y / test_Y.
# Each encoder is fitted on the train and test values together so both splits
# share one mapping, and is kept in `les` under its column name.
les = {}
Y = pd.concat([train_Y, test_Y])
for col, dtype in Y.dtypes.items():
  if dtype != 'object':
    continue  # columns that are not object dtype are left untouched
  encoder = les[col] = LabelEncoder()
  encoder.fit(Y[col])
  # Replace the original values with their encoded labels, in place.
  train_Y[col] = encoder.transform(train_Y[col])
  test_Y[col] = encoder.transform(test_Y[col])

### KOI Disposition - Exoplanet Archive Disposition

In [4]:
# Decision-tree baseline for the koi_disposition label.
target_col = 'koi_disposition'
# Fixed seed: tree fitting is randomised, so without it every
# Restart & Run All prints different scores.
m = DecisionTreeClassifier(random_state=42)
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Refit on the whole training split, then score on the held-out test split.
m.fit(train_X, train_Y[target_col])
score = m.score(test_X, test_Y[target_col])
# NOTE: the "Cross Validation Score" printed here is the lowest of the five
# fold scores (cvs.min()), not their mean.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.8247863247863247
Score: 0.8610968733982574


In [5]:
# k-nearest-neighbours baseline for the koi_disposition label.
# NOTE(review): train_X is passed in as loaded, with no scaling step visible
# in this notebook; KNN is distance-based, so confirm that is intended.
target_col = 'koi_disposition'
m = KNeighborsClassifier()
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Fit on the whole training split and score on the held-out test split.
score = m.fit(train_X, train_Y[target_col]).score(test_X, test_Y[target_col])
# The CV figure reported is the lowest of the five fold scores.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.6259607173356105
Score: 0.6417221937467965


In [6]:
# Random-forest baseline for the koi_disposition label.
target_col = 'koi_disposition'
# Fixed seed: bootstrap sampling and feature sub-sampling are random, so
# without it every Restart & Run All prints different scores.
m = RandomForestClassifier(random_state=42)
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Refit on the whole training split, then score on the held-out test split.
m.fit(train_X, train_Y[target_col])
score = m.score(test_X, test_Y[target_col])
# NOTE: the "Cross Validation Score" printed here is the lowest of the five
# fold scores (cvs.min()), not their mean.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.8743589743589744
Score: 0.8913377754997437


### KOI P-Disposition - Disposition Using Kepler Data

In [7]:
# Decision-tree baseline for the koi_pdisposition label.
target_col = 'koi_pdisposition'
# Fixed seed: tree fitting is randomised, so without it every
# Restart & Run All prints different scores.
m = DecisionTreeClassifier(random_state=42)
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Refit on the whole training split, then score on the held-out test split.
m.fit(train_X, train_Y[target_col])
score = m.score(test_X, test_Y[target_col])
# NOTE: the "Cross Validation Score" printed here is the lowest of the five
# fold scores (cvs.min()), not their mean.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.9897435897435898
Score: 0.9958995386981035


In [8]:
# k-nearest-neighbours baseline for the koi_pdisposition label.
# NOTE(review): train_X is passed in as loaded, with no scaling step visible
# in this notebook; KNN is distance-based, so confirm that is intended.
target_col = 'koi_pdisposition'
m = KNeighborsClassifier()
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Fit on the whole training split and score on the held-out test split.
score = m.fit(train_X, train_Y[target_col]).score(test_X, test_Y[target_col])
# The CV figure reported is the lowest of the five fold scores.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.7754056362083689
Score: 0.7806253203485393


In [9]:
# Random-forest baseline for the koi_pdisposition label.
target_col = 'koi_pdisposition'
# Fixed seed: bootstrap sampling and feature sub-sampling are random, so
# without it every Restart & Run All prints different scores.
m = RandomForestClassifier(random_state=42)
# Five-fold cross-validation on the training split only.
cvs = cross_val_score(m, train_X, train_Y[target_col], cv=5)
# Refit on the whole training split, then score on the held-out test split.
m.fit(train_X, train_Y[target_col])
score = m.score(test_X, test_Y[target_col])
# NOTE: the "Cross Validation Score" printed here is the lowest of the five
# fold scores (cvs.min()), not their mean.
print(f'Cross Validation Score: {cvs.min()}\nScore: {score}')

Cross Validation Score: 0.9871794871794872
Score: 0.99128651973347
