In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import log_loss
from sklearn import preprocessing
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam
%matplotlib inline

In [2]:
# Read the labelled training sample; pandas infers gzip compression from the '.gz' suffix.
train_data = pd.read_csv(filepath_or_buffer='../input/particles-datasets/training.csv.gz')
# Show the first five rows as the cell output.
train_data.head(n=5)

In [3]:
# Read the test sample; pandas infers gzip compression from the '.gz' suffix.
test_data = pd.read_csv(filepath_or_buffer='../input/particles-datasets/test.csv.gz')
# Show the first five rows as the cell output.
test_data.head(n=5)

In [4]:
# (rows, columns) of the training frame.
train_data.shape

In [5]:
# (rows, columns) of the test frame.
test_data.shape

In [6]:
# Summary statistics of the training columns.
train_data.describe()

In [7]:
# Print the column names, dtypes and non-null counts of the training frame.
train_data.info()

In [8]:
# Number of training rows per value of the 'Label' column (class balance check).
train_data['Label'].value_counts()

In [9]:
# Particle label -> integer class id used as the model target.
label_to_class = {'Electron': 0, 'Ghost': 1, 'Kaon': 2, 'Muon': 3, 'Pion': 4, 'Proton': 5}
# Inverse mapping, derived from the forward one so the two dictionaries cannot drift apart.
class_to_label = {class_id: label for label, class_id in label_to_class.items()}


def get_class_ids(labels):
    """Translate particle labels into their integer class ids.

    Parameters
    ----------
    labels : iterable of str
        Particle labels; each one must be a key of ``label_to_class``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one class id per input label, in input order.
        An empty input yields an empty *integer* array (not the float array
        that ``np.array([])`` would produce).

    Raises
    ------
    KeyError
        If any label is not a key of ``label_to_class``; the message lists the
        offending labels so bad rows can be found quickly.
    """
    labels = list(labels)
    unknown = sorted({label for label in labels if label not in label_to_class}, key=str)
    if unknown:
        raise KeyError(
            f"Unknown particle label(s): {unknown}; expected one of {sorted(label_to_class)}"
        )
    return np.array([label_to_class[label] for label in labels], dtype=int)

In [10]:
# Add an integer 'Class' column holding the numeric id of each row's particle label.
train_data['Class'] = get_class_ids(train_data['Label'].values)

In [44]:
# Distinct values of the encoded 'Class' column.
train_data['Class'].unique()

In [12]:
# Every column except the target ('Label') and its integer encoding ('Class') is a model input.
# The list is built in DataFrame column order: iterating a set of strings yields an order that
# can change between interpreter runs (string hash randomisation), which would silently reorder
# the model inputs and make results differ from one kernel restart to the next.
features = [column for column in train_data.columns if column not in ('Label', 'Class')]

In [13]:
# Display the list of feature column names.
features

In [14]:
# Hold out 10% of the rows for validation; the fixed seed makes the split repeatable.
training, validation = train_test_split(
    train_data,
    test_size=0.1,
    random_state=0,
)
# Row counts of the two partitions.
print(training.shape[0], validation.shape[0])

## Using k-nearest neighbours (KNN)

In [15]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier fitted on the feature columns as loaded (no scaling applied).
model_knn = KNeighborsClassifier(n_neighbors=3)
model_knn.fit(X=training[features].values, y=training['Class'].values)

In [16]:
# Predict a class id for every validation row with the fitted KNN model.
result_knn = model_knn.predict(X=validation[features].values)

In [17]:
# Take the display names from the shared id -> label mapping instead of re-typing the list,
# and pass the matching class ids explicitly: without `labels`, the report infers the classes
# from the data, so a class absent from both the validation rows and the predictions would
# leave `target_names` out of step with the rows actually reported.
target_names = [class_to_label[class_id] for class_id in sorted(class_to_label)]
print(classification_report(
    validation.Class.values,
    result_knn,
    labels=sorted(class_to_label),
    target_names=target_names,
))

In [48]:
# Peek at the first 100 KNN predictions.
result_knn[:100]

## Using XGBoost

In [26]:
from xgboost import XGBClassifier

# Gradient-boosted tree classifier with the library's default hyper-parameters.
clf_XGB = XGBClassifier()
clf_XGB.fit(X=training[features].values, y=training['Class'].values)

In [28]:
# Predict a class id for every validation row with the fitted XGBoost model.
result_xgb = clf_XGB.predict(X=validation[features].values)

In [29]:
# Take the display names from the shared id -> label mapping instead of re-typing the list,
# and pass the matching class ids explicitly: without `labels`, the report infers the classes
# from the data, so a class absent from both the validation rows and the predictions would
# leave `target_names` out of step with the rows actually reported.
target_names = [class_to_label[class_id] for class_id in sorted(class_to_label)]
print(classification_report(
    validation.Class.values,
    result_xgb,
    labels=sorted(class_to_label),
    target_names=target_names,
))

In [39]:
# Overlay the true and XGBoost-predicted class ids for the first validation rows.
# The window is capped at the number of rows available: a hard-coded range(100) makes
# matplotlib raise a shape-mismatch error when fewer than 100 rows were held out.
n_shown = min(100, len(validation))

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_xlabel('Range')
ax.set_ylabel('Values')
ax.plot(range(n_shown), validation.Class.values[:n_shown], color='yellow', label='True Values')
ax.plot(range(n_shown), result_xgb[:n_shown], color='black', label='Predicted Values')
ax.set_title('Comparing True and Predicted Values')
ax.legend()

fig.tight_layout()

## Using a neural network (NN)

In [21]:
def nn_model(input_dim, n_classes=6):
    """Build and compile a fully connected softmax classifier.

    The network stacks three tanh hidden layers (1000, 500 and 100 units),
    applies dropout with rate 0.2, and ends in a softmax layer with one unit
    per class.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample.
    n_classes : int, optional
        Width of the softmax output layer. Defaults to 6, the number of
        entries in ``label_to_class``, so existing one-argument calls behave
        exactly as before.

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy loss and an ``Adam``
        optimizer using its default settings.
    """
    model = Sequential()

    # The first hidden layer also declares the input width.
    model.add(Dense(1000, input_dim=input_dim))
    model.add(Activation('tanh'))

    # Remaining hidden layers share the same Dense -> tanh pattern.
    for units in (500, 100):
        model.add(Dense(units))
        model.add(Activation('tanh'))

    # Dropout is applied once, just before the output layer.
    model.add(Dropout(0.2))
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))

    # Categorical cross-entropy expects one-hot encoded targets.
    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return model

In [22]:
# One input unit per feature column.
model_nn = nn_model(len(features))
# One-hot encode the integer class ids. The width is pinned to the number of known classes so
# it always matches the softmax output layer; left to infer it, to_categorical uses
# max(label) + 1, which would be too narrow if the highest class id were absent from this split.
model_nn.fit(
    training[features].values,
    np_utils.to_categorical(training.Class.values, num_classes=len(label_to_class)),
    epochs=20,
    batch_size=256,
    verbose=1,
)

In [23]:
# Network outputs for every validation row: one softmax score per class.
result_nn = model_nn.predict(x=validation[features].values)

In [24]:
# Take the display names from the shared id -> label mapping instead of re-typing the list,
# and pass the matching class ids explicitly: without `labels`, the report infers the classes
# from the data, so a class absent from both the validation rows and the predictions would
# leave `target_names` out of step with the rows actually reported.
target_names = [class_to_label[class_id] for class_id in sorted(class_to_label)]
print(classification_report(
    validation.Class.values,
    np.argmax(result_nn, axis=1),  # most probable class per row
    labels=sorted(class_to_label),
    target_names=target_names,
))

In [25]:
# Multi-class log loss of the predicted probabilities on the validation rows.
# `labels` is given explicitly so the probability columns are always matched to class ids
# 0..5; otherwise the labels are inferred from y_true and a class missing from the validation
# rows would make the column count disagree with the inferred label set.
log_loss(validation.Class.values, result_nn, labels=sorted(class_to_label))

In [40]:
# Overlay the true and network-predicted class ids for the first validation rows.
# The window is capped at the number of rows available: a hard-coded range(100) makes
# matplotlib raise a shape-mismatch error when fewer than 100 rows were held out.
n_shown = min(100, len(validation))

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_xlabel('Range')
ax.set_ylabel('Values')
ax.plot(range(n_shown), validation.Class.values[:n_shown], color='yellow', label='True Values')
# argmax turns each row of class scores into the id of the most probable class.
ax.plot(range(n_shown), np.argmax(result_nn[:n_shown], axis=1), color='black', label='Predicted Values')
ax.set_title('Comparing True and Predicted Values')
ax.legend()

fig.tight_layout()