## Import the libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from sklearn.preprocessing import OneHotEncoder
warnings.filterwarnings("ignore")

2024-05-07 18:42:33.808744: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Check the GPU availability
tf.config.list_physical_devices('GPU')

[]

## Import the dataset

In [3]:
data = pd.read_csv("magic-gamma-telescope_data.csv");

In [4]:
data.head()

Unnamed: 0.1,Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [5]:
data.shape

(19020, 12)

## Specify features and labels

In [6]:
feature = data.iloc[:, 1:-1]

In [7]:
feature

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.0110,-8.2027,40.0920,81.8828
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.2610
2,162.0520,136.0310,4.0612,0.0374,0.0187,116.7410,-64.8580,-45.2160,76.9600,256.7880
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.4490,116.7370
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.6480,356.4620
...,...,...,...,...,...,...,...,...,...,...
19015,21.3846,10.9170,2.6161,0.5857,0.3934,15.2618,11.5245,2.8766,2.4229,106.8258
19016,28.9452,6.7020,2.2672,0.5351,0.2784,37.0816,13.1853,-2.9632,86.7975,247.4560
19017,75.4455,47.5305,3.4483,0.1417,0.0549,-9.3561,41.0562,-9.4662,30.2987,256.5166
19018,120.5135,76.9018,3.9939,0.0944,0.0683,5.8043,-93.5224,-63.8389,84.6874,408.3166


In [8]:
feature.shape

(19020, 10)

In [9]:
target = data.iloc[:, -1]
target.shape

(19020,)

In [10]:
target_test = data.iloc[:, -2]
target_test.shape

(19020,)

## One-hot-encode

In [11]:
labels_reshaped = target.to_numpy().reshape(-1, 1)

In [12]:
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded_labels= encoder.fit_transform(labels_reshaped)

In [13]:
target_encoded = pd.DataFrame(one_hot_encoded_labels, columns=[f'{i}' for i in target.unique()])

In [14]:
target_encoded.shape

(19020, 2)

In [15]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(feature, target_encoded, test_size=0.3, random_state=42)

In [16]:
x_train.shape # without x_valid

(13314, 10)

In [17]:
y_train.shape # without y_valid

(13314, 2)

In [18]:
x_valid, x_train = x_train[:5000], x_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [19]:
x_train.shape

(8314, 10)

In [20]:
x_valid.shape

(5000, 10)

In [21]:
y_train.shape

(8314, 2)

In [22]:
y_valid.shape

(5000, 2)

In [23]:
# from tensorflow.keras.utils import to_categorical

# # Assuming y_train and y_valid are your original labels
# y_train_encoded = to_categorical(y_train, num_classes=64)
# y_valid_encoded = to_categorical(y_valid, num_classes=64)


In [24]:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[10]))
# model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(2, activation="softmax"))

2024-05-07 18:42:36.284531: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [25]:
# Show the created model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 10)                0         
                                                                 
 dense (Dense)               (None, 2)                 22        
                                                                 
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Show the layers in the model
model.layers

[<keras.layers.reshaping.flatten.Flatten at 0x70c980527310>,
 <keras.layers.core.dense.Dense at 0x70c981ed5450>]

In [27]:
# Since the labels are not one-hot encoded, we use the Sparse Categorical Crossentropy Loss
model.compile(loss="categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [28]:
history = model.fit(x_train, y_train, 
                    epochs=20,
                    validation_data=(x_valid, y_valid),
                    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 20: early stopping


In [29]:
# Get the metrics (i.e., loss and accuracy) on the the test set. The function "model.metrics_names" returns the model's metrics
model.evaluate(x_test, y_test)



[30.443187713623047, 0.7215211987495422]