<a href="https://colab.research.google.com/github/marcinwolter/MachineLearning-KISD-2025/blob/main/HIGGS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [1]:
import numpy as np
import keras
from keras import layers

import urllib.request # Added for downloading the file
from numpy import loadtxt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Prepare the data

In [2]:
# Model / data parameters

# Download the dataset
url = 'https://archive.ics.uci.edu/static/public/280/higgs.zip'
#urllib.request.urlretrieve(url, 'higgs.zip') # Downloads and saves as 'higgs.zip'

#!unzip higgs.zip
# load the dataset and limit the number of rows
dataset = loadtxt('HIGGS.csv.gz', delimiter=',', max_rows=1000000)
#print("Dataset")
#print(dataset[0:3])


y = dataset[:,0]
x_all = StandardScaler().fit_transform(dataset[:,1:])
x_low = x_all[:,0:21]
x_high = x_all[:,21:28]

print(x_high[0:3])
print(x_low[0:3])
print(x_all[0:3])

x_all_train, x_all_test, y_all_train, y_all_test = \
    train_test_split(x_all, y, test_size=.3, random_state=42)

x_high_train, x_high_test, y_high_train, y_high_test = \
    train_test_split(x_high, y, test_size=.3, random_state=42)

x_low_train, x_low_test, y_low_train, y_low_test = \
    train_test_split(x_low, y, test_size=.3, random_state=42)




[[ 0.47534187 -0.11938089 -0.44080864 -0.22588748 -0.47893775 -0.12218181
  -0.26562841]
 [-1.08925726 -0.50512521 -0.39442728 -0.07985097 -0.36837201 -0.11230662
  -0.51574745]
 [-0.18530108  0.21963763 -0.39447261 -0.14713863 -0.32359464 -0.45865679
  -0.57393828]]
[[-2.16420160e-01 -6.30354073e-01  2.25333975e-01 -1.11938335e+00
  -6.84758941e-01 -4.97529583e-01 -2.45206999e-01 -1.08585447e+00
  -9.73218727e-01  7.63660961e-01 -6.49309390e-01  9.23623406e-01
   1.01755001e-01  3.01479053e-01 -1.56677171e+00 -1.04115025e+00
  -8.37120754e-01 -6.48530521e-01 -1.00020820e-02 -4.37943041e-02
   1.50322546e+00]
 [-1.48734823e-01  3.25961377e-01  3.58283181e-01  8.34213519e-01
  -3.10324331e-01  2.20950514e-01 -5.50988535e-01 -1.57895139e+00
   1.14125374e+00 -3.60909146e-01 -2.13044986e-01  1.26196524e+00
   1.15720626e+00 -1.01175786e+00 -1.25259279e+00  7.28002584e-01
  -8.37120754e-01 -1.16107439e+00 -1.12975661e+00  9.00292205e-04
  -7.13288646e-01]
 [-3.41103637e-01  1.45808436e+00 

## Build the model

In [3]:
input_shape_high = (7,)

model_high = keras.Sequential(
    [
        keras.Input(shape=input_shape_high),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ]
)

model_high.summary()

## Train the model

In [4]:
batch_size = 128
epochs = 50

model_high.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

model_high.fit(x_high_train, y_high_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.6848 - loss: 0.5826 - val_accuracy: 0.7051 - val_loss: 0.5557
Epoch 2/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3ms/step - accuracy: 0.7028 - loss: 0.5597 - val_accuracy: 0.7080 - val_loss: 0.5532
Epoch 3/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.7050 - loss: 0.5565 - val_accuracy: 0.7096 - val_loss: 0.5506
Epoch 4/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.7084 - loss: 0.5523 - val_accuracy: 0.7114 - val_loss: 0.5480
Epoch 5/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.7087 - loss: 0.5514 - val_accuracy: 0.7105 - val_loss: 0.5479
Epoch 6/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.7097 - loss: 0.5506 - val_accuracy: 0.7112 - val_loss: 0.5478
Epoch 7/50

<keras.src.callbacks.history.History at 0x79002b0fb890>

## Evaluate the trained model

In [5]:
score = model_high.evaluate(x_high_test, y_high_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.545439600944519
Test accuracy: 0.7138199806213379


In [6]:
input_shape_low = (21,)

model_low = keras.Sequential(
    [
        keras.Input(shape=input_shape_low),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ]
)

model_low.summary()

## Train the model

In [7]:
batch_size = 128
epochs = 50

model_low.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

model_low.fit(x_low_train, y_low_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4ms/step - accuracy: 0.6142 - loss: 0.6495 - val_accuracy: 0.6602 - val_loss: 0.6117
Epoch 2/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 3ms/step - accuracy: 0.6635 - loss: 0.6116 - val_accuracy: 0.6740 - val_loss: 0.6017
Epoch 3/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.6743 - loss: 0.6001 - val_accuracy: 0.6829 - val_loss: 0.5910
Epoch 4/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.6814 - loss: 0.5929 - val_accuracy: 0.6843 - val_loss: 0.5887
Epoch 5/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.6859 - loss: 0.5877 - val_accuracy: 0.6896 - val_loss: 0.5848
Epoch 6/50
[1m4922/4922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.6891 - loss: 0.5844 - val_accuracy: 0.6923 - val_loss: 0.5800
Epoch 7/50

<keras.src.callbacks.history.History at 0x78fff0200790>

## Evaluate the trained model

In [8]:
score = model_low.evaluate(x_low_test, y_low_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.5514819025993347
Test accuracy: 0.7167533040046692
