In [9]:
import os
import numpy as np
import pandas as pd
import pickle as pkl
import imageio
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from PIL import Image
import PIL
from astropy.nddata.utils import Cutout2D
from scipy import ndimage
import time
import matplotlib.pyplot as plt

In [2]:
# Helper functions for normalizing arrays: min-max scaling and z-score standardization
def normalize(arr):
    """Min-max scale an array-like so its values span [0, 1].

    Parameters
    ----------
    arr : array-like
        Numeric data of any shape; it is copied, never modified in place.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr`` where the minimum maps to 0 and
        the maximum maps to 1. If every element is identical (zero range) an
        all-zero float array is returned instead of dividing by zero.

    Raises
    ------
    ValueError
        If ``arr`` is empty (raised by ``np.min``).
    """
    arr = np.array(arr)
    shifted = arr - np.min(arr)
    span = np.max(shifted)
    # A constant input has zero range; dividing would yield NaN and a
    # RuntimeWarning, so map every element to 0 instead.
    if span == 0:
        return np.zeros(shifted.shape, dtype=float)
    return shifted / span

def gaussian_normalize(arr):
    """Standardize an array-like to zero mean and unit standard deviation.

    Parameters
    ----------
    arr : array-like
        Numeric data of any shape; it is copied, never modified in place.

    Returns
    -------
    numpy.ndarray
        ``(arr - mean) / std`` computed over all elements. If the standard
        deviation is zero (constant input) an all-zero float array is
        returned instead of dividing by zero.
    """
    arr = np.array(arr)
    centered = arr - np.mean(arr)
    spread = np.std(centered)
    # A constant input has zero spread; dividing would yield NaN and a
    # RuntimeWarning, so map every element to 0 instead.
    if spread == 0:
        return np.zeros(centered.shape, dtype=float)
    return centered / spread

# Bubble or no bubble

In [3]:
# Load the pickled feature table. The context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("FeaturesDataFrame.p", "rb") as features_file:
    df = pkl.load(features_file)

# Drop rows that have bubblecount == -1 together with blobpeakfeature > 1.
df = (
    df.query('not (bubblecount == -1 & blobpeakfeature > 1)')
    .reset_index(drop=True)
)

# Binary target: 1 when bubblecount > 0, otherwise 0.
y = 1 - (np.array(df['bubblecount']) <= 0)

# Force a float matrix so the in-place standardization below cannot be
# truncated if any feature column is stored as an integer.
x = df[['edgefeature', 'blobfeature', 'blobpeakfeature']].to_numpy(dtype=float)
# NOTE(review): each column is standardized with statistics from the full
# dataset before the split, so test rows influence the scaling; consider
# fitting the scaling on the training split only.
for i in range(x.shape[1]):
    x[:, i] = gaussian_normalize(x[:, i])
print(x.shape, y.shape)

# A fixed random_state makes the 80/20 split (and downstream metrics) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(23334, 3) (23334,)
(18667, 3) (4667, 3) (18667,) (4667,)


In [10]:
# Two-layer dense network: 3 input features -> 8 ReLU units -> 2 raw output
# scores (no activation on the last layer).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, activation=tf.nn.relu, input_shape=(3,)))
model.add(tf.keras.layers.Dense(2))

In [5]:
# The final Dense layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(x_train, y_train, epochs=20)

# evaluate() returns [loss, accuracy] here because one metric was compiled in.
# verbose=0 is the documented "silent" level (3 is not a documented value).
test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print("\nLoss: %.4f, Accuracy: %.4f" % tuple(test_accuracy))

# Sequential.predict_classes() was removed in TensorFlow 2.6; taking the argmax
# over the output scores gives the same class indices.
y_pred = np.argmax(model.predict(x_test, verbose=0), axis=-1)
print(confusion_matrix(y_test, y_pred))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Loss: 0.1353, Accuracy: 0.9623
[[3182   53]
 [ 123 1309]]


# Number of bubbles

In [6]:
# Load the pickled feature table. The context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("FeaturesDataFrame.p", "rb") as features_file:
    df = pkl.load(features_file)

# Drop rows that have bubblecount == -1 together with blobpeakfeature > 1.
df = (
    df.query('not (bubblecount == -1 & blobpeakfeature > 1)')
    .reset_index(drop=True)
)

# Multi-class target: bubblecount shifted by +1, so a count of -1 becomes
# class 0 (presumably -1 is the smallest value present; verify against the data).
y = np.array(df['bubblecount']) + 1

# Force a float matrix so the in-place standardization below cannot be
# truncated if any feature column is stored as an integer.
x = df[['edgefeature', 'blobfeature', 'blobpeakfeature']].to_numpy(dtype=float)
# NOTE(review): each column is standardized with statistics from the full
# dataset before the split, so test rows influence the scaling; consider
# fitting the scaling on the training split only.
for i in range(x.shape[1]):
    x[:, i] = gaussian_normalize(x[:, i])
print(x.shape, y.shape)

# A fixed random_state makes the 80/20 split (and downstream metrics) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(23334, 3) (23334,)
(18667, 3) (4667, 3) (18667,) (4667,)


In [7]:
# Two-layer dense network: 3 input features -> 8 ReLU units -> 6 raw output
# scores (no activation on the last layer).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, activation=tf.nn.relu, input_shape=(3,)))
model.add(tf.keras.layers.Dense(6))

In [8]:
# The final Dense layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(x_train, y_train, epochs=20)

# evaluate() returns [loss, accuracy] here because one metric was compiled in.
# verbose=0 is the documented "silent" level (3 is not a documented value).
test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print("\nLoss: %.4f, Accuracy: %.4f" % tuple(test_accuracy))

# Sequential.predict_classes() was removed in TensorFlow 2.6; taking the argmax
# over the output scores gives the same class indices.
y_pred = np.argmax(model.predict(x_test, verbose=0), axis=-1)
print(confusion_matrix(y_test, y_pred))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Loss: 0.2295, Accuracy: 0.9396
[[3110    0   16    0    0]
 [  11   20   51    0    0]
 [ 139    0 1253    0    0]
 [   2    0   61    2    0]
 [   0    0    2    0    0]]
