In [14]:
import pandas as pd 
import numpy as np
import keras
from PIL import Image
import os
import json
import cv2
import torch

In [15]:
directory = "data/dataset_1/0"
images = []

def preprocess_image(image_path, target_size):
    image = Image.open(image_path)
    image = image.resize(target_size)
    image = np.array(image) / 255.0  # Normalize pixel values to [0, 1]
    return image

for filename in os.listdir(directory):
    # Check if the file is an image (you might want to improve this check)
    if filename.endswith(".jpg") or filename.endswith(".png"):
        # Construct the full path to the image file
        filepath = os.path.join(directory, filename)
        image = Image.open(filepath)
        print("Image:", filename)
        print("Image format:", image.format)
        print("Image size:", image.size)
        print("Image mode:", image.mode)
        image = preprocess_image(filepath, target_size=(60,40))
        images.append(image)
        break


# # Path to your JSON file
# json_file_path = "BallSimSample\data.json"
# df = pd.read_json(json_file_path)
# print(df.head())

Image: 0.jpg
Image format: JPEG
Image size: (60, 40)
Image mode: L


In [16]:
def process_json(json_file, img_folder):
    with open(json_file, 'r') as f:
        data = json.load(f)
    
    length = len(data) - 1
    # print(length)
    # Extract data for each sample
    img_files = [data[str(i)]["img_file"] for i in range(length)]
    car_positions = [data[str(i)]["car_pos"] for i in range(length)]
    car_velocities = [data[str(i)]["car_vel"] for i in range(length)]
    ball_in_frames = [data[str(i)]["ball_in_frame"] for i in range(length)]
    ball_positions = data["ball_trj"]["pos"]
    ball_velocities = data["ball_trj"]["vel"]
    
    # Load and store images
    images = []
    for i, img_file in enumerate(img_files):
        img_path = os.path.join(img_folder, img_file)
        # print(img_path)
        img = cv2.imread(img_path)
        if img is not None:
            # Convert image to numpy array and normalize
            img = img.astype(np.float32) / 255.0
            images.append(img)
            # print("SUCCESS")
        else:
            print(f"Error loading image: {img_path}")
    
    # Convert images to numpy array
    images = np.array(images)
    
    # Create a dictionary with the extracted data
    sample_data = {
        'images': images,
        'car_positions': car_positions,
        'car_velocities': car_velocities,
        'ball_in_frames': ball_in_frames,
        'ball_positions': ball_positions,
        'ball_velocities': ball_velocities
    }
    
    return sample_data

def create_dataframe_from_folders(json_root_folder, img_folder):
    all_samples_data = []
    
    # Iterate over subfolders in the root JSON folder
    for i, folder in enumerate(os.listdir(json_root_folder)):
        folder_path = os.path.join(json_root_folder, folder)
        # print(folder_path)
        if os.path.isdir(folder_path):
            # Initialize an empty list to hold data for the current sample
            sample_data = process_json(os.path.join(folder_path, 'data.json'), img_folder+str(i))
            
            # Append the data for the current sample to the list of all samples
            all_samples_data.append(sample_data)
    
    # Create DataFrame from the list of dictionaries
    df = pd.DataFrame(all_samples_data)
    return df

# Example usage:
json_root_folder = 'data/dataset_1'
img_folder = 'data/dataset_1/'
df = create_dataframe_from_folders(json_root_folder, img_folder)
df.head()

Unnamed: 0,images,car_positions,car_velocities,ball_in_frames,ball_positions,ball_velocities
0,"[[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[[2.966482266153105, 3.027535346713439, 3.0876...","[[0.29231126032610155, 0.28769300499707195, 0...."
1,"[[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[[4.406167965707559, 4.457454497927218, 4.5075...","[[0.2464564468427059, 0.24076560924405016, 0.2..."
2,"[[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[[5.288511802797698, 5.4255085884006995, 5.560...","[[0.6563893731302016, 0.6450800900983077, 0.63..."
3,"[[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[[4.145333906935041, 4.174442777541345, 4.2025...","[[0.14068958460443076, 0.13584468615545922, 0...."
4,"[[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[[2.827627505129355, 2.7414890450588323, 2.657...","[[-0.41343995805969874, -0.4048754126102626, -..."


In [17]:

# first_image = df['images'][89]
# for i in range(len(first_image)):
#     # Convert the current image to uint8 (required by cv2.imshow())
#     current_image = (first_image[i] * 255).astype('uint8')
#     # Display the current image
#     cv2.imshow(f'Image {i+1}', current_image)
#     # Wait for a key press
#     cv2.waitKey(0)

# # Close all OpenCV windows
# cv2.destroyAllWindows()

In [18]:
from sklearn.model_selection import train_test_split

X = df[['images', 'car_positions', 'car_velocities']]
y = df[['ball_positions', 'ball_velocities']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

print(X_train)



                                               images  \
0   [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
1   [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
2   [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
3   [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
4   [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
..                                                ...   
75  [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
76  [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
77  [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
78  [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   
79  [[[[0. 0. 0.], [0. 0. 0.], [0. 0. 0.], [0. 0. ...   

                                        car_positions  \
0   [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...   
1   [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...   
2   [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...   
3   [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0...   
4   [[0.0, 0.0], [0.0, 0.0], [

In [19]:
from keras.layers import Input, Conv3D, MaxPooling3D, Flatten, Dense, concatenate, LSTM, Reshape
from keras.models import Model

num_images = 10
image_height = 40
image_width = 60 
num_channels = 3
num_classes = 2

# Define inputs
image_input = Input(shape=(num_images, image_height, image_width, num_channels), name='image_input')
velocity_input = Input(shape=(num_images, 2), name='velocity_input')
position_input = Input(shape=(num_images, 2), name='position_input')

# CNN branch for image processing
conv1 = Conv3D(32, kernel_size=(3, 3, 3), activation='relu')(image_input)  
maxpool1 = MaxPooling3D(pool_size=(2, 2, 2))(conv1) 
conv2 = Conv3D(64, kernel_size=(3, 3, 3), activation='relu')(maxpool1)
maxpool2 = MaxPooling3D(pool_size=(2, 2, 2))(conv2)
flatten_image = Flatten()(maxpool2)

# RNN layer 
# NEED RNN for images
lstm_velocity = LSTM(32)(velocity_input)
lstm_position = LSTM(32)(position_input)

# Concatenate features
combined = concatenate([flatten_image, lstm_velocity, lstm_position])

# Dense layers for further processing
dense1 = Dense(128, activation='relu')(combined)

output_positions = Dense(10 * 2, activation='linear')(dense1)
reshaped_positions = Reshape((2, 10))(output_positions)

output_velocities = Dense(10 * 2, activation='linear')(dense1)
reshaped_velocities = Reshape((2, 10))(output_velocities)

# Define model
model = Model(inputs=[image_input, velocity_input, position_input], outputs=[reshaped_positions, reshaped_velocities])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# Print model summary
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image_input (InputLayer)       [(None, 10, 40, 60,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv3d_8 (Conv3D)              (None, 8, 38, 58, 3  2624        ['image_input[0][0]']            
                                2)                                                                
                                                                                                  
 max_pooling3d_8 (MaxPooling3D)  (None, 4, 19, 29, 3  0          ['conv3d_8[0][0]']               
                                2)                                                          

In [20]:
import tensorflow as tf

images_tf = tf.convert_to_tensor(X_train['images'].tolist())
car_positions_tf = tf.convert_to_tensor(X_train['car_positions'].tolist())
car_velocities_tf = tf.convert_to_tensor(X_train['car_velocities'].tolist())
ball_positions_tf = tf.convert_to_tensor(y_train['ball_positions'].tolist())
ball_velocities_tf = tf.convert_to_tensor(y_train['ball_velocities'].tolist())
# print(images_tf.shape)
model.fit([images_tf, car_positions_tf, car_velocities_tf], [ball_positions_tf, ball_velocities_tf], epochs=100, verbose=1)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x238027db040>

In [21]:
test_images_tf = tf.convert_to_tensor(X_test['images'].tolist())
test_car_positions_tf = tf.convert_to_tensor(X_test['car_positions'].tolist())
test_car_velocities_tf = tf.convert_to_tensor(X_test['car_velocities'].tolist())
test_ball_positions_tf = tf.convert_to_tensor(y_test['ball_positions'].tolist())
test_ball_velocities_tf = tf.convert_to_tensor(y_test['ball_velocities'].tolist())

output = model.evaluate([test_images_tf, test_car_positions_tf, test_car_velocities_tf], [test_ball_positions_tf, test_ball_velocities_tf])

print(f"Total Loss: ", output[0])
print(f"Ball Pos Loss: ", output[1])
print(f"Ball Vel Loss: ", output[2])
print(f"Ball Pos Acc: ", output[3])
print(f"Ball Vel Acc: ", output[4])

Total Loss:  4.972076892852783
Ball Pos Loss:  4.608035564422607
Ball Vel Loss:  0.36404114961624146
Ball Pos Acc:  0.30000001192092896
Ball Vel Acc:  0.125
