In [1]:
import cv2
import csv
import numpy as np
import pandas as pd
import os
from tqdm import tqdm, keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNELS = 1
EPOCHS = 10

filenames = os.listdir("/kaggle/input/sports-classification/Train")
categories = []
for filename in filenames:
    category = filename.split('_')[0]
    if category == 'Basketball':
        categories.append(0)
    elif category == 'Football':
        categories.append(1)
    elif category == 'Rowing':
        categories.append(2)
    elif category == 'Swimming':
        categories.append(3)
    elif category == 'Tennis':
        categories.append(4)
    else:
        categories.append(5)

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})

model=Sequential()

model.add(Conv2D(20, kernel_size=(5, 5), activation='relu',padding='same',input_shape=(224,224,3)))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
### 1st layer
model.add(Conv2D(50, (5,5), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2), padding='same'))
model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))



model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

model.summary()




df["category"] = df["category"].replace(
    {0: 'Basketball', 1: 'Football', 2: 'Rowing', 3: 'Swimming', 4: 'Tennis', 5: 'Yoga'})

train_df, validate_df = train_test_split(df, test_size=0.25, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size = 512

train_datagen = ImageDataGenerator(
    rescale=1. / 255
)

train_datagen2 = ImageDataGenerator(
    rotation_range=15,
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    "/kaggle/input/sports-classification/Train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)



validation_datagen = ImageDataGenerator(rescale=1. / 255)

validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "/kaggle/input/sports-classification/Train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

#if os.path.exists('model.h5'):
 #   model.load_weights('model.h5')
#else:
history = model.fit(
        train_generator,
        epochs=EPOCHS,
        batch_size=batch_size,
        validation_data=validation_generator,
        validation_steps=total_validate // batch_size,
        steps_per_epoch=total_train // batch_size,
        

    )
    #model.save_weights('model.h5')

test_filenames = os.listdir("/kaggle/input/sports-classification/Test")
test_df = pd.DataFrame({
    'filename': test_filenames
})
nb_samples = test_df.shape[0]

test_gen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    "/kaggle/input/sports-classification/Test",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

2022-12-24 21:27:12.306965: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 20)      1520      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 20)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 50)      25050     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 50)        0         
_________________________________________________________________
flatten (Flatten)            (None, 156800)            0         
_________________________________________________________________
dense (Dense)                (None, 500)               78400500  
_________________________________________________________________
batch_normalization (BatchNo (None, 500)               2

2022-12-24 21:27:20.941585: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 688 validated image filenames.


In [2]:
predict = model.predict(test_generator, steps=np.ceil(nb_samples / batch_size))
test_df['category'] = np.argmax(predict, axis=-1)

label_map = dict((v, k) for k, v in train_generator.class_indices.items())
test_df['category'] = test_df['category'].replace(label_map)

# test_df['category'] = test_df['category'].replace({'dog': 1, 'cat': 0})
test_df['category'] = test_df['category'].replace(
    {'Basketball': 0, 'Football': 1, 'Rowing': 2, 'Swimming': 3, 'Tennis': 4, 'Yoga': 5})


In [3]:
submission_df = test_df.copy()
submission_df['image_name'] = submission_df['filename']
submission_df['label'] = submission_df['category']
submission_df.drop(['filename', 'category'], axis=1, inplace=True)

submission_df.to_csv('saved-data', index=False)


In [4]:

TEST_DIR = '/kaggle/input/sports-classification/Test'

predictions=[]
ind=1
for test_imgs in tqdm(os.listdir(TEST_DIR)):
    path=os.path.join(TEST_DIR,test_imgs)
    test_img=cv2.imread(path,1)
    test_img=cv2.resize(test_img,(224,224))
    test_img=cv2.cvtColor(test_img,cv2.COLOR_BGR2RGB)
    test_img=cv2.normalize(test_img,None,alpha=0,beta=1,norm_type=cv2.NORM_MINMAX,dtype=cv2.CV_32F)
    test_img=test_img.reshape(-1,224,224,3)
    pred=model.predict([test_img])[0]
    max=0
    index=np.array(pred).argmax()
    predictions.append([test_imgs,index])
    
header=["image_name","label"]
file=open("datafile",'w+',newline='')
with file:
    write=csv.writer(file)
    write.writerow(header)
    write.writerows(predictions)
print(predictions)

100%|██████████| 688/688 [01:06<00:00, 10.29it/s]

[['623.jpg', 3], ['208.jpg', 5], ['473.jpg', 1], ['333.jpg', 1], ['537.jpg', 1], ['45.jpg', 2], ['369.jpg', 0], ['56.jpg', 5], ['654.jpg', 3], ['89.jpg', 2], ['20.jpg', 3], ['275.jpg', 3], ['212.jpg', 3], ['239.jpg', 1], ['58.jpg', 5], ['150.jpg', 3], ['6.jpg', 3], ['109.jpg', 5], ['149.jpg', 2], ['187.jpg', 3], ['521.jpg', 3], ['436.jpg', 3], ['76.jpg', 5], ['539.jpg', 1], ['355.jpg', 1], ['516.jpg', 5], ['71.jpg', 3], ['474.jpg', 1], ['501.jpg', 5], ['342.jpg', 3], ['429.jpg', 5], ['646.jpg', 3], ['682.jpg', 2], ['544.jpg', 3], ['377.jpg', 5], ['272.jpg', 5], ['270.jpg', 5], ['182.jpg', 3], ['215.jpg', 2], ['489.jpg', 3], ['576.jpg', 3], ['185.jpg', 5], ['613.jpg', 5], ['243.jpg', 3], ['153.jpg', 0], ['189.jpg', 5], ['143.jpg', 5], ['476.jpg', 3], ['327.jpg', 0], ['253.jpg', 3], ['343.jpg', 3], ['115.jpg', 3], ['131.jpg', 5], ['446.jpg', 3], ['626.jpg', 0], ['425.jpg', 3], ['5.jpg', 5], ['366.jpg', 3], ['151.jpg', 5], ['426.jpg', 3], ['503.jpg', 0], ['8.jpg', 3], ['641.jpg', 3], ['62


