<a href="https://colab.research.google.com/github/nadaelgendy2/NeuralNetworkProj/blob/main/sportsclassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install tflearn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tflearn
  Downloading tflearn-0.5.0.tar.gz (107 kB)
[K     |████████████████████████████████| 107 kB 15.1 MB/s 
Building wheels for collected packages: tflearn
  Building wheel for tflearn (setup.py) ... [?25l[?25hdone
  Created wheel for tflearn: filename=tflearn-0.5.0-py3-none-any.whl size=127299 sha256=7c616522541a92091f8dbff237a50c4517654a460d208ba2cbc6b8a7ccf09570
  Stored in directory: /root/.cache/pip/wheels/65/9b/15/cb1e6b279c14ed897530d15cfd7da8e3df8a947e593f5cfe59
Successfully built tflearn
Installing collected packages: tflearn
Successfully installed tflearn-0.5.0


In [None]:
# Make Google Drive available to this Colab runtime; the dataset folders read
# by the later cells are addressed through this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os
from tensorflow.keras.models import Model, load_model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import os

In [None]:
# os.listdir returns entries in arbitrary order; sorting keeps the later
# train/validation split (which uses a fixed random_state) reproducible.
SportsTrainSet = sorted(os.listdir("/content/drive/MyDrive/Dataset/NN Dataset/Train"))

# Run configuration and input geometry for the small CNN.
Fast_Run = False
imW = 128
imH = 128
channels = 3
imsize = (imW, imH)

# Numeric class id for each sport; the sport is the text before the first
# underscore of a file name.
_SPORT_TO_ID = {
    'Basketball': 0,
    'Football': 1,
    'Rowing': 2,
    'Swimming': 3,
    'Tennis': 4,
    'Yoga': 5,
}

# Pair every recognised file with its class id. Files whose prefix is not a
# known sport are skipped so that the filename and category columns always
# have the same length (a mismatch would make pd.DataFrame raise).
_labelled_files = []
for filename in SportsTrainSet:
    category = filename.split('_')[0]
    if category in _SPORT_TO_ID:
        _labelled_files.append((filename, _SPORT_TO_ID[category]))

_n_skipped = len(SportsTrainSet) - len(_labelled_files)
if _n_skipped:
    print(f"Skipped {_n_skipped} file(s) with an unrecognised sport prefix")

SportClasses = [class_id for _name, class_id in _labelled_files]

df = pd.DataFrame({
    'filename': [name for name, _class_id in _labelled_files],
    'category': SportClasses
})

# Sequential CNN: three Conv -> BatchNorm -> MaxPool -> Dropout stages with
# 32, 64 and 128 filters, followed by a dense classification head.
model = Sequential()

for stage_index, n_filters in enumerate((32, 64, 128)):
    # Only the first layer of the network declares the input shape.
    first_layer_kwargs = {'input_shape': (imW, imH, channels)} if stage_index == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(BatchNormalization())            # normalise activations between stages
    model.add(MaxPooling2D(pool_size=(2, 2)))  # halve the spatial resolution
    model.add(Dropout(0.25))                   # regularisation against overfitting

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax over 6 units (one unit per sport class).
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()
# Stop training once the validation loss has not improved for 10 epochs
# (EarlyStopping monitors 'val_loss' unless told otherwise).
earlystop = EarlyStopping(patience=10)
# Halve the learning rate when validation accuracy stalls for 2 epochs.
# The model is compiled with metrics=['accuracy'], so the logged key is
# 'val_accuracy'; with 'val_acc' the callback never finds its metric.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=2, verbose=1, factor=0.5, min_lr=0.00001)
callbacks = [earlystop, learning_rate_reduction]

# The image generator with class_mode='categorical' works on string labels,
# so map the numeric ids in the category column back to sport names.
df["category"] = df["category"].replace({0: 'Basketball', 1: 'Football', 2: 'Rowing', 3: 'Swimming', 4: 'Tennis', 5: 'Yoga'})

train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size = 20

# Training images are augmented on the fly; labels are one-hot encoded by the
# generator (class_mode='categorical').
train_datagen = ImageDataGenerator(rotation_range=15, rescale=1./255, shear_range=0.1, zoom_range=0.2, horizontal_flip=True, width_shift_range=0.1, height_shift_range=0.1)
train_generator = train_datagen.flow_from_dataframe(train_df, "/content/drive/MyDrive/Dataset/NN Dataset/Train", x_col='filename', y_col='category', target_size=imsize, class_mode='categorical', batch_size=batch_size)

# Validation images are only rescaled (no augmentation). Passing the training
# class order explicitly keeps the one-hot columns aligned between the two
# generators even if a class is missing from the validation split.
class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(validate_df, "/content/drive/MyDrive/Dataset/NN Dataset/Train", x_col='filename', y_col='category', target_size=imsize, class_mode='categorical', classes=class_names, batch_size=batch_size, shuffle=False)

epochs = 3 if Fast_Run else 20
# Model.fit accepts generators directly (fit_generator is deprecated).
# validation_data is what produces the val_* metrics both callbacks monitor.
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=max(1, total_train // batch_size),
    validation_data=validation_generator,
    callbacks=callbacks,
)

# Weights-only checkpoint. It uses its own file name because the full-model
# save at the end of this cell writes "SportsClassificationCNNModel.h5" and
# would otherwise overwrite this file.
model.save_weights("SportsClassificationCNNModel.weights.h5")

test_filenames = os.listdir("/content/drive/MyDrive/Dataset/NN Dataset/Test")
test_df = pd.DataFrame({
    'filename': test_filenames
})

# Test images are only rescaled; shuffle=False keeps predictions in the same
# order as the rows of the dataframe.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df, "/content/drive/MyDrive/Dataset/NN Dataset/Test", x_col='filename', y_col=None, class_mode=None, target_size=imsize, batch_size=batch_size, shuffle=False)

# The generator drops rows it cannot validate as images; keep only the rows it
# actually serves so that predictions and file names stay aligned.
test_df = test_df[test_df['filename'].isin(test_generator.filenames)].reset_index(drop=True)
nb_samples = test_df.shape[0]

# Model.predict accepts the generator directly (predict_generator is
# deprecated) and runs through it exactly once.
predict = model.predict(test_generator)

# Each prediction row holds one probability per class; take the most likely
# class index, translate it to the sport name via the training generator's
# class order, then to the numeric id used in the submission file.
label_map = {index: name for name, index in train_generator.class_indices.items()}
sport_to_id = {'Basketball': 0, 'Football': 1, 'Rowing': 2, 'Swimming': 3, 'Tennis': 4, 'Yoga': 5}
predicted_names = pd.Series(np.argmax(predict, axis=-1), index=test_df.index).map(label_map)
test_df['category'] = predicted_names.map(sport_to_id)

# Submission layout: image_name, label.
submission_df = test_df.rename(columns={'filename': 'image_name', 'category': 'label'})[['image_name', 'label']]
submission_df.to_csv('submissionfinal2.csv', index=False)


model.save("SportsClassificationCNNModel.h5")

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 126, 126, 32)      896       
                                                                 
 batch_normalization_8 (Batc  (None, 126, 126, 32)     128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 63, 63, 32)       0         
 2D)                                                             
                                                                 
 dropout_8 (Dropout)         (None, 63, 63, 32)        0         
                                                                 
 conv2d_7 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 batch_normalization_9 (Batc  (None, 61, 61, 64)      

  history = model.fit_generator( train_generator, epochs=epochs,  steps_per_epoch=total_train//batch_size, callbacks=callbacks)






Epoch 2/20



Epoch 3/20



Epoch 4/20



Epoch 5/20



Epoch 6/20



Epoch 7/20



Epoch 8/20



Epoch 9/20



Epoch 10/20



Epoch 11/20



Epoch 12/20



Epoch 13/20



Epoch 14/20



Epoch 15/20



Epoch 16/20



Epoch 17/20



Epoch 18/20



Epoch 19/20



Epoch 20/20



Found 688 validated image filenames.


  predict = model.predict_generator(test_generator, steps=np.ceil(nb_samples/batch_size))


In [None]:

FAST_RUN = False
# NOTE(review): the 128-pixel values below are not used by the VGG16 network
# in this cell, which is built for 224x224 inputs - confirm whether they can go.
imW = 128
imH = 128
channels = 3
IMAGE_SIZE = (imW, imH)
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

_input = Input((224, 224, 3))

# VGG16 convolutional part: five stages, each a run of 3x3 'same'-padded ReLU
# convolutions followed by a 2x2 max-pool. Entries are (conv layers, filters).
VGG16_STAGES = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

features = _input
for n_convs, n_filters in VGG16_STAGES:
    for conv_index in range(n_convs):
        features = Conv2D(filters=n_filters, kernel_size=(3, 3), padding="same", activation="relu")(features)
    features = MaxPooling2D((2, 2))(features)
pool5 = features  # output of the last pooling stage

# Classifier head: two 4096-unit dense layers and a 6-way softmax (one unit
# per sport class).
flat   = Flatten()(pool5)
dense1 = Dense(4096, activation="relu")(flat)
dense2 = Dense(4096, activation="relu")(dense1)
output = Dense(6, activation="softmax")(dense2)

model = Model(inputs=_input, outputs=output)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Copy the training images into one sub-folder per sport so they can be read
# with flow_from_directory. The sport is the text before the first underscore
# of the file name (extension removed first).
import shutil

# Absolute target so it always matches the '/content/train' directory the
# training generators read from, regardless of the current working directory.
CLASS_FOLDERS_ROOT = '/content/train'

for dirname, _subdirs, filenames in os.walk('/content/drive/MyDrive/neural dataset/Train'):
    for filename in filenames:
        # splitext copes with extensions of any length (.jpg, .jpeg, ...).
        sport = os.path.splitext(filename)[0].split('_')[0]
        class_dir = os.path.join(CLASS_FOLDERS_ROOT, sport)
        os.makedirs(class_dir, exist_ok=True)
        shutil.copy(os.path.join(dirname, filename), os.path.join(class_dir, filename))

# Train and validate VGG16 on the per-class training folders.
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop once the validation loss (EarlyStopping's default monitor) has not
# improved for 10 epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate when validation accuracy stalls for 2 epochs. The
# model is compiled with metrics=['accuracy'], so the logged key is
# 'val_accuracy'; with 'val_acc' the callback never finds its metric.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

# Single source of truth for the generator batch size. The value matches the
# 32 that both generators previously hard-coded (a separate batch_size of 15
# was defined but never used).
batch_size = 32

# Augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.2)

# Validation images are only rescaled. The same validation_split value is
# used so that both generators cut the folders at the same point.
validation_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2)

IMAGE_SIZE = 224
TRAIN_DIR = '/content/train'

train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training')

validation_generator = validation_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation')

epochs = 1 if FAST_RUN else 20

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    callbacks=[earlystop, learning_rate_reduction]
)

model.save("VGGNET16.h5")

# Run the saved VGG16 model over every test image and write the submission CSV.
from tensorflow.keras.utils import img_to_array
import keras.utils as image
from tensorflow.keras.models import Model, load_model

test_dir = "/content/drive/MyDrive/neural dataset/Test"

# List the folder exactly once and reuse the list for both loading and the
# CSV, so image names and predictions are guaranteed to line up.
test_image_names = os.listdir(test_dir)

photos = []
for i in test_image_names:
    img_path = test_dir + '/' + i
    img = image.load_img(img_path, target_size=(224, 224))
    # Scale pixels to [0, 1]: the training generators used rescale=1./255, so
    # the network must see test images in the same range.
    img_array = img_to_array(img) / 255.0
    photos.append(img_array)

photos = np.array(photos)

# Load the trained VGG16 model from disk and predict with that loaded copy.
model2 = load_model("/content/VGGNET16.h5")

pred = model2.predict(photos)

# Class names in index order; the numeric label written to the CSV is the
# position of the sport in this list, i.e. the argmax index itself.
labels = ['Basketball', 'Football', 'Rowing', 'Swimming', 'Tennis', 'Yoga']
p_labels = np.argmax(pred, axis=1).tolist()

# Compact sanity check: how many test images were assigned to each sport.
print(pd.Series(p_labels).map(dict(enumerate(labels))).value_counts())


submittion = pd.DataFrame({'image_name': test_image_names, 'label': p_labels})

submittion.to_csv('p_vggnet16.csv', index=False)