In [1]:
import os
from glob import glob # extract path of each file
import pandas as pd # data preprocessing
from functools import reduce

In [2]:
# Load the per-split annotation tables, then keep only the rows whose class is "football".
dfAnno_test = pd.read_csv('/kaggle/input/data-data/test/annotations.csv')
dfAnno_train = pd.read_csv('/kaggle/input/data-data/train/annotations.csv')
dfAnnoBall_test = dfAnno_test.loc[dfAnno_test["class"] == "football"]
dfAnnoBall_train = dfAnno_train.loc[dfAnno_train["class"] == "football"]

In [3]:
# Number of annotation rows per class in the test split.
dfAnno_test['class'].value_counts()

player      173
football     13
referee       8
Name: class, dtype: int64

In [4]:
# Number of annotation rows per class in the train split.
dfAnno_train['class'].value_counts()

player      1234
football     100
referee       68
Name: class, dtype: int64

In [5]:
# Filename column of the football-only annotations (one entry per annotation row).
imagesToTrain = dfAnnoBall_train.loc[:, "filename"]
imagesToTest = dfAnnoBall_test.loc[:, "filename"]

In [6]:
# Distinct training image filenames that carry at least one football annotation.
imagesTrain = dfAnnoBall_train.loc[:, 'filename'].unique()
len(imagesTrain)

100

In [7]:
# Distinct test image filenames that carry at least one football annotation.
imagesTest = dfAnnoBall_test.loc[:, 'filename'].unique()
len(imagesTest)

13

In [8]:
# One-column frame of the unique training filenames, plus the same names as a tuple.
imagesTrain_df = pd.DataFrame({'filename': imagesTrain})
img_train = tuple(imagesTrain_df['filename'].tolist())

In [9]:
# One-column frame of the unique test filenames, plus the same names as a tuple.
imagesTest_df = pd.DataFrame({'filename': imagesTest})
img_test = tuple(imagesTest_df['filename'].tolist())

In [10]:
# Sanity check: number of distinct football images in (train, test).
len(img_train), len(img_test)

(100, 13)

In [11]:
# Keep the football annotations whose image is in the selected filename tuple.
# The names are passed to `isin` as data rather than interpolated into a query
# string: the tuple's repr would be re-parsed as expression source, which fails
# for values that are not valid literals (e.g. a missing filename renders as `nan`).
# `.copy()` yields independent frames, so columns added later are not written
# into a slice of the filtered annotation tables.
train_df = dfAnnoBall_train[dfAnnoBall_train['filename'].isin(img_train)].copy()
test_df = dfAnnoBall_test[dfAnnoBall_test['filename'].isin(img_test)].copy()

In [12]:
# center x, center y
# Convert the pixel-space corner boxes (xmin, ymin, xmax, ymax) to centre/size form,
# each divided by the image's width or height column so values are fractions of the image.
# `assign` adds all four columns to a new frame in one step instead of item-assigning
# onto a frame that was produced by filtering another frame.
train_df = train_df.assign(
    center_x=lambda d: ((d['xmax'] + d['xmin']) / 2) / d['width'],
    center_y=lambda d: ((d['ymax'] + d['ymin']) / 2) / d['height'],
    w=lambda d: (d['xmax'] - d['xmin']) / d['width'],
    h=lambda d: (d['ymax'] - d['ymin']) / d['height'],
)

In [13]:
# Preview the first rows, including the new center_x / center_y / w / h columns.
train_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,center_x,center_y,w,h
10,yt1s-com---Resume-Liverpool-01-Burnley-Premier...,1280,720,football,408,401,438,431,0.330469,0.577778,0.023438,0.041667
25,1-fps-2_00722_jpeg_jpg.rf.099a2974c597239872e5...,398,224,football,243,75,250,80,0.619347,0.345982,0.017588,0.022321
36,1-fps-2_00045_jpeg_jpg.rf.024d4573e1a8ce924d9e...,398,224,football,95,138,104,147,0.25,0.636161,0.022613,0.040179
49,1-fps-2_00005_jpeg_jpg.rf.017542df0a9b465349d4...,398,224,football,243,90,250,96,0.619347,0.415179,0.017588,0.026786
57,1-fps-2_00038_jpeg_jpg.rf.073d52d4e3a48453caf7...,398,224,football,147,83,155,89,0.379397,0.383929,0.020101,0.026786


In [14]:
# center x, center y
# Convert the pixel-space corner boxes (xmin, ymin, xmax, ymax) to centre/size form,
# each divided by the image's width or height column so values are fractions of the image.
# `assign` adds all four columns to a new frame in one step instead of item-assigning
# onto a frame that was produced by filtering another frame.
test_df = test_df.assign(
    center_x=lambda d: ((d['xmax'] + d['xmin']) / 2) / d['width'],
    center_y=lambda d: ((d['ymax'] + d['ymin']) / 2) / d['height'],
    w=lambda d: (d['xmax'] - d['xmin']) / d['width'],
    h=lambda d: (d['ymax'] - d['ymin']) / d['height'],
)

In [15]:
# Preview the first rows, including the new center_x / center_y / w / h columns.
test_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,center_x,center_y,w,h
17,1-fps-2_00339_jpeg_jpg.rf.7a78aba0034a7c3daf39...,398,224,football,216,61,224,69,0.552764,0.290179,0.020101,0.035714
23,1-fps-2_00014_jpeg_jpg.rf.2d7d526c69632431e3d9...,398,224,football,293,89,299,94,0.743719,0.408482,0.015075,0.022321
39,1-fps-2_00269_jpeg_jpg.rf.7cafcbbf190500cadd61...,398,224,football,281,103,292,112,0.719849,0.479911,0.027638,0.040179
55,1-fps-2_00267_jpeg_jpg.rf.1ece704258aa28a27c14...,398,224,football,251,104,259,112,0.640704,0.482143,0.020101,0.035714
64,1-fps-2_00022_jpeg_jpg.rf.cb129f66c83086ad7333...,398,224,football,168,136,177,145,0.433417,0.627232,0.022613,0.040179


In [16]:
# label encoding
def label_encoding(x):
    """Return the integer class id for class name ``x``.

    Only 'football' is mapped (to 0); any other name raises KeyError.
    """
    return {'football': 0}[x]

In [17]:
# Attach the integer class id (via label_encoding) as column 'id'.
# `assign` returns a new frame rather than writing into the existing one in place,
# so a frame already displayed by an earlier cell is not silently modified.
train_df = train_df.assign(id=train_df['class'].apply(label_encoding))
test_df = test_df.assign(id=test_df['class'].apply(label_encoding))

In [18]:
# Preview the first 10 rows to check the new 'id' column.
train_df.head(10)

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,center_x,center_y,w,h,id
10,yt1s-com---Resume-Liverpool-01-Burnley-Premier...,1280,720,football,408,401,438,431,0.330469,0.577778,0.023438,0.041667,0
25,1-fps-2_00722_jpeg_jpg.rf.099a2974c597239872e5...,398,224,football,243,75,250,80,0.619347,0.345982,0.017588,0.022321,0
36,1-fps-2_00045_jpeg_jpg.rf.024d4573e1a8ce924d9e...,398,224,football,95,138,104,147,0.25,0.636161,0.022613,0.040179,0
49,1-fps-2_00005_jpeg_jpg.rf.017542df0a9b465349d4...,398,224,football,243,90,250,96,0.619347,0.415179,0.017588,0.026786,0
57,1-fps-2_00038_jpeg_jpg.rf.073d52d4e3a48453caf7...,398,224,football,147,83,155,89,0.379397,0.383929,0.020101,0.026786,0
67,yt1s-com---Resume-Liverpool-01-Burnley-Premier...,1280,720,football,224,449,253,477,0.186328,0.643056,0.022656,0.038889,0
78,1-fps-2_00049_jpeg_jpg.rf.0dba8e746d7b9d95ff11...,398,224,football,169,144,177,152,0.434673,0.660714,0.020101,0.035714,0
92,yt1s-com---Resume-Tottenham-20-West-Bromwich-P...,1280,720,football,885,449,903,472,0.698438,0.639583,0.014063,0.031944,0
103,yt1s-com---Resume-Liverpool-14-Manchester-City...,1280,720,football,521,260,547,287,0.417187,0.379861,0.020313,0.0375,0
112,1-fps-2_00036_jpeg_jpg.rf.1ec0d97bec4830cd5629...,398,224,football,147,80,154,86,0.378141,0.370536,0.017588,0.026786,0


In [19]:
# Source folders the images are copied from (passed to save_data as src_folder_path).
parent_train_images='/kaggle/input/data-data/train'
parent_test_images='/kaggle/input/data-data/test'

In [20]:
# Output layout: one parent folder holding the train and test image/label folders.
parent_folder = '/kaggle/working/soccer-exercise-ball'
train_folder = os.path.join(parent_folder, 'trainFootball')
test_folder = os.path.join(parent_folder, 'testFootball')

# makedirs(..., exist_ok=True) creates parent_folder as needed and does not raise
# FileExistsError when this cell is run again in the same session.
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

In [21]:
# Label columns in the order they are written to file, plus the filename grouping key.
cols = ['filename', 'id', 'center_x', 'center_y', 'w', 'h']
groupby_obj_train = train_df.loc[:, cols].groupby('filename')
groupby_obj_test = test_df.loc[:, cols].groupby('filename')

In [22]:
import shutil

In [23]:
#groupby_obj_train.get_group('000009.jpg').set_index('filename').to_csv('sample.txt',index=False,header=False)
# save each image in the train/test folder and its respective labels in a .txt file
def save_data(filename, src_folder_path, folder_path, group_obj):
    """Copy one image into ``folder_path`` and write its label file next to it.

    Parameters
    ----------
    filename : image file name inside ``src_folder_path``; also the group key.
    src_folder_path : folder the image is copied from.
    folder_path : destination folder for the image copy and the ``.txt`` file.
    group_obj : pandas GroupBy keyed by filename whose groups include a
        ``filename`` column.

    The label file has the image's base name with a ``.txt`` extension and holds
    one space-separated row per annotation, with no header, index or filename.
    Returns None.
    """
    # copy (not move) the image: the source file stays where it is
    image_src = os.path.join(src_folder_path, filename)
    image_dst = os.path.join(folder_path, filename)
    shutil.copy(image_src, image_dst)

    # label file path: <image name without extension>.txt in the destination folder
    stem, _ext = os.path.splitext(filename)
    label_path = os.path.join(folder_path, stem + '.txt')

    # filename becomes the index and index=False drops it, leaving only the label columns
    rows = group_obj.get_group(filename).set_index('filename')
    rows.to_csv(label_path, sep=' ', index=False, header=False)

In [24]:
# One entry per image: the group keys (filenames) of each split, as Series.
filename_series = pd.Series(list(groupby_obj_train.groups))
filename_series_test = pd.Series(list(groupby_obj_test.groups))

In [25]:
# Copy every training image and write its label file; save_data returns None, so the
# displayed Series of None values only confirms one call per filename.
filename_series.apply(
    lambda name: save_data(name, parent_train_images, train_folder, groupby_obj_train)
)

0     None
1     None
2     None
3     None
4     None
      ... 
95    None
96    None
97    None
98    None
99    None
Length: 100, dtype: object

In [26]:
# Copy every test image and write its label file; save_data returns None, so the
# displayed Series of None values only confirms one call per filename.
filename_series_test.apply(
    lambda name: save_data(name, parent_test_images, test_folder, groupby_obj_test)
)

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
dtype: object

In [27]:
# Fetch the YOLOv5 repository; git clones it into ./yolov5 under the current working directory.
# NOTE(review): no tag or commit is pinned, so the code fetched may differ between runs.
!git clone https://github.com/ultralytics/yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 15921, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 15921 (delta 17), reused 28 (delta 13), pack-reused 15880[K
Receiving objects: 100% (15921/15921), 14.66 MiB | 23.79 MiB/s, done.
Resolving deltas: 100% (10916/10916), done.


In [28]:
# Switch into the cloned repo: the following cells refer to requirements.txt and
# train.py by relative path.
os.chdir('/kaggle/working/yolov5')

In [29]:
# Install the dependencies listed in the repo's requirements.txt (relative to the
# current directory, so this relies on the preceding chdir).
!pip install -r requirements.txt

Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralytics>=8.0.147 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.0.160-py3-none-any.whl (609 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m609.3/609.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting setuptools>=65.5.1 (from -r requirements.txt (line 42))
  Downloading setuptools-68.1.2-py3-none-any.whl (805 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.1/805.1 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: setuptools, thop, ultralytics
  Attempting uninstall: setuptools
    Found existing installation: setuptools 59.8.0
    Uninstalling setuptools-59.8.0:
      Successfully uninstalled setuptools-59.8.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the 

In [30]:
# Train with the yolov5s.yaml model config on the dataset described by dataset.yaml:
# batch size 32, 100 epochs, run name "Model".
# NOTE(review): the recorded log shows an interactive W&B prompt that times out after
# 30 seconds before training starts — presumably avoidable by disabling W&B up front; confirm.
!python train.py --data /kaggle/input/ball-yaml/dataset.yaml --cfg yolov5s.yaml --batch-size 32 --name Model --epochs 100

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=yolov5s.yaml, data=/kaggle/input/ball-yaml/dataset.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=32, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=Model, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0

In [31]:
# List the current directory (the yolov5 checkout after the earlier chdir).
ls

CITATION.cff     [0m[01;34m__pycache__[0m/   export.py         [01;34msegment[0m/        val.py
CONTRIBUTING.md  benchmarks.py  hubconf.py        setup.cfg       yolov5s.pt
LICENSE          [01;34mclassify[0m/      [01;34mmodels[0m/           train.py
README.md        [01;34mdata[0m/          requirements.txt  tutorial.ipynb
README.zh-CN.md  detect.py      [01;34mruns[0m/             [01;34mutils[0m/
