In [1]:
%matplotlib inline
import os
from pathlib import Path
from fastai.datasets import Config
import pandas as pd
import numpy as np

import torch
import gc

# Kaggle competition slug, interpolated into the `!kaggle` commands.
competition_name = 'severstal-steel-defect-detection'

# All competition files live in a dedicated folder under the fastai data root.
base_path = Config.data_path()
data_path = base_path.joinpath('Steel_Defects_Detection')
data_path.mkdir(exist_ok=True, parents=True)

## Files setup

In [7]:
# List the files published for the competition (name, size, creation date).
!kaggle competitions files -c {competition_name}

name                    size  creationDate         
---------------------  -----  -------------------  
train.csv               18MB  2019-07-18 01:25:58  
sample_submission.csv  141KB  2019-07-18 01:26:00  
train_images.zip         1GB  2019-07-18 01:26:19  
test_images.zip        129MB  2019-07-18 01:26:20  


In [8]:
# Download every competition file into data_path (over 1 GB in total).
# NOTE(review): --force presumably re-downloads even when a local copy exists — confirm
# against the kaggle CLI help before re-running this cell.
!kaggle competitions download --force -c {competition_name} -p {data_path}

Downloading train.csv.zip to /home/jupyter/.fastai/data/Steel_Defects_Detection
 72%|███████████████████████████▍          | 5.00M/6.91M [00:00<00:00, 31.0MB/s]
100%|██████████████████████████████████████| 6.91M/6.91M [00:00<00:00, 41.5MB/s]
Downloading sample_submission.csv to /home/jupyter/.fastai/data/Steel_Defects_Detection
  0%|                                                | 0.00/141k [00:00<?, ?B/s]
100%|█████████████████████████████████████████| 141k/141k [00:00<00:00, 102MB/s]
Downloading train_images.zip to /home/jupyter/.fastai/data/Steel_Defects_Detection
 99%|██████████████████████████████████████▌| 1.15G/1.16G [00:09<00:00, 117MB/s]
100%|███████████████████████████████████████| 1.16G/1.16G [00:09<00:00, 125MB/s]
Downloading test_images.zip to /home/jupyter/.fastai/data/Steel_Defects_Detection
 95%|█████████████████████████████████████▊  | 122M/129M [00:02<00:00, 50.6MB/s]
100%|████████████████████████████████████████| 129M/129M [00:02<00:00, 66.0MB/s]


In [9]:
# Check what the download left in the data folder.
data_path.ls()

[PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/train.csv.zip'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/train_images.zip'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/sample_submission.csv'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/test_images.zip')]

In [12]:
from zipfile import ZipFile


def extract_and_remove(file_name, destination, expected):
    """Extract an archive from ``data_path`` and delete it afterwards.

    The cell can be re-run safely: if the archive has already been deleted by a
    previous run and its extracted output is present, extraction is skipped
    instead of raising.

    Parameters
    ----------
    file_name : str
        Name of the zip archive inside ``data_path``.
    destination : Path
        Directory to extract into; created (with parents) if missing.
    expected : Path
        A path that exists once the archive has been extracted. Used to tell
        "already extracted" apart from "never downloaded".

    Raises
    ------
    FileNotFoundError
        If neither the archive nor ``expected`` exists.
    """
    archive_path = data_path/file_name
    print(f"Unziping {file_name} ...")
    if archive_path.exists():
        # exist_ok=True: a leftover folder from an interrupted run must not abort the cell.
        destination.mkdir(parents=True, exist_ok=True)
        with ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(destination)
        # Reached only after a successful extraction, so a failed run can be retried.
        os.remove(archive_path)
    elif not expected.exists():
        raise FileNotFoundError(
            f"{archive_path} is missing and {expected} does not exist; "
            "run the download cell first."
        )
    print("Done")


train_files_destination = data_path/'train_images'
test_files_destination = data_path/'test_images'

# (archive name, extraction folder, path that proves the archive was extracted)
for file_name, destination, expected in [
    ("train.csv.zip", data_path, data_path/'train.csv'),
    ("train_images.zip", train_files_destination, train_files_destination),
    ("test_images.zip", test_files_destination, test_files_destination),
]:
    extract_and_remove(file_name, destination, expected)

Unziping train.csv.zip ...
Done
Unziping train_images.zip ...
Done
Unziping test_images.zip ...
Done


In [13]:
# Confirm the archives were replaced by train.csv and the two image folders.
data_path.ls()

[PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/train.csv'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/sample_submission.csv'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/test_images'),
 PosixPath('/home/jupyter/.fastai/data/Steel_Defects_Detection/train_images')]

## EDA

In [2]:
# Load the training labels: one row per ImageId_ClassId with its EncodedPixels string.
train_df = pd.read_csv(data_path/'train.csv')

In [3]:
# Preview the first rows of the raw labels table.
train_df.head()

Unnamed: 0,ImageId_ClassId,EncodedPixels
0,0002cc93b.jpg_1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0002cc93b.jpg_2,
2,0002cc93b.jpg_3,
3,0002cc93b.jpg_4,
4,00031f466.jpg_1,


In [4]:
# Prepare train df


def split_image_and_class(labels_df):
    """Split ``ImageId_ClassId`` into an ``Image_Id`` index and a ``Class_Id`` column.

    The combined key is split on its last underscore, so the class id may be any
    length. ``Class_Id`` is kept as a string. A new frame is returned; the input
    is not modified.

    If ``labels_df`` has no ``ImageId_ClassId`` column (i.e. it was already
    prepared), it is returned unchanged, which makes re-running the cell safe.
    """
    if 'ImageId_ClassId' not in labels_df.columns:
        return labels_df

    id_parts = labels_df['ImageId_ClassId'].str.rsplit('_', n=1)
    return (
        labels_df
        .assign(Image_Id=id_parts.str[0], Class_Id=id_parts.str[-1])
        .drop(columns='ImageId_ClassId')
        .set_index('Image_Id')
    )


train_df = split_image_and_class(train_df)

In [5]:
# Preview the prepared table: indexed by Image_Id, with EncodedPixels and Class_Id columns.
train_df.head()

Unnamed: 0_level_0,EncodedPixels,Class_Id
Image_Id,Unnamed: 1_level_1,Unnamed: 2_level_1
0002cc93b.jpg,29102 12 29346 24 29602 24 29858 24 30114 24 3...,1
0002cc93b.jpg,,2
0002cc93b.jpg,,3
0002cc93b.jpg,,4
00031f466.jpg,,1


Unnamed: 0_level_0,EncodedPixels,Class_Id
Image_Id,Unnamed: 1_level_1,Unnamed: 2_level_1
0002cc93b.jpg,29102 12 29346 24 29602 24 29858 24 30114 24 3...,1
0002cc93b.jpg,,2
0002cc93b.jpg,,3
0002cc93b.jpg,,4


Unnamed: 0_level_0,EncodedPixels,Class_Id
Image_Id,Unnamed: 1_level_1,Unnamed: 2_level_1
00031f466.jpg,,1
00031f466.jpg,,2
00031f466.jpg,,3
00031f466.jpg,,4
