In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
from fastai.vision.all import *
from PIL import Image
import PIL
import numpy as np

## Loading and Converting the data back to images

In [None]:
# Folder that holds the competition CSV files
data_path = '/kaggle/input/digit-recognizer'

# Read the training table first, then the test table
mnist_df_, tst_df = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

# Split the training table into its target column and the remaining pixel columns
label = mnist_df_['label']
mnist_df = mnist_df_.drop(columns='label')

# Helper that turns one row of pixel values into a PIL image
def create_image(data_row):
    """Build a 28x28 PIL image from one row of flattened pixel values.

    `data_row` must expose a `.values` array with 784 entries (for example a
    pandas Series). The values are laid out as 28 rows of 28 pixels and cast
    to `uint8` before being handed to `Image.fromarray`.
    """
    flat = data_row.values
    grid = flat.reshape(28, 28)
    return Image.fromarray(grid.astype(np.uint8))

# Root folder (relative to the working directory) that receives the PNG files
base_dir = 'mnist_tiny'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# One sub-folder per distinct label under train/, plus a single flat test/ folder
for digit in set(label):
    os.makedirs(os.path.join(train_dir, str(digit)), exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# List to store the Path objects (never filled in this cell)
image_paths = []

# Save every training row as train/<label>/<row position>.png
for i, img_label in enumerate(label):
    target = os.path.join(train_dir, str(img_label), f'{i}.png')
    create_image(mnist_df.iloc[i]).save(target)

# Save every test row as test/<row position>.png
for i in range(len(tst_df)):
    target = os.path.join(test_dir, f'{i}.png')
    create_image(tst_df.iloc[i]).save(target)


In [None]:
#getting the image files
# Derive the folder from `base_dir` (where the PNGs were written) instead of
# repeating an absolute path, so the lookup keeps matching the export step
# even when the working directory is not /kaggle/working.
trn_path = os.path.abspath(os.path.join(base_dir, 'train'))
files = get_image_files(trn_path)

In [None]:
# Open the first training file, print its size and show a thumbnail of it
first_file = files[0]
img = PILImage.create(first_file)
print(img.size)
img.to_thumb(128)

In [None]:
from fastcore.parallel import *
#Viewing the shape of each image
def f(o):
    "Open the image at `o` and return its `size`."
    opened = PILImage.create(o)
    return opened.size

# Measure all training files using 8 workers, then count how often each size occurs
sizes = parallel(f, files, n_workers=8)
pd.Series(sizes).value_counts()

In [None]:
#Creating a data loader
# 20% of the training images are held out for validation (fixed seed).
# - The source images are only 28x28, so the per-item resize goes straight to
#   the 128px the batch transforms produce; upsampling to 480px first only adds
#   work without adding information.
# - `do_flip=False`: `aug_transforms` flips horizontally by default, and a
#   mirrored digit is not a valid example of its class.
dls = ImageDataLoaders.from_folder(trn_path, valid_pct=0.2, seed=42,
    item_tfms=Resize(128, method='squish'),
    batch_tfms=aug_transforms(size=128, min_scale=0.75, do_flip=False))

dls.show_batch(max_n=6)

## Creating the first model

In [None]:
#Creating a resnet26d learner
# Built on `dls`, reporting the error rate, with '.' as the learner path;
# `to_fp16` then switches it to half-precision training.
learn = vision_learner(dls, 'resnet26d', metrics=error_rate, path='.')
learn = learn.to_fp16()

In [None]:
#Finding optimum learning rate
# Run the learning-rate finder and ask for both the `valley` and `slide` suggestions
learn.lr_find(
    suggest_funcs=(valley, slide),
)

In [None]:
#Fine-tuning for 3 epochs with a base learning rate of 0.001
learn.fine_tune(3, base_lr=0.001)

## Creating the first submission

In [None]:
#loading the sample submission
# Built from `data_path`, like the train/test CSVs, so the input location is
# defined in a single place.
ss = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'))
ss

In [None]:
#Getting test files
# The test images are named `<row position>.png` without zero padding. A plain
# `.sorted()` compares paths as text (0, 1, 10, 100, ...), which would pair each
# prediction with the wrong row of the submission file. Sorting on the numeric
# file stem restores the original row order of test.csv.
# The folder is derived from `base_dir`, where the export step wrote the files.
tst_dir = os.path.abspath(os.path.join(base_dir, 'test'))
tst_files = get_image_files(tst_dir).sorted(key=lambda p: int(p.stem))
tst_dl = dls.test_dl(tst_files)

In [None]:
#making prediction
# With `with_decoded=True` three values come back; the first and third are kept
# as `probs` and `idxs`, the middle one is discarded.
preds = learn.get_preds(dl=tst_dl, with_decoded=True)
probs, _, idxs = preds
idxs

In [None]:
ss['Label'] = idxs
ss.to_csv('subm.csv', index=False)
!head subm.csv

In [None]:
#Visualizing the top losses
# Build an interpretation object from the trained learner and plot its 9 top losses
interp = Interpretation.from_learner(learn)
interp.plot_top_losses(k=9, figsize=(15,10))