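# NOTE: The following text was scraped from the GitHub web UI and is not part
# of the script: "Skip to content", "Switch branches/tags", "Go to file",
# "Cannot retrieve contributors at this time".
# Original file info: executable file, 20 lines (19 sloc), 966 bytes.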
from scipy.misc import imread
import numpy as np
import pandas as pd
import os
# Build a CSV dataset from a tree of digit images: one row per image, with the
# label in the first column followed by the flattened pixel values.

# Directory tree to convert. The folders containing the digits were renamed to
# the digit itself (e.g. digit_0 became 0), so "./train/8" holds images of 8.
root = './train'  # or './test', depending on which CSV is being created
# CSV file the rows are appended to; change it together with `root`.
output_csv = 'train.csv'

# NOTE(review): `scipy.misc.imread` (imported at the top of the file) was
# deprecated in SciPy 1.0 and removed in SciPy 1.2, so this script needs an
# older SciPy or a replacement image reader -- confirm against the
# environment in use.

# One entry per image: [label, pixel_0, pixel_1, ...].
rows = []

# Go through each directory in the root folder given above.
for directory, _, files in os.walk(root):
    # The label is the folder path relative to `root` ("./train/8" -> "8").
    # Deriving it from the path instead of slicing at a fixed offset keeps it
    # correct when `root` is changed. Files directly in `root` get an empty
    # label, as they did with the original fixed-offset slice for './train'.
    label = os.path.relpath(directory, root)
    if label == os.curdir:
        label = ''

    # Go through each file in that directory.
    for file in files:
        # Read the image file and flatten its pixels into a single row.
        pixels = imread(os.path.join(directory, file)).flatten()
        # Keep the label and the pixels in a plain list so the pixel values
        # stay numeric instead of being coerced to strings next to the label.
        rows.append([label] + pixels.tolist())

# Nothing to write when no image was found under `root`.
if rows:
    # Assumes every image has the same number of pixels; shorter rows would
    # be padded with NaN by pandas -- TODO confirm for the dataset in use.
    df = pd.DataFrame(rows)
    # Shuffle the dataset. This has to happen on the full table: sampling a
    # one-row frame per image (as before) left the rows in directory order.
    df = df.sample(frac=1)
    # Append once, without header or index, instead of re-opening the file
    # for every image.
    df.to_csv(output_csv, mode='a', header=False, index=False)