---
---
# Internet of Things First Look
This notebook takes a first, high-level look at the dataset's contents and attributes. More detailed analyses can be found in the subsequent exploratory notebooks.

## Imports & Settings

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib import rcParams
from sklearn.model_selection import train_test_split

In [None]:
# Notebook-wide matplotlib defaults, applied in a single update
rcParams.update({
    'figure.facecolor': 'whitesmoke',
    'figure.figsize': (13, 6),
    'figure.titlesize': 'x-large',
    'scatter.edgecolors': 'k',
    'figure.edgecolor': 'k',
    'figure.constrained_layout.use': True,
})

# Seed NumPy's global random state so the notebook's random draws are repeatable
np.random.seed(42)

## Auxiliary Functions



In [None]:
# Create a helper function for viewing images
def iot_viewer(df, num_pics = 1):
    """Display randomly sampled rows of ``df`` as square grayscale images.

    Each row is treated as one flattened square image. A ``'target'`` label
    column is dropped before plotting when it is present, so both labelled
    frames and feature-only frames can be passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        One flattened image per row; every column other than ``'target'``
        is treated as a pixel value.
    num_pics : int, default 1
        Number of rows to sample (without replacement) and display.

    Returns
    -------
    None
        The images are shown with ``plt.show()``; nothing is returned.

    Raises
    ------
    ValueError
        If the number of pixel columns is zero or not a perfect square, or
        if ``num_pics`` exceeds the number of rows (raised by
        ``DataFrame.sample``).
    """
    # errors = 'ignore' lets frames without a label column through
    pixels = df.drop(columns = 'target', errors = 'ignore')

    # Infer the image side from the row width instead of assuming 32 x 32
    n_pixels = pixels.shape[1]
    side = int(round(np.sqrt(n_pixels)))
    if n_pixels == 0 or side * side != n_pixels:
        raise ValueError(
            f'Expected a non-zero square number of pixel columns, got {n_pixels}.')

    picked = pixels.sample(num_pics)
    # Reshape the flattened rows to (count, side, side, 1)
    sample_pics = np.asarray(picked).reshape(len(picked), side, side, 1)
    for pic in sample_pics:
        plt.figure(figsize = (8, 8), facecolor = 'whitesmoke')
        plt.imshow(pic, cmap = 'gray')
        plt.axis('off')
        plt.show()

---
# Load and View Data

In [None]:
# Read the firmware CSV and discard its 'Unnamed: 1027' column in one chain
IoT_firmware = (
    pd.read_csv('../input/iot-firmware-image-classification/firmware/firmware.csv')
      .drop(columns = 'Unnamed: 1027')
)

IoT_firmware

## Explore Data Basics

In [None]:
# Tally null cells per column, then total them over the whole frame
missing_per_column = IoT_firmware.isna().sum()
num_missing = missing_per_column.sum()

print(f'\nThere are {num_missing} missing values.\n')

In [None]:
# Non-null counts per group for each label/identifier column; only the first
# five count columns are printed to keep the output narrow.
# `axis` is left at its default: passing it to DataFrame.groupby is deprecated
# (FutureWarning on pandas 2.1+), and the default already groups rows.
for col in ['class', 'target', 'filename']:
    print('\n', col, '\n')
    group_counts = IoT_firmware.groupby(col).count().iloc[:, :5]
    print(f'{group_counts}\n\n')

In [None]:
# Each flattened image holds 1024 values; the side is the integer square root
n_pixels = 1024
dim = int(np.sqrt(n_pixels))

image_shape = (dim, dim, 1)
print(f'The pictures are {image_shape} grayscale thumbnail images.')

In [None]:
# Keep only the identifier and label columns and save them as a lookup table
label_columns = ['filename', 'class', 'target']
name_target = IoT_firmware[label_columns]
name_target.to_csv('name_target.csv', index = False)

name_target

---
# Split & Save Formatted Data

In [None]:
# Separate the feature columns from the integer class label
X = IoT_firmware.drop(columns = ['filename', 'class', 'target'])

Y = IoT_firmware['target']

# 90% train / 10% hold-out, stratified on the label. random_state is passed
# explicitly so re-running this cell on its own reproduces the same split
# instead of depending on how much global NumPy randomness was consumed before.
X_tr, X_ts, y_tr, y_ts = train_test_split(X, Y, test_size = .1,
                                         stratify = Y, random_state = 42)

# Halve the hold-out into validation and test sets (5% of the data each)
X_val, X_test, y_val, y_test = train_test_split(X_ts, y_ts, test_size = .5,
                                               stratify = y_ts, random_state = 42)

# Combine X's and y's for saving and export
train = pd.concat([X_tr, y_tr], axis = 1)
validation = pd.concat([X_val, y_val], axis = 1)
test = pd.concat([X_test, y_test], axis = 1)

# Sanity check: every row landed in one of the three splits
assert len(train) + len(validation) + len(test) == len(IoT_firmware)



In [None]:
# Write each split to its own CSV (row index included) for later notebooks
for split_frame, csv_name in (
    (train, 'IOTF_train.csv'),
    (validation, 'IOTF_val.csv'),
    (test, 'IOTF_test.csv'),
):
    split_frame.to_csv(csv_name)

---
# View Sample Images

In [None]:
# Number of randomly sampled training images to display
n = 5
iot_viewer(train, num_pics = n)

---
# Conclusion
* No missing pixel values
  * shape (32, 32, 1)
  * 38887 grayscale thumbnail images
* Three target classes
  * benignware = 0
  * hackware = 2
  * malware = 1
* Each 'filename' is unique

## Next Steps
* Exploratory Data Analysis (EDA)
* Model Kingsmoot

---
---