# Power Plant Identification

In [None]:
# Automatically reload imported modules before each cell runs, so edits to
# methods/classes are picked up without restarting the kernel
%reload_ext autoreload
%autoreload 2

# Render plots inline; the 'retina' figure format gives sharper plots on
# high-resolution monitors (comment out the %config line otherwise)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Load dependencies
from fastai.vision import ImageDataBunch, cnn_learner, ClassificationInterpretation, get_transforms, imagenet_stats, ShowGraph, models, open_image
from fastai.metrics import error_rate, accuracy
import matplotlib.pyplot as plt

In [None]:
# Create a directory to store the data; -p makes this a no-op if it already exists
!mkdir -p data/

## Power Plant Identification

In [None]:
# Download the low-resolution Landsat power plant training imagery.
# -nc (no-clobber) skips the download if data/landsat_power_plants.zip already exists.
!wget -nc -O data/landsat_power_plants.zip https://github.com/kylebradbury/bc-tutorials/raw/master/data/landsat_power_plants_train.zip

In [None]:
# Unzip the low-resolution Landsat power plant imagery into data/pp_landsat/.
# -n never overwrites files that already exist, so re-running this cell is safe.
!unzip -n data/landsat_power_plants.zip -d data/pp_landsat/

In [None]:
# Folder the training imagery was unzipped into; used to build the data bunch
path = 'data/pp_landsat/'

In [None]:
# Number of images per batch (passed as bs= when the data bunch is built)
batch_size = 5
# Image size passed as size= when the data bunch is built
image_size = 76 # pixels (for both height and width)

In [None]:
# Build the data bunch from the image folder using the default augmentation
# transforms, the configured image and batch sizes, and ImageNet normalization
# statistics.
data = ImageDataBunch.from_folder(
    path,
    ds_tfms=get_transforms(),
    size=image_size,
    bs=batch_size,
).normalize(imagenet_stats)

In [None]:
# Preview a 3-row grid of sample images from the data bunch
data.show_batch(
    rows=3,
    figsize=(7, 6),
)

In [None]:
# Show the class labels of the data bunch
print(data.classes)

In [None]:
# Create a CNN learner on a ResNet-34 architecture with pretrained weights,
# tracking accuracy as the metric and attaching the ShowGraph callback.
learn = cnn_learner(
    data,
    models.resnet34,
    pretrained=True,
    metrics=accuracy,
    callback_fns=ShowGraph,
)

In [None]:
#learn.unfreeze() # Uncomment this line to train ALL of the layers

In [None]:
# Train for 10 epochs with the one-cycle schedule; max_lr is given as the
# slice 1e-4 .. 1e-3
learn.fit_one_cycle(
    10,
    max_lr=slice(1e-4, 1e-3),
)

In [None]:
#learn.save('resnet34') # Use this line to save your trained model

In [None]:
#learn.load('resnet34') # Use this line to load from a trained model

In [None]:
# Build an interpretation object from the learner; the cells below use it to
# plot the top losses and the confusion matrix
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Plot the 16 top-loss samples in a 15x11 inch figure
interp.plot_top_losses(
    16,
    figsize=(15, 11),
)

In [None]:
# Plot the confusion matrix of the interpretation object
interp.plot_confusion_matrix(
    dpi=120,
    figsize=(6, 6),
)

In [None]:
# List the most-confused class pairs, using min_val=2 as the minimum count
# threshold (left as the last expression so the result is displayed)
interp.most_confused(min_val=2)

## Make some predictions

In [None]:
# Download the test data

# -nc (no-clobber) prevents overwriting an existing download; if you need to
# re-download the data, remove the "-nc" flag
!wget -nc -O data/landsat_power_plants_test.zip https://duke.box.com/shared/static/k0rew8ghke15iz8gy6cl2orb0e7ki3mg.zip

In [None]:
# Unzip the test data into data/pp_landsat_test/ (-n: never overwrite existing files)
!unzip -n data/landsat_power_plants_test.zip -d data/pp_landsat_test/

In [None]:
import os

# NOTE: this rebinds `path`, which previously pointed at the training data.
# Re-run the training `path` cell before re-running any training cell after this one.
path = 'data/pp_landsat_test/'

# Get a list of the test files:
#  - regular files only (sub-directories are skipped),
#  - hidden files (e.g. '.DS_Store') are ignored, since their names cannot be
#    parsed as numeric image ids later on,
#  - sorted, because os.listdir returns entries in arbitrary order and a fixed
#    order makes the resulting predictions file reproducible.
# os.path.join avoids the doubled '/' that plain concatenation produced, since
# `path` already ends with a separator.
test_list = sorted(
    f for f in os.listdir(path)
    if not f.startswith('.') and os.path.isfile(os.path.join(path, f))
)

In [None]:
# Ensure you have the right number of images in your test set.
# An assertion stops "Run All" on a wrong test set instead of merely displaying
# False, and the message reports how many files were actually found.
EXPECTED_TEST_IMAGES = 920
assert len(test_list) == EXPECTED_TEST_IMAGES, (
    f"Expected {EXPECTED_TEST_IMAGES} test images, found {len(test_list)}"
)

In [None]:
# tqdm wraps an iterable and shows a simple-to-use progress bar while it is consumed
from tqdm import tqdm

# Parallel lists: ids[i] is the numeric id of a test image and preds[i] is the
# upper-cased label predicted for it
ids = []
preds = []
for file in tqdm(test_list):
    # The image id is the file name without its extension. splitext copes with
    # extensions of any length, unlike slicing off a fixed four characters.
    idnum = os.path.splitext(file)[0]
    img = open_image(os.path.join(path, file))
    # Only the first element of the prediction tuple is needed for the label
    pred_class,pred_idx,outputs = learn.predict(img)
    label = pred_class.obj
    ids.append(int(idnum))
    preds.append(label.upper())

In [None]:
# Write the predictions to a CSV file using Pandas: one row per test image,
# with the predicted plant type in 'fuel' and the image id in 'id'

import pandas as pd
d = dict(fuel=preds, id=ids)
predictions = pd.DataFrame(data=d)
# index=False leaves the DataFrame's row index out of the file
predictions.to_csv(path_or_buf='test_data_predictions.csv', index=False)