In [1]:
import numpy as np 
import pandas as pd 
import pickle

from FDSSC import models, utils
from datasets.datasets import LocationChecker, FDSSCDataset, RandomRotation
from matplotlib import pyplot as plt
import torch
from torch.utils.data import DataLoader

In [33]:
# Build a class-balanced copy of the labelled sites by naive oversampling:
# each row is repeated so that every MAJOR_COMMODITY_CODE ends up with roughly
# as many rows as the most frequent code.
datadir = "data/stacked_images"
labeldir = "miningSites/labeled_masked_labels.csv"  # NOTE: a CSV file path, despite the name
df = pd.read_csv(labeldir)

# utils.freqdict is project code; presumably it maps each code to its row
# count -- TODO confirm against FDSSC.utils.
freqdict = utils.freqdict(df, "MAJOR_COMMODITY_CODE")
max_num = max(freqdict.values())
# Integer repeat factor per code (floor division, so the balance is approximate).
mult_dict = {key: max_num // value for key, value in freqdict.items()}

# One repeat factor per row, aligned with the row order of df.
multlist = [mult_dict[each] for each in df["MAJOR_COMMODITY_CODE"]]

# Repeat every row `factor` times with a single positional selection.
# This keeps the same row order and index labels as looping over
# df.iterrows() and appending, but avoids the per-row Python loop and does not
# pass each row through a mixed-dtype Series, so df's column dtypes are kept.
outputdf = df.iloc[np.repeat(np.arange(len(df)), multlist)]
print(outputdf.shape)
outputdf.head(5)

(15984, 12)


Unnamed: 0.1,Unnamed: 0,MINERAL_DEPOSIT_NO,DEPOSIT_COMMODITY_CODES,MAJOR_COMMODITY_CODE,MINDEP_CLASS_CODE,STATUS_TYPE_VALUE,SIZE_TYPE_VALUE,ORE_LITHOLOGY_CODE,LONGITUDE,LATITUDE,label,paths
0,0,40,CYP,NONE,OCCURRENCE,Not worked,Low Significance,LATR,132.608253,-26.980629,0,data/stacked_images/LC08_L1TP_102079_20191204_...
1,1,3490,Cu,Cu,OCCURRENCE,Abandoned,Low Significance,VEIN,133.314408,-26.980648,5,data/stacked_images/LC08_L1TP_102079_20191204_...
1,1,3490,Cu,Cu,OCCURRENCE,Abandoned,Low Significance,VEIN,133.314408,-26.980648,5,data/stacked_images/LC08_L1TP_102079_20191204_...
1,1,3490,Cu,Cu,OCCURRENCE,Abandoned,Low Significance,VEIN,133.314408,-26.980648,5,data/stacked_images/LC08_L1TP_102079_20191204_...
2,2,9170,Cr,NONE,OCCURRENCE,Not worked,Low Significance,IMAF,132.537458,-26.984733,0,data/stacked_images/LC08_L1TP_102079_20191204_...


In [34]:
# Shuffle the oversampled rows so the duplicated copies of a site are no
# longer adjacent. The fixed random_state keeps the row order (and therefore
# the CSV written from this frame) reproducible across fresh kernel runs.
outputdf = outputdf.sample(frac=1, random_state=0)
outputdf.head()

Unnamed: 0.1,Unnamed: 0,MINERAL_DEPOSIT_NO,DEPOSIT_COMMODITY_CODES,MAJOR_COMMODITY_CODE,MINDEP_CLASS_CODE,STATUS_TYPE_VALUE,SIZE_TYPE_VALUE,ORE_LITHOLOGY_CODE,LONGITUDE,LATITUDE,label,paths
1379,1379,10527,U,U,OCCURRENCE,Not worked,Low Significance,GRVL,132.789285,-31.452419,7,data/stacked_images/LC08_L1TP_102082_20191204_...
2901,2901,8591,SDST,NONE,OCCURRENCE,Abandoned,Low Significance,SDST,138.462213,-33.612037,0,data/stacked_images/LC08_L1TP_098083_20200125_...
287,287,3759,Fe,Fe,OCCURRENCE,Not worked,Low Significance,QMTU,135.33005,-29.469041,6,data/stacked_images/LC08_L1TP_100081_20200107_...
53,53,170,OPAL,NONE,OCCURRENCE,Seasonal,Low Significance,OPAL,133.246793,-27.298349,0,data/stacked_images/LC08_L1TP_102079_20191204_...
2298,2298,7151,JADE,NONE,OCCURRENCE,Not worked,Low Significance,MTHH,136.932193,-33.483123,0,data/stacked_images/LC08_L1TP_098083_20200125_...


In [35]:

# Write the balanced, shuffled label table to disk; the frame's index is
# not written (index=False).
balanced_csv_path = "miningSites/balanced_labels.csv"
outputdf.to_csv(balanced_csv_path, index=False)

In [2]:
# Datasets and loaders for the train / validation / test splits.
datadir = "data/stacked_images"
labeldir = "siteData/allData/"

# A single LocationChecker is shared by all three datasets. The 9x9 size
# matches the spatial extent of the model input shape used in the model cell.
checker = LocationChecker(datadir, size=(9, 9), rigorous=False)

# Only the training split gets the RandomRotation transform.
trainLandsat8Data = FDSSCDataset(datadir, labeldir+"train.csv", lochecker=checker, transform=[RandomRotation()])
valLandsat8Data = FDSSCDataset(datadir, labeldir+"val.csv", lochecker=checker)
testLandsat8Data = FDSSCDataset(datadir, labeldir+"test.csv", lochecker=checker)

# Training batches are shuffled; validation and test keep dataset order.
trainLoader = DataLoader(trainLandsat8Data, batch_size=8, shuffle=True, num_workers=4)
valLoader = DataLoader(valLandsat8Data, batch_size=16, num_workers=1)
testLoader = DataLoader(testLandsat8Data, batch_size=16, num_workers=1)

# Disabled alternative kept from the original cell; `csv` is not defined
# anywhere in this notebook, so it cannot be re-enabled as written.
#id2idx = utils.get_id2idx(pd.read_csv(csv), "MAJOR_COMMODITY_CODE")

# Load the id -> index mapping. The context manager closes the file handle,
# which a bare pickle.load(open(...)) leaves open.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
with open("data/id2idxMasked.p", "rb") as id2idx_file:
    id2idx = pickle.load(id2idx_file)

# Inverse mapping: index -> id.
idx2id = {value: key for key, value in id2idx.items()}

In [3]:
# Training device: first CUDA GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Network, loss and optimiser. One output per entry of id2idx.
# NOTE(review): the trailing 11 in the input shape is presumably the number
# of spectral bands in the stacked images -- confirm against the dataset.
input_shape = (1, 9, 9, 11)
num_classes = len(id2idx)
model = models.FerDSSC_model(input_shape, num_classes)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0003)

# Run the project's weight initialiser over every submodule.
model.apply(utils.init_weights)

# Move the model to the training device. Left as the cell's last expression
# so the notebook displays the module structure.
model.to(device)


FerDSSC_model(
  (input_spec_conv): Conv3d(1, 24, kernel_size=(1, 1, 7), stride=(1, 1, 1), padding=(0, 0, 3))
  (spectral_conv1): Spectral_conv(
    (bn_prelu): Bn_prelu(
      (bn): BatchNorm3d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (prelu): PReLU(num_parameters=1)
    )
    (conv3d): Conv3d(24, 12, kernel_size=(1, 1, 7), stride=(1, 1, 1), padding=(0, 0, 3))
  )
  (spectral_conv2): Spectral_conv(
    (bn_prelu): Bn_prelu(
      (bn): BatchNorm3d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (prelu): PReLU(num_parameters=1)
    )
    (conv3d): Conv3d(36, 12, kernel_size=(1, 1, 7), stride=(1, 1, 1), padding=(0, 0, 3))
  )
  (spectral_conv3): Spectral_conv(
    (bn_prelu): Bn_prelu(
      (bn): BatchNorm3d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (prelu): PReLU(num_parameters=1)
    )
    (conv3d): Conv3d(48, 12, kernel_size=(1, 1, 7), stride=(1, 1, 1), padding=(0, 0, 3))
  )
  (bn_prelu1): Bn

In [None]:
# Sanity check: fetch the first training sample and print it.
first_train_sample = trainLandsat8Data[0]
print(first_train_sample)

In [4]:
# Training schedule.
epochs = 5
frequency = 5  # passed to the checkpoint callback together with `epochs`
checkpointFolder = "TrainingCheckpoints/3rdRun/"

# Callbacks handed to utils.train: validation first, checkpoint saving second.
validation_cb = utils.validate_callback(model, valLoader, loss_fn, device)
checkpoint_cb = utils.saving_checkpoints_callback(
    checkpointFolder, model, optimizer, frequency, epochs
)
cbs = [validation_cb, checkpoint_cb]

# utils.train returns the training losses and the collected callback outputs.
trainingLosses, CBouts = utils.train(
    model,
    trainLoader,
    epochs=epochs,
    loss_fn=loss_fn,
    optimizer=optimizer,
    callbacks=cbs,
)


Training...
  Batch   311  of    312.    Loss: 2.85     Elapsed: 0:01:19.
  Average training loss: 1.81
  Training epoch took: 0:01:19
Running Validation...
  Accuracy: 58.17
  Validation took: 0:00:25

Training...
  Batch   311  of    312.    Loss: 2.37     Elapsed: 0:01:18.
  Average training loss: 1.79
  Training epoch took: 0:01:19
Running Validation...
  Accuracy: 58.79
  Validation took: 0:00:24

Training...
  Batch   311  of    312.    Loss: 1.37     Elapsed: 0:01:12.
  Average training loss: 1.79
  Training epoch took: 0:01:12
Running Validation...
  Accuracy: 58.79
  Validation took: 0:00:24

Training...
  Batch   311  of    312.    Loss: 1.37     Elapsed: 0:01:21.
  Average training loss: 1.79
  Training epoch took: 0:01:22
Running Validation...
  Accuracy: 58.79
  Validation took: 0:00:27

Training...
  Batch   311  of    312.    Loss: 1.37     Elapsed: 0:01:16.
  Average training loss: 1.79
  Training epoch took: 0:01:16
Running Validation...
  Accuracy: 58.79
  Validation

In [5]:
# Needs the training cell to have run to completion: CBouts and
# trainingLosses are only bound once utils.train returns.
# Transpose CBouts and keep the first group of outputs.
# NOTE(review): presumably these are the validate_callback losses, since that
# callback is first in `cbs` -- confirm against utils.train.
transposed_outputs = list(zip(*CBouts))
first_output_group = transposed_outputs[0]
valLosses = [loss.item() for loss in first_output_group]
print(valLosses)
print(trainingLosses)

NameError: name 'CBouts' is not defined

In [6]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
def plot_loss_vals(loss_vals, lv2, title="Training loss", start_idx=1):
    """Plot training and validation loss curves on a shared "Epoch" axis.

    Args:
        loss_vals: Sequence of training losses; element 0 is drawn at x=1.
        lv2: Sequence of validation losses, indexed the same way.
        title: Title of the figure.
        start_idx: 1-based position of the first value to draw; earlier
            values are skipped and the x axis starts at this number.

    Raises:
        ValueError: If ``start_idx`` is smaller than 1. Without this check
            ``start_idx=0`` turns the slice into ``[-1:]`` and only the last
            value would be plotted, with no warning.

    Side effects:
        Sets the global seaborn style and font scale, sets the default
        matplotlib figure size, and shows the figure.
    """
    if start_idx < 1:
        raise ValueError(f"start_idx must be >= 1, got {start_idx}")

    sns.set(style='darkgrid')
    sns.set(font_scale=1.5)
    plt.rcParams["figure.figsize"] = (12,6)

    curves = (
        (loss_vals, 'b-o', "Training Loss (softmax)"),
        (lv2, 'r-o', "Val Loss (softmax)"),
    )
    for values, fmt, label in curves:
        shown = values[start_idx - 1:]
        # x positions are the 1-based indices of the values that are drawn.
        x_positions = list(range(start_idx, start_idx + len(shown)))
        plt.plot(x_positions, shown, fmt, label=label)

    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()
# Draw both loss curves, skipping the first value (start_idx=2).
loss_plot_title = "Training and Val Loss, FerDSSC, with 100 latent space"
plot_loss_vals(
    trainingLosses,
    valLosses,
    title=loss_plot_title,
    start_idx=2,
)

In [8]:
# evaluate test accuracy: count correct predictions and samples per class

# Switch to inference mode. The model contains BatchNorm3d layers, which
# would otherwise normalise with per-batch statistics and keep updating their
# running averages while the test set is being scored.
# Call model.train() again before any further training.
model.eval()

# Per-class counters, indexed by the integer class label.
class_correct = [0.0] * len(id2idx)
class_total = [0.0] * len(id2idx)

with torch.no_grad():
    for data in testLoader:
        inputs, labels = data["image"].to(device), data["label"].to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        # No .squeeze() here: a final batch of size 1 would collapse to a
        # 0-dim tensor that cannot be indexed.
        # Assumes labels is a 1-D tensor of class indices (as
        # CrossEntropyLoss expects) -- TODO confirm against FDSSCDataset.
        hits = (predicted == labels).tolist()
        for label, hit in zip(labels.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1


KeyError: 0