# Set-Up

In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive so files stored in Drive can be read
# through ordinary filesystem paths under that directory.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, \
                            classification_report, roc_curve, auc, \
                            roc_auc_score

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Two candidate Drive locations for the cleaned, combined training CSV.
# NOTE(review): the `sms_` / `hn_` prefixes presumably identify different collaborators' Drive layouts — confirm.
sms_filepath = '/content/drive/MyDrive/CS 131 final project/training_data/cleaned_combined.csv'
hn_filepath = '/content/drive/MyDrive/cleaned_combined.csv'

# Load the CSV from whichever path matches your own Drive layout, then preview the first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a saved index — confirm whether it should be dropped.
df = pd.read_csv(hn_filepath)
df.head()

Unnamed: 0,B11,B2,B3,B4,B8,NDMI,NDVI,NDWI,SAVI,pineapple,latitude,longitude,year
0,0.3015,0.0438,0.07915,0.06085,0.3104,0.014545,0.672189,-0.593634,0.429641,0,9.728799,-85.211897,2018
1,0.0415,0.0062,0.0145,0.0087,0.1038,0.428768,0.845333,-0.754861,0.232898,0,10.017698,-83.27495,2018
2,0.0456,0.0126,0.019,0.0095,0.1473,0.527216,0.878827,-0.771497,0.314708,0,9.418162,-84.093854,2018
3,0.1271,0.02305,0.0453,0.0201,0.3301,0.444007,0.885208,-0.758657,0.54693,0,9.882322,-83.724646,2018
4,0.451,0.2554,0.3,0.3024,0.4976,0.049125,0.244,-0.247743,0.225231,0,10.006289,-83.30163,2018


## Collect Data

In [4]:
# spectral band columns: every column whose name begins with 'B'
bands = [name for name in df.columns if name.startswith('B')]
X_bands = df.loc[:, bands].to_numpy()

# derived index columns
feats = ['NDMI', 'NDVI', 'NDWI', 'SAVI']
X_feats = df.loc[:, feats].to_numpy()

# per-sample coordinates
coords = ['latitude', 'longitude']
X_coords = df.loc[:, coords].to_numpy()

# model inputs: band columns followed by index columns, plus a
# column-wise z-scored copy (zero mean, unit std per feature)
X = np.concatenate((X_bands, X_feats), axis=1)
X_normalized = (X - X.mean(axis=0)) / X.std(axis=0)

# target labels taken from the 'pineapple' column
y = df['pineapple'].to_numpy()

## Util Functions

In [5]:
def analyze_results(y_test, y_pred, y_scores):
  """Print a classification report, then draw the three diagnostic plots.

  Args:
    y_test: ground-truth labels.
    y_pred: predicted labels, compared against `y_test` in the report,
      the confusion matrix and the count bar graph.
    y_scores: scores passed with `y_test` to the ROC plot.
  """
  report = classification_report(y_test, y_pred)
  print(report)

  # Each plotter takes the true labels plus either the hard predictions
  # or the scores; the call order fixes the order the figures appear in.
  plot_steps = (
      (plt_confusion_matrix, y_pred),
      (plt_bar_graph, y_pred),
      (plt_ROC_curve, y_scores),
  )
  for plotter, second_arg in plot_steps:
    plotter(y_test, second_arg)


def plt_confusion_matrix(y_true, y_pred):
  """Render the confusion matrix of `y_pred` against `y_true` as a heatmap.

  Args:
    y_true: ground-truth labels.
    y_pred: predicted labels.
  """
  # Alphabetical order puts 'Non-pineapple' before 'Pineapple'.
  tick_names = sorted({'Non-pineapple', 'Pineapple'})
  matrix = confusion_matrix(y_true, y_pred)

  plt.figure(figsize=(10, 7))
  sns.heatmap(
      matrix,
      annot=True,
      fmt='g',
      cmap='Blues',
      xticklabels=tick_names,
      yticklabels=tick_names,
  )
  plt.xlabel('Predicted labels')
  plt.ylabel('True labels')
  plt.title('Confusion Matrix')
  plt.show()


def plt_bar_graph(y_true, y_pred):
  """Plot side-by-side bars of true vs. predicted sample counts per class.

  Args:
    y_true: array-like of ground-truth class ids.
    y_pred: array-like of predicted class ids.

  Assumes the binary encoding 0 = non-pineapple, 1 = pineapple (the
  `pineapple` column holds 0/1 values) — confirm if labels are encoded
  differently.
  """
  # Ordered list, not a set: set iteration order for strings is not stable
  # across interpreter runs, which could swap the tick labels.
  class_names = ['Non-pineapple', 'Pineapple']
  class_ids = np.arange(len(class_names))

  # Count each class explicitly so both series always have one entry per
  # class, even when a class never appears in y_pred (np.unique would then
  # return a shorter, misaligned counts array).
  y_true = np.asarray(y_true)
  y_pred = np.asarray(y_pred)
  counts_true = np.array([np.count_nonzero(y_true == c) for c in class_ids])
  counts_pred = np.array([np.count_nonzero(y_pred == c) for c in class_ids])

  x = class_ids  # the label locations
  width = 0.35  # the width of the bars
  fig, ax = plt.subplots()
  ax.bar(x - width/2, counts_true, width, label='True', color='navy')
  ax.bar(x + width/2, counts_pred, width, label='Predicted', color='skyblue')

  # axis decorations
  ax.set_ylabel('Counts')
  ax.set_title('Counts by Class and Type')
  ax.set_xticks(x)
  ax.set_xticklabels(class_names)
  ax.legend()
  fig.tight_layout()
  plt.show()

def plt_ROC_curve(y_test, y_scores):
  """Draw the ROC curve for `y_scores` against `y_test`, with its AUC in the legend.

  Args:
    y_test: ground-truth labels.
    y_scores: scores handed to `roc_curve` together with `y_test`.
  """
  false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_scores)
  area = auc(false_pos_rate, true_pos_rate)
  curve_label = 'ROC curve (area = %0.2f)' % area

  plt.figure()
  plt.plot(false_pos_rate, true_pos_rate, color='skyblue', lw=2, label=curve_label)
  # dashed diagonal for visual reference
  plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

  plt.xlim([0.0, 1.0])
  plt.ylim([0.0, 1.05])  # small headroom above TPR = 1
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('ROC Curve')
  plt.legend(loc="lower right")
  plt.show()

# Fully-Connected Network (FCN)

Data: per-pixel band values plus derived indices (NDMI, NDVI, NDWI, SAVI).

### 1. Prep Dataset

In [6]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split
# repeatable. Note: this splits the raw matrix X, not X_normalized.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
rows, cols = X_train.shape[0], X_train.shape[1]

# training split: float32 features, int64 labels, reshuffled every epoch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# validation split: same dtypes, fixed order
X_valid_tensor = torch.tensor(X_test, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_test, dtype=torch.long)
valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)

### 2. Define FCN Architecture

In [13]:
class OptimizedFCN(nn.Module):
    """Fully-connected classifier with batch normalisation and dropout.

    Layout: Linear(in, 256) -> BatchNorm1d -> ReLU -> Dropout ->
    Linear(256, 128) -> BatchNorm1d -> ReLU -> Dropout -> Linear(128, classes).
    The forward pass returns raw logits (no softmax is applied).

    Args:
        in_features: number of input features per sample. When ``None``
            (the default) the notebook-level ``cols`` variable is used, so
            ``OptimizedFCN()`` behaves as before.
        num_classes: number of output logits (default 2).
        dropout: drop probability for both dropout layers (default 0.5).
    """

    def __init__(self, in_features=None, num_classes=2, dropout=0.5):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback to the global set when the
            # training split is created.
            in_features = cols
        self.fc1 = nn.Linear(in_features, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(128, num_classes)  # output layer (raw logits)

    def forward(self, x):
        """Map a (batch, in_features) tensor to (batch, num_classes) logits."""
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

#### Misc NNs

In [55]:
class BasicFCN(nn.Module):
    """Minimal fully-connected classifier: two ReLU hidden layers (128, 64).

    The forward pass returns raw logits (no softmax is applied).

    Args:
        in_features: number of input features per sample. When ``None``
            (the default) the notebook-level ``cols`` variable is used, so
            ``BasicFCN()`` behaves as before.
        num_classes: number of output logits (default 2).
    """

    def __init__(self, in_features=None, num_classes=2):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback to the global set when the
            # training split is created.
            in_features = cols
        self.fc1 = nn.Linear(in_features, 128)  # input layer
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)   # output layer (raw logits)

    def forward(self, x):
        """Map a (batch, in_features) tensor to (batch, num_classes) logits."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [13]:
class CNN(nn.Module):
    """Three-stage convolutional classifier for fixed-size image patches.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), so the
    spatial size is halved three times (divided by 8 overall) before the
    two fully-connected layers. The forward pass returns raw logits.

    Args:
        in_channels: number of input image channels (default 1).
        image_size: input height/width, either a single int for square
            inputs or a ``(height, width)`` pair (default 64, which gives
            the original ``64 * 8 * 8`` flattened size).
        num_classes: number of output logits (default 2).
    """

    def __init__(self, in_channels=1, image_size=64, num_classes=2):
        super().__init__()
        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        # Three 2x2 max-pools (floor division) shrink each spatial dim by 8.
        self.fc1 = nn.Linear(64 * (height // 8) * (width // 8), 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a (batch, in_channels, H, W) tensor to (batch, num_classes) logits.

        Raises:
            RuntimeError: if H/W do not match the ``image_size`` the model
                was built for (the flattened size no longer fits ``fc1``).
        """
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten per sample. A fixed-width view(-1, N) would silently fold
        # extra spatial elements into the batch dimension on a size mismatch.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

### 3. Initialize Network, Loss Function, and Optimizer

In [11]:
# Classifier under training (BasicFCN is the simpler drop-in alternative).
model = OptimizedFCN()

# Cross-entropy over the two output logits.
criterion = nn.CrossEntropyLoss()

# Adam at 1e-3; the step schedule multiplies the rate by 0.1 every 20 scheduler steps.
optimizer = Adam(params=model.parameters(), lr=1e-3)
scheduler = StepLR(optimizer=optimizer, step_size=20, gamma=0.1)

### 4. Train and Validate the Network

In [12]:
# Total number of passes over the training loader.
num_epochs = 100

# Each epoch: one optimisation pass over train_loader, one scheduler step,
# then a gradient-free evaluation pass over valid_loader.
for epoch in range(num_epochs):
    # TRAINING PHASE
    model.train()  # switch modules to training mode
    running_loss = 0.0  # sum of per-batch training losses for this epoch
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous batch
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # adjust learning rate (stepped once per epoch, after the optimizer updates)
    scheduler.step()

    # VALIDATION PHASE
    model.eval()  # switch modules to evaluation mode
    val_loss = 0.0  # sum of per-batch validation losses
    correct = 0  # number of correctly classified validation samples
    total = 0  # number of validation samples seen
    with torch.no_grad():  # no gradient tracking during evaluation
        for inputs, labels in valid_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            # predicted class = index of the largest logit along dim 1
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # print per-epoch results: both losses are averaged over the number of
    # batches in the respective loader; accuracy is a percentage of samples
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Validation Loss: {val_loss/len(valid_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')

Epoch 1/100, Loss: 0.4006, Validation Loss: 0.2976, Accuracy: 87.82%
Epoch 2/100, Loss: 0.3442, Validation Loss: 0.2877, Accuracy: 87.96%
Epoch 3/100, Loss: 0.3315, Validation Loss: 0.2843, Accuracy: 88.39%
Epoch 4/100, Loss: 0.3216, Validation Loss: 0.2800, Accuracy: 87.26%
Epoch 5/100, Loss: 0.3246, Validation Loss: 0.2789, Accuracy: 88.29%
Epoch 6/100, Loss: 0.3192, Validation Loss: 0.2808, Accuracy: 87.49%
Epoch 7/100, Loss: 0.3133, Validation Loss: 0.2799, Accuracy: 87.64%
Epoch 8/100, Loss: 0.3129, Validation Loss: 0.2788, Accuracy: 88.43%
Epoch 9/100, Loss: 0.3112, Validation Loss: 0.2807, Accuracy: 87.64%
Epoch 10/100, Loss: 0.3127, Validation Loss: 0.2744, Accuracy: 88.34%
Epoch 11/100, Loss: 0.3147, Validation Loss: 0.2783, Accuracy: 88.53%
Epoch 12/100, Loss: 0.3107, Validation Loss: 0.2718, Accuracy: 88.53%
Epoch 13/100, Loss: 0.3145, Validation Loss: 0.2746, Accuracy: 88.72%
Epoch 14/100, Loss: 0.3132, Validation Loss: 0.2741, Accuracy: 88.81%
Epoch 15/100, Loss: 0.3095, V

# Convolutional Neural Network

# U-Net