In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_recall_curve, auc
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [23]:
# Locations of the label spreadsheet and of the folder that holds the photos.
csv_path = 'E:/STA 221/surfline_photo_labels.csv'
img_dir = 'E:/STA 221/all_surfline_photos/'

# Read the label table into a DataFrame.
df = pd.read_csv(csv_path)

In [25]:
# Normalise label text so that case variants collapse to a single class.
df['Label'] = df['Label'].str.lower()

# Keep only rows with a definite label. This filter must run BEFORE encoding:
# once the column holds integers, comparing it to the string 'uncertain'
# is always True and filters nothing.
df = df[df['Label'] != 'uncertain'].copy()

# Encode the remaining text labels as integers (classes sorted alphabetically).
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# Invariant: the dropped class must not have been seen by the encoder.
assert 'uncertain' not in label_encoder.classes_, "uncertain rows were not removed"

# Show the cleaned frame as the cell output.
df

Unnamed: 0,Url,Label,LabelConfidence,link,photo
0,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.0730_agatebeachor.jpg,2023_05-18.0730_agatebeachor.jpg
1,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-03.1930_agatebeachor.jpg,2023_05-03.1930_agatebeachor.jpg
2,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.1630_agatebeachor.jpg,2023_05-18.1630_agatebeachor.jpg
3,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.2030_agatebeachor.jpg,2023_05-18.2030_agatebeachor.jpg
5,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-30.2100_agatebeachor.jpg,2023_05-30.2100_agatebeachor.jpg
...,...,...,...,...,...
8932,AmlDatastore://workspaceblobstore/UI/2023-10-2...,0,1,2022_09-28.2100_otterrockor.jpg,2022_09-28.2100_otterrockor.jpg
8933,AmlDatastore://workspaceblobstore/UI/2023-10-2...,1,1,2023_08-20.1400_otterrockor.jpg,2023_08-20.1400_otterrockor.jpg
8934,AmlDatastore://workspaceblobstore/UI/2023-10-2...,0,1,2022_08-18.1900_agatebeachor.jpg,2022_08-18.1900_agatebeachor.jpg
8935,AmlDatastore://workspaceblobstore/UI/2023-10-2...,1,1,2023_06-14.1130_cannonbeach.jpg,2023_06-14.1130_cannonbeach.jpg


In [27]:
# Remove one specific photo from the table.
# NOTE(review): presumably this file is missing or unreadable -- confirm the reason.
is_excluded_photo = df['photo'] == '2023_07-31.1800_cannonbeach.jpg'
df.drop(index=df.index[is_excluded_photo], inplace=True)

In [26]:
# Text class names learned by the encoder; position i in this array is the
# label that was encoded as integer i.
original_labels = label_encoder.classes_
print(original_labels)

['foggy' 'not_foggy']


In [30]:
# Number of rows per encoded class, before any balancing.
print("Label Counts:")
label_counts = df['Label'].value_counts()
print(label_counts)

Label Counts:
1    6853
0    1776
Name: Label, dtype: int64


In [33]:
# Render the current label table (pandas truncates long frames to head and tail).
display(df)

Unnamed: 0,Url,Label,LabelConfidence,link,photo
0,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.0730_agatebeachor.jpg,2023_05-18.0730_agatebeachor.jpg
1,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-03.1930_agatebeachor.jpg,2023_05-03.1930_agatebeachor.jpg
2,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.1630_agatebeachor.jpg,2023_05-18.1630_agatebeachor.jpg
3,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-18.2030_agatebeachor.jpg,2023_05-18.2030_agatebeachor.jpg
5,AmlDatastore://workspaceblobstore/UI/2023-07-1...,1,1,2023_05-30.2100_agatebeachor.jpg,2023_05-30.2100_agatebeachor.jpg
...,...,...,...,...,...
8932,AmlDatastore://workspaceblobstore/UI/2023-10-2...,0,1,2022_09-28.2100_otterrockor.jpg,2022_09-28.2100_otterrockor.jpg
8933,AmlDatastore://workspaceblobstore/UI/2023-10-2...,1,1,2023_08-20.1400_otterrockor.jpg,2023_08-20.1400_otterrockor.jpg
8934,AmlDatastore://workspaceblobstore/UI/2023-10-2...,0,1,2022_08-18.1900_agatebeachor.jpg,2022_08-18.1900_agatebeachor.jpg
8935,AmlDatastore://workspaceblobstore/UI/2023-10-2...,1,1,2023_06-14.1130_cannonbeach.jpg,2023_06-14.1130_cannonbeach.jpg


In [34]:
# Count of each class
label_counts = df['Label'].value_counts()

# Balance the two classes by randomly discarding rows from the larger one.
# The number to drop is derived from the data instead of being hard-coded,
# so it can never exceed the majority count and `df_filtered` is always defined.
majority_label = label_counts.idxmax()
num_to_drop = int(label_counts.max() - label_counts.min())

# Fixed seed so the same rows are discarded on every run.
df_to_drop = df[df['Label'] == majority_label].sample(n=num_to_drop, random_state=42)
df_filtered = df.drop(df_to_drop.index)

# Invariant: every class now has the same number of rows.
assert df_filtered['Label'].value_counts().nunique() <= 1, "classes are not balanced"

# Display the resulting DataFrame and the counts before balancing
print(df_filtered)
print(label_counts)


                                                    Url  Label  \
2     AmlDatastore://workspaceblobstore/UI/2023-07-1...      1   
3     AmlDatastore://workspaceblobstore/UI/2023-07-1...      1   
5     AmlDatastore://workspaceblobstore/UI/2023-07-1...      1   
6     AmlDatastore://workspaceblobstore/UI/2023-07-1...      1   
11    AmlDatastore://workspaceblobstore/UI/2023-07-1...      1   
...                                                 ...    ...   
8924  AmlDatastore://workspaceblobstore/UI/2023-10-2...      0   
8927  AmlDatastore://workspaceblobstore/UI/2023-10-2...      0   
8930  AmlDatastore://workspaceblobstore/UI/2023-10-2...      0   
8932  AmlDatastore://workspaceblobstore/UI/2023-10-2...      0   
8934  AmlDatastore://workspaceblobstore/UI/2023-10-2...      0   

      LabelConfidence                               link  \
2                   1   2023_05-18.1630_agatebeachor.jpg   
3                   1   2023_05-18.2030_agatebeachor.jpg   
5                   1   202

In [36]:
# Number of rows per encoded class after balancing.
print("Label Counts:")
label_counts = df_filtered['Label'].value_counts()
print(label_counts)

Label Counts:
1    1776
0    1776
Name: Label, dtype: int64


In [37]:
# Load every photo listed in the balanced table as a 100x100 array, keeping
# images and labels in lockstep.
data = []
labels = []
failed_paths = []  # photos that could not be read, kept for later inspection

for photo_name, label in zip(df_filtered['photo'], df_filtered['Label']):
    img_path = os.path.join(img_dir, photo_name)
    try:
        img = load_img(img_path, target_size=(100, 100))  # Specify the target size of your images
    except Exception as e:
        # Skip this row entirely. Falling through would append the previously
        # loaded image under this row's label (or raise NameError on the first row).
        print(f"Error loading image {img_path}: {e}")
        failed_paths.append(img_path)
        continue
    data.append(img_to_array(img))
    labels.append(label)

# Invariant: one label per successfully loaded image.
assert len(data) == len(labels)

In [38]:
# Stack the per-image arrays into a single array.
data = np.asarray(data)

# Build the label vector directly as integers (0 or 1) for binary classification.
labels = np.asarray(labels, dtype=int)

In [39]:
# Fourier transform preprocessing: replace each image by the log-magnitude of
# its centred 2-D spectrum, computed independently per colour channel.
#
# `data` has shape (n_images, 100, 100, 3), so the spatial axes are 1 and 2.
# The axes must be given explicitly:
#   * fft2 defaults to the LAST two axes, i.e. (width, channel);
#   * fftshift defaults to ALL axes, which would also roll the image axis and
#     leave `data` out of step with `labels`.
spatial_axes = (1, 2)
data = np.fft.fft2(data, axes=spatial_axes)
data = np.fft.fftshift(data, axes=spatial_axes)  # move the zero frequency to the centre
data = np.abs(data)                              # magnitude spectrum
data = np.log1p(data)                            # compress the dynamic range

In [40]:
# Scale values to the range [0, 1].
# By this point `data` holds log-magnitude spectra rather than 0-255 pixel
# intensities, so dividing by 255 would not give a [0, 1] range. Min-max
# scaling does, and re-running the cell leaves the data unchanged.
data_min = data.min()
data_range = data.max() - data_min
# Guard against constant input, which would otherwise divide by zero.
data = (data - data_min) / data_range if data_range > 0 else np.zeros_like(data)

In [41]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)


In [44]:
# CNN: three convolution + max-pooling stages, followed by a dense head that
# ends in a single sigmoid unit for binary classification.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Adam with an explicit learning rate; binary cross-entropy pairs with the
# sigmoid output.
opt = Adam(learning_rate=0.01)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 98, 98, 32)        896       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 49, 49, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 47, 47, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 23, 23, 64)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 21, 21, 128)       73856     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 10, 10, 128)     

In [45]:
# Early Stopping: stop once validation loss has not improved for 5 epochs and
# roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the Model
# Validation data is carved out of the TRAINING set. Monitoring the test set
# here would let early stopping pick weights based on test performance and
# make the accuracy reported below optimistically biased.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Evaluate the Model on the untouched test set; evaluate() returns [loss, accuracy].
accuracy = model.evaluate(X_test, y_test)[1]
print(f'Test Accuracy: {accuracy * 100:.2f}%')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

KeyboardInterrupt: 

In [None]:
# Sigmoid outputs for the test set, thresholded at 0.5 into hard 0/1 predictions.
y_pred = model.predict(X_test)
y_pred_binary = np.where(y_pred > 0.5, 1, 0)

# Per-class precision, recall and F1.
print("Classification Report:", classification_report(y_test, y_pred_binary), sep="\n")

In [None]:
# Confusion matrix of true labels against thresholded predictions.
conf_matrix = confusion_matrix(y_test, y_pred_binary)
print("Confusion Matrix:", conf_matrix, sep="\n")

In [None]:
# ROC curve points from the raw sigmoid scores, and the area under that curve.
fpr, tpr, _roc_thresholds = roc_curve(y_test, y_pred)
roc_auc = auc(x=fpr, y=tpr)

In [None]:
# Precision-recall curve points from the raw sigmoid scores, and the area under
# that curve (recall on the x axis, precision on the y axis).
precision, recall, _pr_thresholds = precision_recall_curve(y_test, y_pred)
pr_auc = auc(x=recall, y=precision)

In [None]:
# ROC curve, with the diagonal drawn as the chance-level reference.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'AUC = {roc_auc:.2f}')
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Precision-recall curve, with its area shown in the legend.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(recall, precision, color='blue', lw=2, label=f'AUC = {pr_auc:.2f}')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve')
ax.legend(loc='lower right')
plt.show()