In [150]:
# import packages

import os

import nasapy
import pandas as pd
import tensorflow as tf

In [None]:
# import nasa dataset for model training
# Credit to Data Science Student Society at UC San Diego

# Target size, in pixels, that every image is resized to before it reaches the model.
img_height, img_width = 256, 256

# Split files (training, validation, test) that are read by PathtoDataFrame.
train_path, validation_path, test_path = (
    r"...\train-calibrated-shuffled.txt",
    r"...\val-calibrated-shuffled.txt",
    r"...\test-calibrated-shuffled.txt",
)


def PathtoDataFrame(path, base_dir = '...initial path location'):
    """Import Data from its File Location and Convert to a DataFrame.

    Every line of the file is read as two space-separated fields: a relative
    image path followed by a class id.

        Args:
            path: File Path (or file-like object) of Data
            base_dir: Prefix that is prepended verbatim to every relative
                image path; defaults to the notebook's placeholder location
        Returns:
            DataFrame with the columns 'Class' and 'Paths', one row per line
            of the file. Forward slashes in 'Paths' are replaced by
            backslashes.
            """
    # NOTE(review): header=None assumes the split files have no header row
    # (the previous code addressed its added column as `2`, i.e. it expected
    # positional column labels 0 and 1) -- confirm. With the default header
    # handling the first image of every split was consumed as column names.
    raw = pd.read_csv(path, sep = " ", header = None, names = ["a", "Class"])

    # Vectorised string handling replaces the row-by-row chained assignment
    # (df['Paths'][i] = ...), which pandas does not guarantee to write through.
    full_paths = (base_dir + raw["a"].astype(str)).str.replace("/", "\\", regex = False)

    return pd.DataFrame({"Class": raw["Class"], "Paths": full_paths})

# Load the three split files, in the order train, validation, test.
train_df, validation_df, test_df = (
    PathtoDataFrame(split_path)
    for split_path in (train_path, validation_path, test_path)
)

# Training and validation rows are pooled into one frame with a fresh 0..n-1 index.
full_train_df = pd.concat(
    [train_df, validation_df],
    axis = 0,
    ignore_index = True,
)

In [276]:
# Reclassify Images for Model Input

# Original dataset class id -> class name. Ids that are not analysed on their
# own are all named 'other'.
# NOTE(review): 'sun' (id 22) has a name here but no entry in new_class_labels
# below, so classifier() leaves it as the string 'sun' -- confirm whether it
# should be folded into 'other'.
class_labels = {0: 'other', 1: 'other', 2: 'other', 3: 'chemin inlet open', 4: 'other',
                         5: 'drill holes', 6: 'other', 7: 'other', 8: 'ground', 9: 'horizon', 10: 'inlet',
                         11: 'other', 12: 'other', 13: 'other', 14: 'mastcam cal target',
                         15: 'observation tray', 16: 'other', 17: 'portion tube', 18: 'other',
                         19: 'other', 20: 'other', 21: 'scoop', 22: 'sun', 23: 'turret', 24: 'wheel'}

# Class name -> class id used for the analysis.
new_class_labels = {'chemin inlet open': 0,
                    'drill holes': 1, 'ground': 2, 'horizon': 3, 'inlet': 4,
                    'mastcam cal target': 5, 'observation tray': 6, 'portion tube': 7,
                    'scoop': 8,  'turret': 9, 'wheel': 10, 'other': 11}
def classifier(series):
    """Convert Original Classes into the Classes used for Analysis.

    Each original class id is translated to its name through class_labels and
    the name is translated to its new id through new_class_labels. A name that
    has no entry in new_class_labels is written into the series as the name.

        Args:
            series: Series of Image Classes (original class ids); it is
                modified in place
        Returns:
            None
        Raises:
            KeyError: if the series holds a value that is not a key of
                class_labels
            """
    present = series.unique().tolist()

    unknown = [value for value in present if value not in class_labels]
    if unknown:
        raise KeyError(f"class ids without an entry in class_labels: {unknown}")

    # One old-id -> new-id table for the values that actually occur, applied
    # in a single replace() call instead of one full-series replace per row
    # while iterating over the series being modified.
    remap = {}
    for old_id in sorted(present):
        name = class_labels[old_id]
        new_id = new_class_labels.get(name, name)
        if new_id != old_id:
            remap[old_id] = new_id

    # Keys are inserted in ascending order and every new id other than 11 is
    # smaller than the old id it replaces (11 itself is never a key), so no
    # replaced value can be picked up again by a later pair.
    if remap:
        series.replace(remap, inplace = True)

# Model inputs are the image paths, targets are the class ids.
X_train = full_train_df['Paths']
Y_train = full_train_df['Class']
X_test = test_df['Paths']
Y_test = test_df['Class']

# Relabel both target series in place (classifier returns nothing).
classifier(series = Y_train)
classifier(series = Y_test)

In [277]:
# Filter Data to Remove Uncommon Classes and Reduce Class Imbalance

# NOTE: this cell rebinds X_train, Y_train, X_test and Y_test, so running it a
# second time without re-running the cells above filters already-filtered data.

# Separate Class 2: Ground and Remove 2/3 of Ground Observations
X_train_2s = X_train[Y_train == 2]
X_train_no_2s = X_train[Y_train != 2]
Y_train_2s = Y_train[Y_train == 2]
Y_train_no_2s = Y_train[Y_train != 2]

# Keep the first third of the ground rows. The slice is taken from the
# ground-only series by position; indexing X_train / Y_train with the series
# of 2s itself is a label lookup that returns the row labelled 2 over and over.
n_ground_kept = len(Y_train_2s) // 3
X_train_1 = X_train_2s.iloc[:n_ground_kept]
Y_train_1 = Y_train_2s.iloc[:n_ground_kept]

# Remove Class 11: Other
# The mask is built from the series it is applied to, so both share one index.
X_train_2 = X_train_no_2s[Y_train_no_2s != 11]
Y_train_2 = Y_train_no_2s[Y_train_no_2s != 11]

X_test = X_test[Y_test != 11]
Y_test = Y_test[Y_test != 11]

# Recombine Filtered Dataset
X_train = pd.concat([X_train_1, X_train_2], axis = 0, ignore_index = True)
Y_train = pd.concat([Y_train_1, Y_train_2], axis = 0, ignore_index = True)

# Randomly Shuffle the Training Data
# The labels are shuffled first; the paths are then put into the same order
# through the shuffled index so that the pairs stay aligned.
Y_train_final = Y_train.sample(frac = 1)
idx = Y_train_final.index
X_train_final = X_train.reindex(idx)

In [278]:
# Create TF Dataset that is Usable for Model Input and Training

# Number of elements per batch, and size of the shuffle buffer for training data.
batch_size = buffer_size = 32

def read_image_scale(img_path, label):
    """Load one Image File and Return it Resized and Rescaled with its Label.
        Args:
            img_path: Path of the Image File to Read
            label: Label of the Image; passed through unchanged
        Returns:
            Tuple (image, label). The image is decoded as a 3-channel JPEG,
            resized to img_height x img_width and divided by 255.
            """
    raw_bytes = tf.io.read_file(img_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [img_height, img_width])
    return resized/255, label

def create_dataset(filenames, labels, is_training=True):
    """Build a batched, prefetched tf.data pipeline of (image, label) pairs.
    Args:
        filenames: image paths, one per example
        labels: labels aligned one-to-one with filenames
        is_training: when truthy, the loaded examples are cached and shuffled
            (shuffle buffer of buffer_size elements) before batching
    Returns:
        tf.data.Dataset yielding batches of batch_size (image, label) pairs as
        produced by read_image_scale
    """
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.map(read_image_scale, num_parallel_calls = tf.data.AUTOTUNE)

    if is_training:
        # Cache first, then shuffle: the cache stores the loaded images, and
        # the shuffle is applied on top of it each time the dataset is iterated.
        dataset = dataset.cache()
        dataset = dataset.shuffle(buffer_size = buffer_size)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size = tf.data.AUTOTUNE)

    return dataset

# Training pipeline (cached and shuffled) and test pipeline (original order).
train_images = create_dataset(filenames = X_train_final, labels = Y_train_final)
test_images = create_dataset(filenames = X_test, labels = Y_test, is_training = False)

In [None]:
# Create initial model: init_model

# Convolutional base: ResNet50V2 without its classification head. The input
# shape is taken from img_height / img_width, the size the data pipeline
# resizes every image to, so the two cannot drift apart.
init_model = tf.keras.applications.resnet_v2.ResNet50V2(
    include_top = False,
    input_shape = (img_height, img_width, 3),
)

n_classes = 11
n_epochs = 10

# Random augmentation layers placed in front of the convolutional base.
data_aug = tf.keras.Sequential([
    tf.keras.layers.RandomFlip(mode = 'horizontal'),
    tf.keras.layers.RandomRotation(factor = 0.3),
    tf.keras.layers.RandomZoom(height_factor = (-0.3, 0.3)),
])

inputs = tf.keras.layers.Input(shape = [img_height, img_width, 3])

x = data_aug(inputs)

x = init_model(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)

# Classification head: dropout, one L2-regularised hidden layer, softmax output.
layer1 = tf.keras.layers.Dropout(.2)(x)
layer1 = tf.keras.layers.Dense(
    128,
    activation = 'relu',
    kernel_regularizer = tf.keras.regularizers.l2(0.01),
    bias_regularizer = tf.keras.regularizers.l2(0.01),
)(layer1)
outputs = tf.keras.layers.Dense(n_classes, activation = 'softmax')(layer1)

model = tf.keras.Model(inputs = inputs, outputs = outputs)

# Compile model

learning_rate = 0.0001
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate),
    # Integer class ids as targets; the softmax output is probabilities, not logits.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    metrics = ['accuracy'],
)

# Fit model
# NOTE(review): the test batches are used as validation data here and are
# evaluated again after training -- confirm that no separate validation split
# is intended.
model.fit(train_images, epochs = n_epochs, validation_data = test_images)
model.save('init_model_fin.h5')

In [None]:
# Evaluate the Fitted Model on the Test Batches

model.evaluate(x = test_images)

In [None]:
# get curiosity rover data for past 1000 sols as of 8/8/2022
# greatest amount available to pull using nasa api

# The API key is read from the environment so that it is never stored in the
# notebook.
# NOTE(review): any key that was ever saved in this notebook in plain text
# should be treated as leaked and regenerated.
key = os.environ.get('NASA_API_KEY')
if not key:
    raise RuntimeError('Set the NASA_API_KEY environment variable before running this cell.')
nasa = nasapy.Nasa(key = key)
base_sol = 3558   # most recent sol that is requested
n_sols = 1000     # number of consecutive sols requested, counting back from base_sol

curiosity_data = []

for sol in range(n_sols):
    curiosity = nasa.mars_rover(sol = base_sol - sol)
    curiosity_data.append(curiosity)

# convert data into usable dataframe: curiosity_fin

# One row per requested sol, one column per position in that sol's response.
# NOTE(review): presumably every cell holds one photo record (or None where a
# sol returned fewer records) -- confirm against the nasapy return value.
curiosity_inter = pd.DataFrame(curiosity_data)
n_rows, n_cols = curiosity_inter.shape

# Cells are visited column by column, by position (.iat), so the lookup does
# not depend on the index and column labels happening to equal the positions.
filler_list = [
    pd.DataFrame(curiosity_inter.iat[row_pos, col_pos])
    for col_pos in range(n_cols)
    for row_pos in range(n_rows)
]
curiosity_fin = pd.concat(filler_list, ignore_index = True)
# Keep a single row per image URL.
curiosity_fin = curiosity_fin.drop_duplicates(subset = 'img_src')

In [28]:
# Create TF Dataset that is usable for Model Prediction

def _download_image(url):
    """Download one Image URL into the Keras file cache and Return its Path.
        Args:
            url: Scalar string tensor holding the Image URL (this function is
                run through tf.py_function, so the tensor is eager)
        Returns:
            Local path of the downloaded file
            """
    import hashlib

    origin = url.numpy().decode('utf-8')
    # The cache file name is derived from the URL. With one constant name,
    # get_file finds the first download on disk and returns that same file
    # for every later URL.
    fname = hashlib.sha256(origin.encode('utf-8')).hexdigest()
    return tf.keras.utils.get_file(fname = fname, origin = origin)


def read_image_url(url):
    """Create Tensor from Image URL.
        Args:
            url: Image URL
        Returns:
            Image tensor decoded as a 3-channel JPEG, resized to
            img_height x img_width and divided by 255
            """
    # get_file is plain Python and cannot work on the symbolic string tensor
    # that Dataset.map passes in, so the download runs inside tf.py_function.
    local_path = tf.py_function(_download_image, [url], tf.string)
    local_path.set_shape([])
    img = tf.io.read_file(local_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [img_height, img_width])
    img = img / 255
    return img

def create_dataset_from_url(filenames, is_training=True):
    """Build a batched, prefetched tf.data pipeline of images loaded from URLs.
    Args:
        filenames: image URLs, one per example
        is_training: when truthy, the loaded images are cached and shuffled
            (shuffle buffer of buffer_size elements) before batching
    Returns:
        tf.data.Dataset yielding batches of batch_size images as produced by
        read_image_url
    """
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.map(read_image_url, num_parallel_calls = tf.data.AUTOTUNE)

    if is_training:
        dataset = dataset.cache()
        dataset = dataset.shuffle(buffer_size = buffer_size)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size = tf.data.AUTOTUNE)

    return dataset

# Unshuffled pipeline over the image URLs, in the row order of curiosity_fin
curiosity_images = create_dataset_from_url(filenames = curiosity_fin['img_src'], is_training = False)

# Predict the Classes of the Newest Images
model.predict(x = curiosity_images)