# Mars Perseverance Waypoints
> This notebook reads and processes more than 1,000 of daily json files listing the locations traveled by the rover, known as Percy, since March 2021. 

#### Load Python tools and Jupyter config

In [1]:
import os
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
from glob import glob
import geopandas as gpd
import matplotlib.pyplot as plt
import altair_stiles as altstiles
from shapely.geometry import Point, LineString

In [2]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [3]:
today = pd.Timestamp("today").strftime("%Y%m%d")

---

## Read and combine

#### List all JSON files showing daily activity in the directory

In [4]:
files = glob("data/raw/waypoints_current_*.json")
files[:5]

['data/raw/waypoints_current_2021-08-07.json',
 'data/raw/waypoints_current_2023-03-10.json',
 'data/raw/waypoints_current_2021-04-24.json',
 'data/raw/waypoints_current_2021-06-19.json',
 'data/raw/waypoints_current_2022-07-17.json']

#### Read each file, normalize the json structure, assign a date column from file name and append in a list

In [5]:
dfs = []

for file in files:
    base = os.path.basename(file)
    date_str = base.replace("waypoints_current_", "").replace(".json", "")

    try:
        with open(file) as f:
            data = json.load(f)
    except json.JSONDecodeError:
        print(f"Skipping file {file} due to JSON decoding error.")
        continue

    # Extract 'features'
    features = data["features"]

    # Create a list to collect properties from all features
    all_features_properties = []

    # Loop through each feature to normalize its properties
    for feature in features:
        properties = feature["properties"]
        properties["date"] = date_str  # Add the date to each property
        all_features_properties.append(properties)

    # Convert the list of properties dictionaries into a dataframe
    properties_df = pd.DataFrame(all_features_properties)

    # Append that dataframe to our list
    dfs.append(properties_df)

#### Concatenate all the daily dataframes

In [6]:
all_data_df = pd.concat(dfs, ignore_index=True)

#### Convert longitude and latitude to geometric points and create geodataframe

In [7]:
geometry = [Point(xy) for xy in zip(all_data_df["lon"], all_data_df["lat"])]
geo_df = gpd.GeoDataFrame(all_data_df, geometry=geometry)

#### Ensure correct data types for potential date manipulation later

In [8]:
geo_df["date"] = pd.to_datetime(geo_df["date"])

#### Make numeric columns numeric

In [9]:
columns_to_convert = [
    "sol",
    "elev_geoid",
    # "easting",
    # "northing",
    # "radius",
    # "roll",
    # "pitch",
    # "yaw",
    # "tilt",
    "dist_m",
    "dist_km",
    "dist_mi",
]

for col in columns_to_convert:
    geo_df[col] = pd.to_numeric(geo_df[col], errors="coerce")

#### Sort geodataframe by earliest date

In [10]:
geo_df = geo_df.sort_values("sol", ascending=False)

---

## Process

#### Filter rows where 'images' is not NaN and contains data

In [11]:
has_images = geo_df["images"].apply(lambda x: isinstance(x, list) and len(x) > 0)

#### Empty dataframe to hold the normalized images data

In [12]:
images_df = pd.DataFrame()

#### Normalize and extract 'images' data where applicable

In [13]:
if has_images.any():
    # Normalize the 'images' data for rows that have it and reset index for merging
    images_info = pd.json_normalize(
        geo_df[has_images]["images"].explode()
    ).reset_index()
    # Select and rename columns if necessary from images_info
    selected_columns = images_info.drop_duplicates("index").set_index("index")

#### Merge the extracted 'images' data back into the original geodataframe

In [14]:
# Use a left join to keep all original rows, adding the new columns where available
geo_df = geo_df.merge(
    selected_columns, left_index=True, right_index=True, suffixes=(None, "_drop")
)

#### Clean up the columns

In [15]:
geo_df = geo_df[[col for col in geo_df.columns if not col.endswith("_drop")]]
geo_df = geo_df[[col for col in geo_df.columns if not col.endswith("_y")]]
geo_df = geo_df[[col for col in geo_df.columns if not col.endswith("_x")]]

#### Drop columns with all null values

In [16]:
geo_df_slim = geo_df.dropna(axis=1, how="any")
geo_df_cleaned = geo_df_slim.dropna(subset=["geometry"])

#### Convert the "[sol](https://en.wikipedia.org/wiki/Mars_sol)" column to earth days

In [17]:
sol_length_in_earth_days = 1 + 39 / (60 * 24) + 35 / (60 * 60 * 24)

In [18]:
geo_df_cleaned["earth_days"] = (
    geo_df_cleaned["sol"] * sol_length_in_earth_days
).round()

#### The result

In [19]:
len(geo_df_cleaned)

494

#### Set coordinate reference system for Martian geospatial data

In [21]:
geo_df_cleaned = geo_df_cleaned.set_crs("ESRI:104971")

#### Convert points to linestring to get rover path

In [22]:
# Remove rows with None, NaN, or empty geometries
valid_geo_df = geo_df_cleaned[
    ~(
        geo_df_cleaned.geometry.isnull()
        | geo_df_cleaned.geometry.apply(lambda geom: geom.is_empty)
    )
]

print(f"Valid geometries count: {len(valid_geo_df)}")

Valid geometries count: 480


#### Create the LineString

In [23]:
if not valid_geo_df.empty:
    # Extract coordinate tuples from each Point geometry
    coords = [(point.x, point.y) for point in valid_geo_df.geometry]

    # Ensure there are at least two points to form a LineString
    if len(coords) >= 2:
        # Create the LineString from the list of coordinate tuples
        linestring = LineString(coords)
        # Create a new GeoDataFrame with this LineString
        linestrings_gdf = gpd.GeoDataFrame(geometry=[linestring], crs=valid_geo_df.crs)
    else:
        print("Not enough points to form a LineString.")
else:
    print("No valid geometries to form a LineString.")

#### The result

In [24]:
linestrings_gdf = linestrings_gdf.set_crs("ESRI:104971")

---

## Exports

#### Rover path as linestring GeoJSON

In [None]:
linestrings_gdf.to_file(
    f"data/processed/rover_path_full.geojson",
    driver="GeoJSON",
)

#### Rover points as GeoJSON

In [28]:
try:
    geo_df_cleaned["date"] = pd.to_datetime(geo_df_cleaned["date"])
except Exception as e:
    print(f"An error occurred: {e}")

In [29]:
geo_df_cleaned.to_file(
    f"data/processed/rover_points_full.geojson",
    driver="GeoJSON",
)