In [2]:
#    MARS SEASONS
#    ~~~~~~~~~~~~
#
#    - Mars mission data processor by season.
#    - Author: Raul Jesus Lopez @la9una
#    - GitHub: https://github.com/la9una/mars_habitability
#
#    This script processes environmental data from the Mars2020 mission,
#    adding a 'Season' column based on Martian solar longitude (L_s) 
#    and generating a representative sample for each season.
#    
#    Actions:
#    1. Download data from GitHub.
#    2. Add seasons based on L_s:
#       - Spring: 0° ≤ L_s < 90°
#       - Summer: 90° ≤ L_s < 180°
#       - Autumn: 180° ≤ L_s < 270°
#       - Winter: 270° ≤ L_s < 360°
#    3. Create a stratified sample:
#       - Select 30 samples or 10% of records per season (whichever is larger).
#       - Include all records if fewer than 30 are available.
#    4. Save:
#       - Full dataset with seasons.
#       - Representative sample.
#
#    Data Source:
#    - Time equivalence table from Perseverance (Mars 2020) Analyst's Notebook:
#      https://an.rsl.wustl.edu/m20/AN/an3.aspx
#
#    Output Files:
#    - m20_perseverance_with_seasons.csv: Full dataset with seasons.
#    - m20_perseverance_sample.csv: Representative sample.


# Import libraries
import pandas as pd
import requests
import time
from io import StringIO

# Download a CSV file from a given URL.
def download_csv(url, timeout=30):
    """Fetch a CSV document over HTTP and parse it into a DataFrame.

    Args:
        url: Address of the CSV file to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block forever.

    Returns:
        A pandas DataFrame parsed from the response body.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status (raised by ``raise_for_status``).
        requests.exceptions.RequestException: For connection problems,
            including an expired timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return pd.read_csv(StringIO(response.text))

# Location of the Mars 2020 time-equivalence table (raw CSV hosted on GitHub)
url = "https://raw.githubusercontent.com/la9una/mars_habitability/refs/heads/main/m20_perseverance_time_table.csv"

# Download the table into a DataFrame (this performs a network request).
# NOTE(review): a bare `data.head()` only renders in a notebook when it is the
# last expression of the cell; here it is evaluated and discarded. Wrap it in
# print()/display() if a preview is actually wanted.
data = download_csv(url)
data.head()

# Determines the Martian season based on the solar longitude (L_s)
def determine_season(solar_longitude):
    """Map a solar longitude in degrees to a season label.

    Each season covers a half-open 90-degree interval, lower bound included:
    [0, 90) Spring, [90, 180) Summer, [180, 270) Autumn, [270, 360) Winter.

    Args:
        solar_longitude: Solar longitude (L_s) in degrees.

    Returns:
        One of 'Spring', 'Summer', 'Autumn', 'Winter', or 'Unknown' when the
        value lies outside [0, 360) (this includes NaN, which fails every
        comparison).
    """
    season_bounds = (
        (0, 90, 'Spring'),
        (90, 180, 'Summer'),
        (180, 270, 'Autumn'),
        (270, 360, 'Winter'),
    )
    for lower, upper, label in season_bounds:
        if lower <= solar_longitude < upper:
            return label
    return 'Unknown'

# Processes the CSV and adds a 'Season' column
def process_csv(data):
    """Normalise the solar-longitude column and add a 'Season' column.

    The 'Solar longitude (deg)' column is converted to float. When it holds
    text, decimal commas are first replaced with dots. When it is already
    numeric (for example because this function has already been applied to
    the same DataFrame, as happens when a notebook cell is re-run) the text
    step is skipped, since the ``.str`` accessor would raise on numeric data.

    Args:
        data: DataFrame containing a 'Solar longitude (deg)' column.
            It is modified in place.

    Returns:
        The same DataFrame object, with the longitude column as float and a
        new 'Season' column produced by ``determine_season``.

    Raises:
        KeyError: If the 'Solar longitude (deg)' column is missing.
        ValueError: If a value cannot be converted to float.
    """
    longitude = data['Solar longitude (deg)']
    if not pd.api.types.is_numeric_dtype(longitude):
        # Source values use a decimal comma (e.g. "12,5"); make them parseable.
        longitude = longitude.str.replace(',', '.', regex=False)
    data['Solar longitude (deg)'] = longitude.astype(float)
    data['Season'] = data['Solar longitude (deg)'].apply(determine_season)
    return data

# Convert the longitude column and attach the 'Season' label to every row.
# NOTE(review): process_csv modifies `data` in place and returns it, so
# `data_with_seasons` and `data` refer to the same DataFrame.
# The bare .head() below is not the last expression of the cell, so a notebook
# will not render it.
data_with_seasons = process_csv(data)
data_with_seasons.head()

# Generates a representative sample by season
def sample_by_season(data, min_sample_size=30, *, fraction=0.1, random_state=42):
    """Draw a per-season random sample without replacement.

    For every distinct value in the 'Season' column the sample size is the
    larger of ``fraction`` of that season's rows (truncated to an integer)
    and ``min_sample_size``, capped at the number of rows available, so a
    season with fewer than ``min_sample_size`` rows is included in full.

    Args:
        data: DataFrame with a 'Season' column.
        min_sample_size: Minimum number of rows to take per season.
        fraction: Share of each season's rows to sample (default 10%).
        random_state: Seed passed to ``DataFrame.sample`` for each season,
            making the result reproducible.

    Returns:
        A DataFrame with the sampled rows, seasons concatenated in order of
        first appearance and original index labels kept. An empty input
        yields an empty DataFrame with the same columns.

    Raises:
        KeyError: If ``data`` has no 'Season' column.
    """
    samples = []
    for season in data['Season'].unique():
        season_data = data[data['Season'] == season]
        available = len(season_data)
        wanted = max(int(fraction * available), min_sample_size)
        samples.append(
            season_data.sample(
                n=min(wanted, available),
                random_state=random_state,
                replace=False,
            )
        )
    if not samples:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # that keeps the input's columns instead.
        return data.iloc[0:0]
    return pd.concat(samples)

# Draw the per-season sample using the function's default settings
# (min_sample_size=30).
# The bare .head() below is not the last expression of the cell, so a notebook
# will not render it.
sample_data = sample_by_season(data_with_seasons)
sample_data.head()

# Write both tables to CSV (without the index column), then report each path.
# All files are written before anything is printed, so a failed write produces
# no success message.
_outputs = (
    (
        data_with_seasons,
        "m20_perseverance_with_seasons.csv",
        'Processed data saved to: m20_perseverance_with_seasons.csv',
    ),
    (
        sample_data,
        "m20_perseverance_sample.csv",
        'Sampled data saved to: m20_perseverance_sample.csv',
    ),
)
for _frame, _path, _ in _outputs:
    _frame.to_csv(_path, index=False)

for _, _, _message in _outputs:
    print(_message)
    

Processed data saved to: m20_perseverance_with_seasons.csv
Sampled data saved to: m20_perseverance_sample.csv
