# This script combines and examines CSV files for daily extremes


This will look through the daily .csv files generated by another script from a set of MYD021KM files. Each CSV contains one line for every pixel below a defined threshold, and each line lists:
 - Filename
 - Lat
 - Lon
 - Brightness Temperature

To process the raw MYD021KM files, you must run: `../Scripts/MODIS_MINT.py`

In [None]:
from datetime import datetime, timedelta
import pandas as pd
import os

In [None]:
# Top-level directory holding the daily CSV inputs; the loader looks
# beneath it in YYYY/MM/ subdirectories.
indir = '/gf2/eodg/SRP001_PROUD_TURBREP/MODIS_MINT/MINT/EXTREME/'

# First and last day (both inclusive) of the period to process
start_dt = datetime(year=2005, month=1, day=1)
end_dt = datetime(year=2020, month=12, day=31)

# One entry per calendar day across the whole period
daterange = pd.date_range(start=start_dt, end=end_dt)

In [None]:
def proc_ex_csv(cur_date, indir):
    """Retrieve daily cold BT info from CSV file.

    Args:
        cur_date: Day to load; any object with a ``strftime`` method
            (e.g. ``datetime`` or ``pandas.Timestamp``).
        indir: Top-level input directory, with or without a trailing
            path separator. The file is looked up at
            ``<indir>/YYYY/MM/MYD_EX_YYYYMMDD.csv``.

    Returns:
        A DataFrame with columns ``lat``, ``lon`` and ``BT``, indexed by
        ``ftime`` (the time parsed from each row's filename), or ``None``
        if no CSV exists for ``cur_date``.

    Raises:
        ValueError: If a filename's timestamp cannot be parsed with the
            ``%Y%j.%H%M`` format.
    """

    # Construct filename; os.path.join copes with indir lacking a trailing '/'
    day_file = 'MYD_EX_' + cur_date.strftime("%Y%m%d") + '.csv'
    inf = os.path.join(indir, cur_date.strftime("%Y/%m"), day_file)
    # If file doesn't exist, warn and return None
    if not os.path.exists(inf):
        print(" -  BAD:", cur_date, inf)
        return None
    # Otherwise, read the file (it has no header row)
    data_df = pd.read_csv(inf, names=['fname', 'lat', 'lon', 'BT'])

    # Pull the 'YYYYDDD.HHMM' token out of the MODIS filename by pattern
    # rather than by fixed character offset, so the result does not depend
    # on how long the directory part of the stored path is.
    # Assumes the usual '<product>.AYYYYDDD.HHMM.<...>' granule naming.
    stamps = data_df['fname'].str.extract(r'\.A(\d{7}\.\d{4})\.', expand=False)
    # Rows that do not match fall back to the historical fixed slice, so
    # inputs that worked before keep producing the same values.
    stamps = stamps.fillna(data_df['fname'].str[61:73])
    data_df['ftime'] = pd.to_datetime(stamps, format='%Y%j.%H%M')

    # We don't need the filename, delete it
    del data_df['fname']
    # Set the datetime to be the index
    return data_df.set_index('ftime')

In [None]:
# Initialise the list that stores each daily dataframe
extre_list = []

# Loop over date range, load files
for counter, cur_date in enumerate(daterange, start=1):

    # Retrieve data and add to list; days without a CSV come back as None
    exdf = proc_ex_csv(cur_date, indir)
    if exdf is not None:
        extre_list.append(exdf)
    # Print every 101st date just so we know it's working
    if counter % 101 == 0:
        print(cur_date)

# pd.concat fails with an opaque "No objects to concatenate" on an empty
# list, so report the actual problem instead.
if not extre_list:
    raise ValueError("No daily CSV files could be loaded from " + indir)

extre_frame = pd.concat(extre_list, axis=0)

# Save everything to files
extre_frame.to_pickle("../data/extre_frame.pkl")