# FFT decomposition of atmosphere time sequences from Merra2

- author Sylvie Dagoret-Campagne
- affiliation : IJCLab
- creation date 2025-10-20 :
- last update : 2025-02-23
- Kernel @usdf **w_2024_50**
- Office emac : mamba_py311
- Home emac : base (conda)
- laptop : conda_py311

**Goal** : Fit the variation of the Merra2 parameters that impact the transmission

- CO2 fit : https://scikit-learn.org/stable/auto_examples/gaussian_process/plot_gpr_co2.html#sphx-glr-auto-examples-gaussian-process-plot-gpr-co2-py

- Kernels : https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels

In [None]:
# Report the version of the Python interpreter running this notebook
from platform import python_version
print(python_version())

In [None]:
import warnings
# Clear the existing warning filters, then install a single filter that
# ignores every warning for the rest of the session
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# NOTE(review): this cell repeats the Python version report of the first cell
from platform import python_version
print(python_version())

In [None]:
import os

In [None]:
# Directory where the figures are stored
pathfigs = "figsFFTAtmosphereFomMerra2"
# exist_ok=True makes this a no-op when the directory is already there and
# removes the gap between a separate existence check and the creation
os.makedirs(pathfigs, exist_ok=True)
# File extension appended to every saved figure name
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
# Style of the text boxes (later passed as bbox= to the axes text calls).
# NOTE: props is assigned again in a later cell, before any plot uses it.
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from astropy.coordinates.earth import EarthLocation
from datetime import datetime
from pytz import timezone

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
# Do not limit the number of columns shown when a DataFrame is displayed
pd.set_option("display.max_columns", None)
# Limit the number of displayed rows to 100
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

# Notebook-wide matplotlib defaults: small default figure, extra-large fonts
# for axis labels, titles and tick labels
plt.rcParams.update({
    "figure.figsize": (4, 3),
    "axes.labelsize": "xx-large",
    "axes.titlesize": "xx-large",
    "xtick.labelsize": "xx-large",
    "ytick.labelsize": "xx-large",
})

import scipy
from scipy.optimize import curve_fit,least_squares

In [None]:
# Text box style used by the statistics boxes of the plots below
# (rounded, wheat-coloured, half transparent)
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

In [None]:
from astropy.modeling import models

In [None]:
from numpy.random import lognormal

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time
from astropy.timeseries import TimeSeries

In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
# Display the installed numpy version
np.__version__

In [None]:
# Display the installed pandas version
pd.__version__

In [None]:
from scipy.fftpack import fft, fftfreq

In [None]:
def convertNumToDatestr(num):
    """Convert a date packed as the number YYYYMMDD into a pandas Timestamp.

    Parameters
    ----------
    num : int, float or str
        Date encoded as ``year*10_000 + month*100 + day`` (e.g. ``20240115``).
        The value is coerced with ``int()``, so whole-number floats
        (``20240115.0``) and digit strings (``"20240115"``) are accepted too.

    Returns
    -------
    pandas.Timestamp
        Result of ``pd.to_datetime`` applied to the ``YYYY-MM-DD`` string
        built from ``num``.

    Raises
    ------
    ValueError
        If ``num`` is a float with a fractional part, cannot be converted to
        an integer, or does not encode a date ``pd.to_datetime`` can parse.
    """
    as_int = int(num)
    # A fractional float cannot be a packed date; refuse it instead of truncating
    if isinstance(num, float) and as_int != num:
        raise ValueError(f"not an integer YYYYMMDD date: {num!r}")

    # Peel off the fields from the right: last two digits are the day,
    # the two before are the month, the rest is the year
    year, month_day = divmod(as_int, 10_000)
    month, day = divmod(month_day, 100)

    datestr = f"{year:04d}-{month:02d}-{day:02d}"
    return pd.to_datetime(datestr)

In [None]:
def pdf_lognormal(x,a0,mu,sigma):
    """Evaluate a log-normal probability density scaled by ``a0``.

    Computes ``a0 * exp(-(ln(x) - mu)**2 / (2*sigma**2)) / (x*sigma*sqrt(2*pi))``.

    Parameters
    ----------
    x : float or array-like
        Point(s) where the density is evaluated. Plain lists and tuples are
        converted to a float array; any other input is used as given.
    a0 : float
        Multiplicative amplitude applied to the density.
    mu : float
        Mean of ``ln(x)``.
    sigma : float
        Standard deviation of ``ln(x)``.

    Returns
    -------
    Same kind of object as the arithmetic on ``x`` produces (a scalar for a
    scalar ``x``, an array for an array, list or tuple).
    """
    # A Python list/tuple cannot be multiplied by a float, so turn it into an
    # array first; arrays and other array-like objects are left untouched
    if isinstance(x, (list, tuple)):
        x = np.asarray(x, dtype=float)

    gaussian_term = np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2))
    normalisation = x * sigma * np.sqrt(2 * np.pi)
    pdf = a0*(gaussian_term / normalisation)
    return pdf

In [None]:
def fourier_analysis(dates, values,mode = "logxlogy",title="Analyse de Fourier - Spectre des fréquences",
                    xlabel="Fréquence (cycles par jour)",ylabel="Amplitude",label="Amplitude spectrale",figname=""):
    """Plot the one-sided FFT amplitude spectrum of a time series.

    The mean of ``values`` is subtracted, the FFT is divided by ``sqrt(N)``
    and the absolute value of the first ``N // 2`` bins is drawn against
    their frequency. Vertical reference lines mark a set of fixed periods
    (1 year down to half a day) and the Nyquist frequency.

    Parameters
    ----------
    dates : array-like
        Sample times. The sampling period is the mean difference between
        consecutive entries. The reference lines are labelled in days, so the
        times are expected in days (the calls in this notebook pass MJD).
    values : array-like
        Samples of the signal, one per entry of ``dates``.
    mode : str
        Axis scaling: ``"logxliny"``, ``"logxlogy"``, ``"linxlogy"`` or
        ``"linxliny"``. Any other value leaves the axis scales untouched.
    title, xlabel, ylabel : str
        Figure title and axis labels.
    label : str
        Legend entry of the spectrum curve.
    figname : str
        Path the figure is saved to; nothing is saved when empty.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Remove the mean of the signal before transforming it
    centered = values - np.mean(values)

    n_samples = len(dates)
    # Sampling period taken as the mean spacing between consecutive dates
    sampling_period = np.mean(np.diff(dates))

    # Nyquist frequency (Shannon limit) for that sampling period
    nyquist_freq = 1 / (2 * sampling_period)

    # Fourier transform, scaled by 1/sqrt(N), and the matching frequencies
    spectrum = fft(centered)/ np.sqrt(n_samples)
    all_freqs = fftfreq(n_samples, sampling_period)

    # Only the first half of the bins is plotted (symmetric spectrum)
    half = n_samples // 2
    shown_freqs = all_freqs[:half]
    shown_amplitudes = np.abs(spectrum[:half])

    plt.figure(figsize=(16, 6),layout="constrained")
    plt.plot(shown_freqs, shown_amplitudes,'ob-' ,ms=5,label=label)

    # mode -> (x scale, y scale); unknown modes keep the current scales
    axis_scales = {
        "logxliny": ("log", "linear"),
        "logxlogy": ("log", "log"),
        "linxlogy": ("linear", "log"),
        "linxliny": ("linear", "linear"),
    }
    scales = axis_scales.get(mode)
    if scales is not None:
        plt.xscale(scales[0])
        plt.yscale(scales[1])

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    # Reference periods drawn as vertical lines: (frequency, color, linestyle, legend entry)
    reference_lines = (
        (1/365.25, 'r', '-', "Cycle : 365 days - 1 year"),
        (1/182.625, 'r', '--', "Cycle : 182.6 days - 6 months"),
        (1/91.3125, 'r', '-.', "Cycle : 91.3 days - 3 months"),
        (1/30.4375, 'r', ':', "Cycle : 30.4 days - 1 month"),
        (1/7, 'purple', '--', "Cycle : 7 days - 1 week"),
        (1, 'purple', '-', "Cycle : 1 day "),
        (1/0.5, 'purple', '-.', "Cycle : 0.5 day "),
        (nyquist_freq, 'g', '--', f"Nyquist frequency({nyquist_freq:.3f} cycles/days)"),
    )
    for line_freq, line_color, line_style, line_label in reference_lines:
        plt.axvline(line_freq, color=line_color, linestyle=line_style, label=line_label)

    plt.legend(bbox_to_anchor=(1.1, 1.05),fontsize=18)

    # Save only when a file name was given
    if figname !="":
        plt.savefig(figname)

    plt.show()

# Call the function with your data
# fourier_analysis(dates, values)

In [None]:
# Mean length of a month in days (30.4375), the period of the "1 month"
# reference line drawn by fourier_analysis
365.25/12

## Configuration

In [None]:
# Observatory site looked up by name through astropy's EarthLocation.of_site
# NOTE(review): of_site may need network access to fetch the site list — confirm on offline nodes
observing_location = EarthLocation.of_site('Rubin Observatory')
# pytz time zone object for America/Santiago
# NOTE(review): neither name is used in the part of the notebook shown here
tz = timezone('America/Santiago')

### MERRA2

In [None]:
# Merged Merra2 CSV read into df_m; the TQV (PWV) and TO3 (ozone) columns are used below
filename_m2 = "../../SpectroMerra2/MerradataMerged/Merge_inst1_2d_asm_Nx_M2I1NXASM-2021-2024.csv"
# Merged Merra2 aerosol CSV read into df_mb; the TOTEXTTAU and TOTANGSTR columns are used below
filename_m2b = "../../SpectroMerra2/MerradataMerged/Merge_tavg1_2d_aer_Nx_M2T1NXAER-2021-2024.csv"

In [None]:
# Load the two merged Merra2 tables (df_m: asm file, df_mb: aerosol file)
df_m = pd.read_csv(filename_m2)
df_mb = pd.read_csv(filename_m2b)

In [None]:
# Number of rows of each Merra2 table
Nm, Nmb = len(df_m), len(df_mb)
print("Number of points :: ", Nm, Nmb)

In [None]:
# Display the column names of the aerosol table
df_mb.columns

In [None]:
# First and last timestamp of the `time` column of df_m.
# NOTE(review): min()/max() are taken on the raw column before the datetime
# conversion; if the column holds strings this is a lexicographic comparison — confirm the format sorts correctly.
# NOTE(review): TMIN/TMAX only appear in commented-out set_xlim calls in the cells shown here.
TMIN = pd.to_datetime(df_m.time.min())
TMAX = pd.to_datetime(df_m.time.max())

### Convert to MJD

In [None]:
# Add a Modified Julian Date column to each table, computed from its `time` column
df_m["mjd"] = Time(pd.to_datetime(df_m.time.values)).mjd
df_mb["mjd"] = Time(pd.to_datetime(df_mb.time.values)).mjd

In [None]:
# MJD bounds of a zoom window (2024-01-01 to 2025-06-30).
# NOTE(review): not used in the cells shown here; the stop date lies after the
# 2021-2024 range given in the input file names.
mjd_zoom_start = Time("2024-01-01").mjd
mjd_zoom_stop = Time("2025-06-30").mjd

In [None]:
# MJD of the first and last sample of df_m
mjd_obs_start, mjd_obs_stop = df_m["mjd"].min(), df_m["mjd"].max()

## PWV

In [None]:
from matplotlib.dates import DateFormatter

# Tick label format of the date axis: two-digit year and month
date_form = DateFormatter("%y-%m")

# Two stacked panels: PWV versus date (top) and versus MJD (bottom)
fig = plt.figure(figsize=(18,10))
gs = GridSpec(2, 1,figure=fig)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# Queried before any legend is drawn on the new axes; the names are kept
# because they are module-level variables of the notebook
leg1=ax1.get_legend()
leg2=ax2.get_legend()

# Top panel. The date axis is built from the `time` column, i.e. the column the
# `mjd` values of the bottom panel are computed from, so that both panels share
# the same time base (the other cells access this column as `time`, not `Time`).
ax1.plot(pd.to_datetime(df_m.time.values), df_m.TQV.values,c="b",lw=0.5,label="Merra2")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("Precipitable water vapor from Merra2")
ax1.legend()
ax1.set_ylabel("PWV (mm)")

# Summary statistics of the whole series, shown in a text box on the top panel
data = df_m.TQV.values
mean = np.mean(data)
median = np.median(data)
std = np.std(data)
textstr = "\n".join((f"Expected max-range for PWV : ",
                     f"- average : {mean:.2f} mm",
                     f"- median : {median:.2f} mm",
                     f"- sigma : {std:.2f} mm",
                    ))
ax1.text(0.05, 0.95, textstr, transform=ax1.transAxes, fontsize=14,verticalalignment='top', bbox=props)

# Bottom panel: same series against MJD
ax2.plot(df_m.mjd, df_m.TQV.values,c="b",lw=0.5,label="Merra2")
ax2.set_xlabel("time (MJD)")
ax2.legend()
ax2.set_ylabel("PWV (mm)")

figname =f"{pathfigs}/pwv_allpoints_merra2"+figtype
fig.savefig(figname)
plt.show()


### FFT for PWV

In [None]:
# Inputs of the Fourier analysis: sample times (MJD) and the TQV (PWV) samples
dates = df_m.mjd
values = df_m.TQV.values
# File the FFT figure is saved to
figname =f"{pathfigs}/FFT_pwv_merra2"+figtype

In [None]:
# Amplitude spectrum of the PWV series (log frequency axis, linear amplitude axis).
# fourier_analysis puts the frequency on the x axis; with dates in MJD (days)
# that frequency is in cycles per day, hence the x label.
fourier_analysis(dates, values, mode= "logxliny",
                 title = "PWV Absolute value of FFT",
                 xlabel="Frequency (cycles per day)",
                 ylabel="PWV (mm)",
                 label="Merra2 PWV amplitude",
                 figname=figname)

## Ozone

In [None]:
from matplotlib.dates import DateFormatter

# Tick label format of the date axis: two-digit year and month
date_form = DateFormatter("%y-%m")

# Two stacked panels: ozone versus date (top) and versus MJD (bottom)
fig = plt.figure(figsize=(18,10))
gs = GridSpec(2, 1,figure=fig)

ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# Queried before any legend is drawn on the new axes; the names are kept
# because they are module-level variables of the notebook
leg1=ax1.get_legend()
leg2=ax2.get_legend()

# Top panel. The date axis is built from the `time` column, i.e. the column the
# `mjd` values of the bottom panel are computed from, so that both panels share
# the same time base (the other cells access this column as `time`, not `Time`).
ax1.plot(pd.to_datetime(df_m.time.values), df_m.TO3.values,c="r",lw=0.5,label="Merra2")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("Ozone from Merra2")
#ax1.legend()
ax1.set_ylabel("Ozone (DU)")

# Summary statistics of the whole series, shown in a text box on the top panel
data = df_m.TO3.values
mean = np.mean(data)
median = np.median(data)
std = np.std(data)
textstr = "\n".join((f"Expected range for Ozone : ",
                     f"- average : {mean:.2f} DU",
                     f"- median : {median:.2f} DU",
                     f"- sigma : {std:.2f} DU",
                    ))
ax1.text(0.05, 0.95, textstr, transform=ax1.transAxes, fontsize=14,verticalalignment='top', bbox=props)

# Bottom panel: same series against MJD
ax2.plot(df_m.mjd, df_m.TO3.values,c="r",lw=0.5,label="Merra2")
ax2.set_xlabel("time (MJD)")
ax2.legend()
ax2.set_ylabel("Ozone (DU)")

figname =f"{pathfigs}/ozone_allpoints_merra2"+figtype
fig.savefig(figname)
plt.show()


### FFT for Ozone

In [None]:
# Inputs of the Fourier analysis: sample times (MJD) and the TO3 (ozone) samples
dates = df_m.mjd
values = df_m.TO3.values
# File the FFT figure is saved to
figname =f"{pathfigs}/FFT_ozone_merra2"+figtype

In [None]:
# Amplitude spectrum of the ozone series (log frequency axis, linear amplitude axis).
# fourier_analysis puts the frequency on the x axis; with dates in MJD (days)
# that frequency is in cycles per day, hence the x label.
fourier_analysis(dates, values, mode= "logxliny",
                 title = "Ozone Absolute value of FFT",
                 xlabel="Frequency (cycles per day)",
                 ylabel="Ozone (DU)",
                 label="Merra2 Ozone amplitude",
                 figname=figname)

## Aerosol VAOD

In [None]:
from matplotlib.dates import DateFormatter

# Tick label format of the date axis: two-digit year and month
date_form = DateFormatter("%y-%m")

# Two stacked panels: VAOD versus date (top) and versus MJD (bottom)
fig = plt.figure(figsize=(18,10))
gs = GridSpec(2, 1,figure=fig)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# Queried before any legend is drawn on the new axes; the names are kept
# because they are module-level variables of the notebook
leg1=ax1.get_legend()
leg2=ax2.get_legend()

# Top panel. The date axis is built from the `time` column, i.e. the column the
# `mjd` values of the bottom panel are computed from, so that both panels share
# the same time base (the other cells access this column as `time`, not `Time`).
ax1.plot(pd.to_datetime(df_mb.time.values), df_mb.TOTEXTTAU.values,c="g",lw=0.5,label="Merra2")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("VAOD from Merra2")
ax1.legend()
ax1.set_ylabel("VAOD")

# Summary statistics of the whole series, shown in a text box on the top panel
data = df_mb.TOTEXTTAU.values
mean = np.mean(data)
median = np.median(data)
std = np.std(data)
textstr = "\n".join((f"Expected max-range for VAOD : ",
                     f"- average : {mean:.3f}",
                     f"- median : {median:.3f}",
                     f"- sigma : {std:.3f}",
                    ))
ax1.text(0.05, 0.95, textstr, transform=ax1.transAxes, fontsize=14,verticalalignment='top', bbox=props)

# Bottom panel: same series against MJD
ax2.plot(df_mb.mjd, df_mb.TOTEXTTAU.values,c="g",lw=0.5,label="Merra2")
ax2.set_xlabel("time (MJD)")
ax2.legend()
ax2.set_ylabel("VAOD")

figname =f"{pathfigs}/vaod_allpoints_merra2"+figtype
fig.savefig(figname)
plt.show()


### FFT for aerosols - VAOD

In [None]:
# Inputs of the Fourier analysis: sample times (MJD) and the TOTEXTTAU (VAOD) samples
dates = df_mb.mjd
values = df_mb.TOTEXTTAU.values
# File the FFT figure is saved to
figname =f"{pathfigs}/FFT_vaod_merra2"+figtype

In [None]:
# Amplitude spectrum of the VAOD series (log frequency axis, linear amplitude axis).
# fourier_analysis puts the frequency on the x axis; with dates in MJD (days)
# that frequency is in cycles per day, hence the x label.
fourier_analysis(dates, values, mode= "logxliny",
                 title = "VAOD - aerosols Absolute value of FFT",
                 xlabel="Frequency (cycles per day)",
                 ylabel="VAOD",
                 label="Merra2 VAOD amplitude",
                 figname=figname)

## Aerosol Angstrom

In [None]:
from matplotlib.dates import DateFormatter

# Tick label format of the date axis: two-digit year and month
date_form = DateFormatter("%y-%m")

# Two stacked panels: Angstrom exponent versus date (top) and versus MJD (bottom)
fig = plt.figure(figsize=(18,10))
gs = GridSpec(2, 1,figure=fig)

ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# Queried before any legend is drawn on the new axes; the names are kept
# because they are module-level variables of the notebook
leg1=ax1.get_legend()
leg2=ax2.get_legend()

# Top panel. The date axis is built from the `time` column, i.e. the column the
# `mjd` values of the bottom panel are computed from, so that both panels share
# the same time base (the other cells access this column as `time`, not `Time`).
ax1.plot(pd.to_datetime(df_mb.time.values), df_mb.TOTANGSTR.values,c="purple",lw=0.5,label="Merra2")
ax1.set_xlabel("time")
ax1.xaxis.set_major_formatter(date_form)
ax1.set_title("VAOD_Angstrom from Merra2")
ax1.legend()
ax1.set_ylabel("VAOD_Angstrom")

# Summary statistics of the whole series, shown in a text box on the top panel
data = df_mb.TOTANGSTR.values
mean = np.mean(data)
median = np.median(data)
std = np.std(data)
textstr = "\n".join((f"Expected max-range for VAOD-Angstrom : ",
                     f"- average : {mean:.3f}",
                     f"- median : {median:.3f}",
                     f"- sigma : {std:.3f}",
                    ))
ax1.text(0.05, 0.95, textstr, transform=ax1.transAxes, fontsize=14,verticalalignment='top', bbox=props)

# Bottom panel: same TOTANGSTR series against MJD, so it carries the same
# y label as the top panel (it was labelled "VAOD" by copy-paste)
ax2.plot(df_mb.mjd, df_mb.TOTANGSTR.values,c="purple",lw=0.5,label="Merra2")
ax2.set_xlabel("time (MJD)")
ax2.legend()
ax2.set_ylabel("VAOD_Angstrom")

figname =f"{pathfigs}/vaodangstrom_allpoints_merra2"+figtype
fig.savefig(figname)
plt.show()

### FFT For Aerosol Angstrom

In [None]:
# Inputs of the Fourier analysis: sample times (MJD) and the TOTANGSTR (Angstrom exponent) samples
dates = df_mb.mjd
values = df_mb.TOTANGSTR.values
# File the FFT figure is saved to
figname =f"{pathfigs}/FFT_angstrom_merra2"+figtype

In [None]:
# Amplitude spectrum of the Angstrom exponent series (log frequency axis, linear amplitude axis).
# fourier_analysis puts the frequency on the x axis; with dates in MJD (days)
# that frequency is in cycles per day, hence the x label.
fourier_analysis(dates, values, mode= "logxliny",
                 title = "Angstrom exponent - aerosols Absolute value of FFT",
                 xlabel="Frequency (cycles per day)",
                 ylabel="Angstrom exponent",
                 label="Merra2 Angstrom  amplitude",
                 figname=figname)