In [275]:
import pandas as pd

from agage_archive.io import read_ale_gage
from agage_archive.data_selection import read_release_schedule
from agage_archive.io_other_formats import read_wang
from agage_archive.util import tz_local_to_utc


In [283]:
def read_files(species, site, network):
    """Load the two versions of a record that are to be compared.

    Args:
        species: Species identifier, forwarded unchanged to both readers.
        site: Site identifier, forwarded unchanged to both readers.
        network: Network identifier, forwarded unchanged to both readers.

    Returns:
        tuple: ``(df, df_wang)`` where ``df`` is the result of
        ``read_ale_gage(..., utc=False).to_pandas()`` and ``df_wang`` is the
        result of ``read_wang`` with its "mf" column renamed to "mf_wang".
    """
    # Rename into a new frame rather than in place, so the object handed back
    # by read_wang is never mutated. The new name keeps the column distinct
    # from "mf" in `df` when the two frames are placed side by side.
    df_wang = read_wang(species, site, network).rename(columns={"mf": "mf_wang"})

    # utc=False is passed explicitly to the reader
    # NOTE(review): presumably this keeps timestamps in site-local time - confirm
    df = read_ale_gage(species, site, network, utc=False).to_pandas()

    return df, df_wang

def check_timestamps(df, df_wang):
    """Report index values unique to either frame and reject unequal lengths.

    Index values present in only one of the two frames are printed (nothing
    is printed when there are none). Only a difference in row count raises;
    frames of equal length with different timestamps are reported but accepted.

    Raises:
        ValueError: If df and df_wang do not have the same number of rows.
    """
    ours = set(df.index)
    theirs = set(df_wang.index)

    missing_from_wang = ours - theirs
    if missing_from_wang:
        print(f"Timestamps only in df: {missing_from_wang}")

    missing_from_df = theirs - ours
    if missing_from_df:
        print(f"Timestamps only in df_wang: {missing_from_df}")

    # Row counts (not the de-duplicated sets) are compared here
    if len(df) == len(df_wang):
        return

    raise ValueError("Dataframes are different lengths. Check for duplicates")

def compare_scales(df, df_wang, plot = False):
    """ Return the mean of mf/mf_wang, optionally plotting the ratio.

    The two frames are joined column-wise on their indexes, the "mf" column
    is divided by the "mf_wang" column, and the mean of that ratio is returned.
    When plot is True the ratio series is also plotted.
    """

    side_by_side = pd.concat([df, df_wang], axis=1)
    ratio = side_by_side["mf"] / side_by_side["mf_wang"]

    if plot:
        ratio.plot(marker = ".", ylim = [0.95, 1.05], ylabel = "mf/mf_wang")

    return ratio.mean()


def list_missing_periods(df_only_timestamps, df):
    """Print the contiguous runs formed by a set of timestamps from df.

    Two timestamps belong to the same run when they sit on adjacent positions
    of ``df.index``. Each run is printed as ``"<first> to <last>"``; a run of
    one timestamp prints the same value twice.

    Args:
        df_only_timestamps: Iterable of index values. Each must be present in
            ``df.index`` (``Index.get_loc`` raises KeyError otherwise).
        df: Frame whose index defines which timestamps are adjacent.
            NOTE(review): assumes df.index is unique, so that get_loc returns
            a single integer position - confirm.

    Returns:
        None. The runs are only printed.
    """
    missing_periods = []
    current_period = []

    for timestamp in sorted(df_only_timestamps):
        if current_period:
            # Position in df directly after the last member of the open run
            next_pos = df.index.get_loc(current_period[-1]) + 1
            # Bounds check: the open run may end on the last row of df
            if next_pos < len(df.index) and df.index[next_pos] == timestamp:
                current_period.append(timestamp)
                continue
            # The run is broken: close it ...
            missing_periods.append(current_period)
        # ... and let this timestamp open the next run (it must not be dropped)
        current_period = [timestamp]

    # The last run is still open when the loop finishes
    if current_period:
        missing_periods.append(current_period)

    for period in missing_periods:
        print(f"{period[0]} to {period[-1]}")


def flagged(df, df_wang, utc = True, site = None):
    """ Find values that are NaN in df_wang, but not in df or vice versa

    Prints, under a "Wang flagged:" heading, the timestamps where
    df_wang["mf_wang"] is NaN while df["mf"] is not, and under a
    "Rigby flagged:" heading the timestamps for the opposite case.

    Args:
        df: Frame with an "mf" column and an index whose values support strftime.
        df_wang: Frame with an "mf_wang" column, indexed like df.
        utc: If True, the flagged timestamps are passed through
            tz_local_to_utc before being printed.
        site: Site handed to tz_local_to_utc. When omitted, a module-level
            variable named ``site`` is used instead, which keeps existing
            two-argument calls working; passing it explicitly is preferred.

    Raises:
        ValueError: If a timestamp conversion is needed (utc is True and
            something is flagged) but no site was passed and no module-level
            ``site`` exists.
    """

    def resolve_site():
        """ Return the explicit site, else the module-level fallback """
        if site is not None:
            return site
        try:
            # Backward compatibility with callers that relied on a global `site`
            return globals()["site"]
        except KeyError:
            raise ValueError("site must be given when utc=True") from None

    def flagged_compare(df1, df2):
        """ Find values that are NaN in df1, but not in df2 """
        flagged_in_1_not_2 = df1.isna() & df2.notna()
        indices = flagged_in_1_not_2[flagged_in_1_not_2].index
        if len(indices) > 0:
            if utc:
                # Site is resolved lazily, only when there is something to convert
                indices = tz_local_to_utc(indices, resolve_site())
            for i in indices:
                print(i.strftime("%Y-%m-%d %H:%M"))

    print("Wang flagged:")
    flagged_compare(df_wang["mf_wang"], df["mf"])
    print("---------")

    print("Rigby flagged:")
    flagged_compare(df["mf"], df_wang["mf_wang"])
    print("---------")

In [287]:
# Run the df vs df_wang comparison for every site/species pair in the
# release schedule of one network, printing one report section per pair.
network = "ALE"
rs = read_release_schedule(network)

# Sites come from the schedule's columns and species from its index.
for site in rs.columns:
    for species in rs.index:
        print(species, site)
        df, df_wang = read_files(species, site, network)
        # Prints timestamps unique to either frame; raises if the lengths differ
        check_timestamps(df, df_wang)
        # NOTE(review): the mean ratio returned here is discarded, so with
        # plot=False this call has no visible effect - confirm that is intended
        compare_scales(df, df_wang, plot=False)
        # No site is passed, so with utc=True flagged() picks up the loop
        # variable `site` as a global; do not rename it.
        flagged(df, df_wang)
        print("      ")
        print("**********************")

cfc-11 ADR
Wang flagged:
1979-02-15 17:02
1979-02-15 23:05
1979-11-21 23:03
1979-11-22 05:04
1979-11-22 11:05
1979-11-22 23:03
1980-06-27 23:07
1981-05-20 16:20
1983-12-24 11:04
---------
Rigby flagged:
---------
      
**********************
cfc-12 ADR
Wang flagged:
1980-06-02 20:05
1981-05-18 10:49
1982-05-29 13:47
1983-02-25 20:02
1983-06-14 13:08
1983-07-07 20:03
---------
Rigby flagged:
---------
      
**********************
ch3ccl3 ADR
Wang flagged:
1978-09-16 17:00
1978-12-06 11:04
1979-04-27 23:01
1979-11-22 16:21
1983-12-08 23:03
---------
Rigby flagged:
---------
      
**********************
ccl4 ADR
Wang flagged:
1980-12-05 16:20
1980-12-05 16:57
1981-02-28 11:03
1982-03-30 23:01
1982-10-16 11:04
1983-11-15 16:53
---------
Rigby flagged:
---------
      
**********************
n2o ADR
Wang flagged:
1978-10-31 15:21
1980-05-26 14:12
1980-06-06 20:06
1981-01-12 14:03
1982-03-30 14:05
1982-05-26 13:48
1982-10-20 14:08
1983-02-25 20:02
1983-05-21 08:07
---------
Rigby flagged: