In [1]:
import numpy as np
import sys
sys.path.insert(0, '..')
from algorithms.dbscan_gmm import DBSCAN_GMM
from algorithms.grid_based_dbscan import GridBasedDBSCAN
from algorithms.grid_based_dbscan_gmm import GridBasedDBSCAN_GMM
from utilities.plot_utils import *
from utility import Skills, ScatterDetection
import pandas as pd
import datetime
from plots_report import *
from get_sd_data import *
from matplotlib.dates import date2num, num2date

def estimate_skills(_dict_, labels):
    V, W, L = [], [], []
    for v, w, l in zip(_dict_["vel"], _dict_["wid"], labels):
        V.extend(v.tolist())
        W.extend(w.tolist())
        L.extend(l.tolist())
    V, W, L = np.array(V), np.array(W), np.array(L)
    X = np.array([V.tolist(), W.tolist()]).T
    sk = Skills(X, L)
    return sk

def estimate_df_skills(df, labels):
    V, W, G, T, L = np.array(df.v), np.array(df.w_l), np.array(df.slist), np.array(df.sec), np.array(df.labels)
    X = np.array([V.tolist(), W.tolist(), G.tolist()]).T
    sk = Skills(X, L)
    return sk

def _filter_by_time(start_time, end_time, data_dict):
    time = data_dict['time']
    start_i, end_i = None, None
    start_time, end_time = date2num(start_time), date2num(end_time)
    if start_time < time[0][0]: # Sometimes start time is a few seconds before the first scan
        start_time = time[0][0]
    for i, t in enumerate(time):
        if np.sum(start_time >= t) > 0 and start_i == None:
            start_i = i
        if np.sum(end_time > t) > 0 and start_i != None:
            end_i = i+1
    data_dict['gate'] = data_dict['gate'][start_i:end_i]
    data_dict['time'] = data_dict['time'][start_i:end_i]
    data_dict['beam'] = data_dict['beam'][start_i:end_i]
    data_dict['vel'] = data_dict['vel'][start_i:end_i]
    data_dict['wid'] = data_dict['wid'][start_i:end_i]
    data_dict['elv'] = data_dict['elv'][start_i:end_i]
    data_dict['trad_gsflg'] = data_dict['trad_gsflg'][start_i:end_i]
    return data_dict

def todf(dicts, keys=['gate', 'beam', 'vel', 'wid', 'time', 'trad_gsflg', 'elv', 'pow', 'clust_flg']):
    df = pd.DataFrame()
    _o = {}
    print(dicts.keys())
    for k in keys:
        _o[k] = []
        for x in dicts[k]:
            _o[k].extend(x)
    df = pd.DataFrame.from_records(_o)
    df = df.rename(columns={'gate':"slist", 'beam':"bmnum", 'vel':'v', 'wid':"w_l", 
                            'time':"time", 'pow':"p_l", 'clust_flg':"labels"})
    return df

def sma_bbox(scans, sdur=5, idx=None, dbeam=15, window=7):
    df = pd.DataFrame()
    plot=False
    for i in range(int(len(scans)/sdur)):
        if (idx is not None) and (i == idx): plot=True
        if i == 0: mlf = MiddleLatFilter(rad, scans=scans[i*sdur:(i+1)*sdur], plot=plot)
        elif i == int(len(scans)/sdur)-1: mlf._reset_(rad, scans[i*sdur:], plot=plot)
        else: mlf._reset_(rad, scans[i*sdur:(i+1)*sdur], plot=plot)
        dx = mlf.doFilter(fdata, dbeam=dbeam, window=window)
        slist = np.array(dx.slist)
        labs = np.array(dx["labels"])
        labs[labs<0] = np.nan
        labs = labs + (10*i)
        labs[np.isnan(labs)] = -1
        dx["labels"] = labs
        df = pd.concat([df, dx])
    return df

def lower_range(df, gf=None):
    u = df.copy()
    slist = np.array(u.slist)
    labs = np.array(u["labels"])
    if gf is not None: labs[slist<8] = gf
    u["labels"] = labs
    return u

In [2]:
case = 4


if case == 0:
    start_time = datetime.datetime(2017, 4, 4)
    end_time = datetime.datetime(2017, 4, 5)
    rad, bm = "cvw",7
    
    start_time = datetime.datetime(2015, 3, 17)
    end_time = datetime.datetime(2015, 3, 18)
    rad, bm = "bks",13

    db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True, run_gmm=False)
    setattr(db, "skill", estimate_skills(db.data_dict, db.clust_flg))
    #dbgmm = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True)
    #setattr(dbgmm, "skill", estimate_skills(dbgmm.data_dict, dbgmm.clust_flg))
    #gbdb = GridBasedDBSCAN(start_time, end_time, rad, load_model=False, save_model=True)
    #setattr(gbdb, "skill", estimate_skills(gbdb.data_dict, gbdb.clust_flg))
    #gbdbgmm = GridBasedDBSCAN_GMM(start_time, end_time, rad, load_model=False, save_model=True)
    #setattr(gbdbgmm, "skill", estimate_skills(gbdbgmm.data_dict, gbdbgmm.clust_flg))

    rti = RangeTimePlot(110, np.unique(np.hstack(db.data_dict["time"])), "", num_subplots=4)
    rti.addClusterPlot(db.data_dict, db.clust_flg, bm, "DBSCAN", label_clusters=True, skill=db.skill)
    #rti.addClusterPlot(dbgmm.data_dict, dbgmm.clust_flg, bm, "DBSCAN + GMM", label_clusters=True, skill=dbgmm.skill)
    #rti.addClusterPlot(gbdb.data_dict, gbdb.clust_flg, bm, "GB-DBSCAN", label_clusters=True, skill=gbdb.skill)
    #rti.addClusterPlot(gbdbgmm.data_dict, gbdbgmm.clust_flg, bm, "GB-DBSCAN + GMM ", label_clusters=True, xlabel="Time, UT",
    #    skill=gbdbgmm.skill)
    rti.save("figs/rti.example.png")
if case == 1:
    plot_acfs(rad="kap")
    plot_lims(False)
    plot_lims(True)
    plot_rad_acfs()
    plot_hist_hr()
if case == 2:
    start_time = datetime.datetime(2015, 3, 17)
    end_time = datetime.datetime(2015, 3, 17, 12)
    rad, bm = "bks",15

    db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True, run_gmm=False)
    rti = RangeTimePlot(110, np.unique(np.hstack(db.data_dict["time"])), "", num_subplots=3)
    rti.addClusterPlot(db.data_dict, db.clust_flg, bm, "DBSCAN", label_clusters=True, skill=None)
    rti.addGSISPlot(db.data_dict, db.data_dict["trad_gsflg"], bm, "GS-ID:Traditioanl", show_closerange=True, xlabel='')
    rti.addVelPlot(db.data_dict, bm, "Velocity", vel_max=200, vel_step=50, xlabel='Time UT')
    rti.save("figs/dbscan.trad.png")
if case == 3:
    start_time = datetime.datetime(2015, 3, 17)
    end_time = datetime.datetime(2015, 3, 17, 12)
    rad, bm = "bks",7
    kinds = ["dbscan", "dbscan-gmm", "gb-dbscan", "gb-dbscan-gmm"]
    kinds = ["dbscan"]
    for kind in kinds:    
        if kind == "dbscan": db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True, run_gmm=False)
        if kind == "dbscan-gmm": db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True, run_gmm=True)
        if kind == "gb-dbscan": db = GridBasedDBSCAN(start_time, end_time, rad, load_model=False, save_model=True)
        if kind == "gb-dbscan-gmm": db = GridBasedDBSCAN_GMM(start_time, end_time, rad, load_model=False, save_model=True,
                features=['beam', 'gate', 'time','vel','wid'], scan_eps=10)
        sd = ScatterDetection(db.data_dict)
        #rti = RangeTimePlot(110, np.unique(np.hstack(db.data_dict["time"])), "", num_subplots=6)
        #rti.addClusterPlot(db.data_dict, db.clust_flg, bm, kind.upper(), label_clusters=True, skill=None)
        #rti.addGSISPlot(db.data_dict, sd.run(kind=1, case=0), bm, "GS-ID:Median(Sudden)", show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=1, case=1), bm, "GS-ID:Median(Blanchard 2006)", show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=1, case=2), bm, "GS-ID:Median(Blanchard 2009)", show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=1, case=3), bm, "GS-ID:Median(Proposed)", show_closerange=True, xlabel='')
        #rti.addVelPlot(db.data_dict, bm, "Velocity", vel_max=200, vel_step=50, xlabel='Time UT')
        #rti.save("figs/%s.median.png"%kind)

        #rti = RangeTimePlot(110, np.unique(np.hstack(db.data_dict["time"])), "", num_subplots=6)
        #rti.addClusterPlot(db.data_dict, db.clust_flg, bm, kind.upper(), label_clusters=True, skill=None)
        #rti.addGSISPlot(db.data_dict, sd.run(kind=2, thresh=[0.1,0.9], case=0), bm, "GS-ID:Median(Sudden)", show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=2, thresh=[0.1,0.9], case=1), bm, "GS-ID:Median(Blanchard 2006)",
        #        show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=2, thresh=[0.1,0.9], case=2), bm, "GS-ID:Median(Blanchard 2009)", 
        #        show_closerange=True, xlabel='')
        #rti.addGSISPlot(db.data_dict, sd.run(kind=2, case=3), bm, "GS-ID:Median(Proposed)", show_closerange=True, xlabel='')
        #rti.addVelPlot(db.data_dict, bm, "Velocity", vel_max=200, vel_step=50, xlabel='Time UT')
        #rti.save("figs/%s.kde.png"%kind)

        rti = RangeTimePlot(110, np.unique(np.hstack(db.data_dict["time"])), "", num_subplots=6)
        rti.addClusterPlot(db.data_dict, db.clust_flg, bm, kind.upper(), label_clusters=True, skill=None)
        rti.addGSISPlot(db.data_dict, sd.run(kind=0, case=0), bm, "GS-ID:Sudden", show_closerange=True, xlabel='')
        rti.addGSISPlot(db.data_dict, sd.run(kind=0, case=1), bm, "GS-ID:Blanchard 2006", show_closerange=True, xlabel='')
        rti.addGSISPlot(db.data_dict, sd.run(kind=0, case=2), bm, "GS-ID:Blanchard 2009", show_closerange=True, xlabel='')
        rti.addGSISPlot(db.data_dict, sd.run(kind=0, case=3), bm, "GS-ID:Proposed", show_closerange=True, xlabel='')
        rti.addVelPlot(db.data_dict, bm, "Velocity", vel_max=200, vel_step=50, xlabel='Time UT')
        rti.save("figs/%s.indp.png"%kind)
if case == 4:
    pass

In [3]:
#from sma import MiddleLatFilter
#start_time = datetime.datetime(2015, 3, 17)
#end_time = datetime.datetime(2015, 3, 17, 10)
#rad, bm = "bks",7

#start_time = datetime.datetime(2017, 4, 4)
#end_time = datetime.datetime(2017, 4, 5)
#rad, bm = "cvw",7

#fdata = FetchData( rad, [start_time, end_time] )
#_, scans = fdata.fetch_data(by="scan", scan_prop={"dur": 2, "stype": "themis"})
#print(" Total numbe of scans: ", len(scans))
#import pickle
#data_dict = pickle.load(open("../data/bks_2015-03-17_scans.pickle", 'rb'))
#data_dict = _filter_by_time(start_time, end_time, data_dict)

#import os
#os.system("rm figs/bks*")
#df = sma_bbox(scans, sdur=15, idx=None)
#from sma import ScatterTypeDetection
#bm=7
#sd = ScatterTypeDetection(df)
#rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=3)
#rti.addParamPlot(df, bm, "Velocity", vel_max=100, vel_step=20, xlabel="")
#rti.addCluster(df, bm, "SMA", xlabel="")
#rti.addGSIS(sd.run(kind=0, case=0), bm, r"GsI:[Sudden]")
#rti.addGSIS(sd.run(kind=0, case=1), bm, r"GsI:[Blanchard 2006]")
#rti.addGSIS(sd.run(kind=0, case=2), bm, r"GsI:[Blanchard 2009]", xlabel='Time, UT')
#rti.addGSIS(sd.run(kind=0, case=3), bm, r"GsI:[X]", xlabel='Time, UT')
#rti.save("figs/bks_sma_01.png")
#rti.close()

In [4]:
run = False
if run:
    from sma import MiddleLatFilter
    start_time = datetime.datetime(2017, 4, 4)
    end_time = datetime.datetime(2017, 4, 5)
    rad, bm = "cvw",7

    fdata = FetchData( rad, [start_time, end_time] )
    _, scans = fdata.fetch_data(by="scan", scan_prop={"dur": 2, "stype": "themis"})
    print(" Total numbe of scans: ", len(scans))
    import pickle
    data_dict = pickle.load(open("../data/cvw_2017-04-04_scans.pickle", 'rb'))
    data_dict = _filter_by_time(start_time, end_time, data_dict)

    import os
    os.system("rm figs/cvw*")
    df = sma_bbox(scans, sdur=30, idx=None, dbeam=None, window=5)
    #db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=False, run_gmm=True)
    #df = todf(db.data_dict)
    from sma import ScatterTypeDetection
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=4)
    rti.addParamPlot(df, bm, "Velocity", p_max=100, p_min=-100, p_step=25, xlabel="", zparam="v", label='Velocity [m/s]')
    rti.addParamPlot(df, bm, "Power", p_max=30, p_min=3, p_step=3, xlabel="", zparam="p_l", label='Power [dB]')
    rti.addParamPlot(df, bm, "Spec. Width", p_max=100, p_min=0, p_step=10, xlabel="", zparam="w_l", label='Spec. Width [m/s]')
    rti.addCluster(lower_range(df, -1), bm, "BCA", label_clusters=True, skill=estimate_df_skills(df, df.labels), xlabel='Time, UT')
    rti.save("figs/cvw_07_sma.png")
    rti.close()
    sd = ScatterTypeDetection(df)
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=5)
    rti.addCluster(lower_range(df, -1), bm, "BSC", label_clusters=True, skill=estimate_df_skills(df, df.labels))
    rti.addGSIS(sd.run(kind=1, case=0), bm, r"GsI:[Sudden]")
    rti.addGSIS(sd.run(kind=1, case=1), bm, r"GsI:[Blanchard 2006]")
    rti.addGSIS(sd.run(kind=1, case=2), bm, r"GsI:[Blanchard 2009]")
    sd = ScatterTypeDetection(df)
    rti.addGSIS(sd.run(kind=1, case=3, mod=True), bm, r"GsI:[Chakraborty]", xlabel='Time, UT')
    utility.get_kappa(sd.run(kind=1, case=0)["gflg"], sd.run(kind=1, case=1)["gflg"], "Sudden", "Blanchard 2006")
    utility.get_kappa(sd.run(kind=1, case=1)["gflg"], sd.run(kind=1, case=2)["gflg"], "Blanchard 2006", "Blanchard 2009")
    utility.get_kappa(sd.run(kind=1, case=2)["gflg"], sd.run(kind=1, case=3, mod=True)["gflg"], "Blanchard 2009", "Ch")
    utility.get_kappa(sd.run(kind=1, case=3, mod=True)["gflg"], sd.run(kind=1, case=0)["gflg"], "Ch", "Sudden")
    rti.save("figs/cvw_07_sma_is.png")
    rti.close()

In [5]:
run = False
if run:
    from sma import MiddleLatFilter
    start_time = datetime.datetime(2015, 3, 17)
    end_time = datetime.datetime(2015, 3, 17, 12)
    rad, bm = "bks",13

    fdata = FetchData( rad, [start_time, end_time] )
    _, scans = fdata.fetch_data(by="scan", scan_prop={"dur": 2, "stype": "themis"})
    print(" Total numbe of scans: ", len(scans))
    import pickle
    data_dict = pickle.load(open("../data/bks_2015-03-17_scans.pickle", 'rb'))
    data_dict = _filter_by_time(start_time, end_time, data_dict)

    import os
    os.system("rm figs/bks*")
    df = sma_bbox(scans, sdur=30, idx=None, dbeam=15, window=5)
    #db = DBSCAN_GMM(start_time, end_time, rad, BoxCox=True, load_model=False, save_model=True, run_gmm=True)
    #df = todf(db.data_dict)
    from sma import ScatterTypeDetection
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=4)
    rti.addParamPlot(df, bm, "Velocity", p_max=100, p_min=-100, p_step=25, xlabel="", zparam="v", label='Velocity [m/s]')
    rti.addParamPlot(df, bm, "Power", p_max=30, p_min=3, p_step=3, xlabel="", zparam="p_l", label='Power [dB]')
    rti.addParamPlot(df, bm, "Spec. Width", p_max=100, p_min=0, p_step=10, xlabel="", zparam="w_l", label='Spec. Width [m/s]')
    rti.addCluster(lower_range(df, -1), bm, "BSC", label_clusters=True, skill=estimate_df_skills(df, df.labels), xlabel='Time, UT')
    rti.save("figs/bks_07_sma.png")
    rti.close()
    sd = ScatterTypeDetection(df)
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=5)
    rti.addCluster(lower_range(df, -1), bm, "BCA", label_clusters=True, skill=estimate_df_skills(df, df.labels))
    rti.addGSIS(sd.run(kind=1, case=0), bm, r"GsI:[Sudden]")
    rti.addGSIS(sd.run(kind=1, case=1), bm, r"GsI:[Blanchard 2006]")
    rti.addGSIS(sd.run(kind=1, case=2), bm, r"GsI:[Blanchard 2009]")
    sd = ScatterTypeDetection(df)
    rti.addGSIS(sd.run(kind=1, case=3, mod=False), bm, r"GsI:[Chakraborty]", xlabel='Time, UT')
    utility.get_kappa(sd.run(kind=1, case=0)["gflg"], sd.run(kind=1, case=1)["gflg"], "Sudden", "Blanchard 2006")
    utility.get_kappa(sd.run(kind=1, case=1)["gflg"], sd.run(kind=1, case=2)["gflg"], "Blanchard 2006", "Blanchard 2009")
    utility.get_kappa(sd.run(kind=1, case=2)["gflg"], sd.run(kind=1, case=3, mod=True)["gflg"], "Blanchard 2009", "Ch")
    utility.get_kappa(sd.run(kind=1, case=3, mod=True)["gflg"], sd.run(kind=1, case=0)["gflg"], "Ch", "Sudden")
    rti.save("figs/bks_07_sma_is.png")
    rti.close()

In [6]:
from sklearn.cluster import DBSCAN
class TimeFilter(object):
    """ Class to time filter middle latitude radars """
    
    def __init__(self, df, beam=7, tw=15, eps=2, min_samples=10):
        self.df = df[df.bmnum==beam]
        self.tw = tw
        self.eps = eps
        self.min_samples = min_samples
        self.boundaries = {}
        return
    
    def run_codes(self):
        start = self.df.time.tolist()[0]
        end = start + dt.timedelta(minutes=self.tw)
        k, j = 0, 0
        labels = []
        time_index = []
        while start <= self.df.time.tolist()[-1]:
            u = self.df[(self.df.time>=start) & (self.df.time<=end)]
            ds = DBSCAN(eps=self.eps, min_samples=self.min_samples).fit(u[["slist"]].values)
            labs = ds.labels_
            labs[labs>=0] = labs[labs>=0] + k
            labels.extend(labs.tolist())
            start = end
            end = start + dt.timedelta(minutes=self.tw)
            k += len(set(labs[labs>=0]))
            time_index.extend([j]*len(labs))
            j += 1
        self.df["gate_labels"] = labels
        self.df["labels"] = labels#[-1]*len(self.df)
        self.df["time_index"] = time_index
        
        K = len(self.df)
        for ti in np.unique(time_index):
            u = self.df[self.df.time_index==ti]
            self.boundaries[ti] = []
            for ix in np.unique(u.gate_labels):
                du = u[u.gate_labels==ix]
                if ix >= 0 and len(du)/len(u) > 0.5:
                    self.boundaries[ti].append({"peak": du.slist.mean(), "ub": du.slist.max(), "lb": du.slist.min(),
                                                "value":len(du)/len(u), "time_index": ti, "gc": ix})
            print(ti, np.unique(u.gate_labels), len(self.boundaries[ti]))
        
        self.ltime, self.utime = np.min(time_index), np.max(time_index)
        self.gc = []
        for ti in np.unique(time_index):
            clust = self.boundaries[ti]
            for cl in clust:
                self.gc.append(cl)
        print(len(self.gc))
        #for cx in np.unique(self.df["gate_labels"]):
        #    if cx >= 0:
        #        u = self.df[(self.df.gate_labels==cx)]
        #        self.gc.append({"peak": u.slist.mean(), "ub": u.slist.max(), "lb": u.slist.min(), "value":len(u)/K,
        #                       "time_index": u.time_index.tolist()[0]})
        self.sma_bgspace()
        return
    
    def sma_bgspace(self):
        """
        Simple minded algorithm in B.G space
        """
        def range_comp(x, y, pcnt=0.7):
            _cx = False
            insc = set(x).intersection(y)
            if len(x) < len(y) and len(insc) >= len(x)*pcnt: _cx = True
            if len(x) > len(y) and len(insc) >= len(y)*pcnt: _cx = True
            return _cx
        def find_adjucent(lst, mxx):
            mxl = []
            for l in lst:
                if l["peak"] >= mxx["lb"] and l["peak"] <= mxx["ub"]: mxl.append(l)
                elif mxx["peak"] >= l["lb"] and mxx["peak"] <= l["ub"]: mxl.append(l)
                elif range_comp(range(l["lb"], l["ub"]+1), range(mxx["lb"], mxx["ub"]+1)): mxl.append(l)
            return mxl
        def nested_cluster_find(ti, mx, j, case=-1):
            if ti < self.ltime and ti > self.utime: return
            else:
                if (case == -1 and ti >= self.ltime) or (case == 1 and ti <= self.utime):
                    mxl = find_adjucent(self.boundaries[ti], mx)
                    for m in mxl:
                        if m in self.gc:
                            del self.gc[self.gc.index(m)]
                            self.clusters[j].append(m)
                            nested_cluster_find(m["time_index"] + case, m, j, case)
                            nested_cluster_find(m["time_index"] + (-1*case), m, j, (-1*case))
                return
        self.clusters = {}
        j = 0
        while len(self.gc) > 0:
            self.clusters[j] = []
            mx = max(self.gc, key=lambda x:x["value"])
            self.clusters[j].append(mx)
            if mx in self.gc: del self.gc[self.gc.index(mx)]
            nested_cluster_find(mx["time_index"] - 1, mx, j, case=-1)
            nested_cluster_find(mx["time_index"] + 1, mx, j, case=1)
            j += 1
        
        for c in self.clusters.keys():
            clust = self.clusters[c]
            for cl in clust:
                print(c, cl)
                self.df["labels"] = np.where((self.df.slist<=cl["ub"]) & (self.df.slist>=cl["lb"]) & 
                                             (self.df.time_index==cl["time_index"]) & (self.df.gate_labels==cl["gc"])
                                             , c, self.df["labels"])
        print(set(self.df["labels"]), self.clusters.keys())
        return

In [7]:
run = True
if run:
    start_time = datetime.datetime(2017, 4, 4)
    end_time = datetime.datetime(2017, 4, 5)
    rad, bm = "cvw",7

    fdata = FetchData( rad, [start_time, end_time] )
    beams, _ = fdata.fetch_data(by="beam")
    dfx = fdata.convert_to_pandas(beams)
    dfx["sec"] = [x.to_pydatetime().second + x.to_pydatetime().hour*3600 + x.to_pydatetime().minute*60 for x in dfx.time]

    import pickle
    data_dict = pickle.load(open("../data/cvw_2017-04-04_scans.pickle", 'rb'))
    data_dict = _filter_by_time(start_time, end_time, data_dict)
    
    tf = TimeFilter(dfx, beam=bm, tw=30, eps=4, min_samples=20)
    tf.run_codes()
    skill = estimate_df_skills(tf.df, tf.df.labels)
    skill.hscore, skill.bhscore = -0.8, 16e6
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=5)
    rti.addParamPlot(dfx, bm, "Velocity", p_max=100, p_min=-100, p_step=25, xlabel="", zparam="v", label='Velocity [m/s]')
    rti.addParamPlot(dfx, bm, "Power", p_max=30, p_min=3, p_step=3, xlabel="", zparam="p_l", label='Power [dB]')
    rti.addParamPlot(dfx, bm, "Spec. Width", p_max=100, p_min=0, p_step=10, xlabel="", zparam="w_l", label='Spec. Width [m/s]')
    rti.addClusterBase(tf.df, bm, "BCA", label_clusters=True, skill=skill)
    rti.save("figs/time_cvw_07.png")
    rti.close()

Read file -  /sd-data/2017/fitacf/cvw/20170403.2201.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.0001.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.0201.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.0401.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.0601.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.0801.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.1001.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.1201.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.1401.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.1601.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.1801.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.2001.00.cvw.fitacf.bz2
Read file -  /sd-data/2017/fitacf/cvw/20170404.2201.00.cvw.fitacf.bz2

 Started converting to beam data.

 Converted to beam data.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0 [-1  0] 1
1 [-1  1] 1
2 [-1  2  3] 1
3 [-1  4  5] 1
4 [-1  6] 1
5 [-1  7] 1
6 [-1  8  9] 1
7 [-1 10 11] 1
8 [-1 12 13] 1
9 [-1 14 15] 1
10 [-1 16 17] 0
11 [-1 18 19 20] 1
12 [-1 21 22] 1
13 [-1 23 24] 1
14 [-1 25] 1
15 [-1 26] 1
16 [27] 1
17 [28] 1
18 [29] 1
19 [-1 30] 1
20 [31] 1
21 [32] 1
22 [33] 1
23 [34] 1
24 [35] 1
25 [36] 1
26 [37] 1
27 [38] 1
28 [39] 1
29 [40] 1
30 [41] 1
31 [42] 1
32 [43] 1
33 [44] 1
34 [-1 45 46] 1
35 [-1 47 48 49] 0
36 [50 51 52] 0
37 [53 54] 1
38 [55 56] 1
39 [-1 57 58] 1
40 [-1 59 60 61] 1
41 [62 63 64] 1
42 [65 66 67] 1
43 [-1 68 69 70] 1
44 [71 72 73] 1
45 [74 75 76] 1
46 [-1 77 78] 1
47 [-1 79] 0
44
0 {'peak': 1.425531914893617, 'ub': 6, 'lb': 0, 'value': 1.0, 'time_index': 16, 'gc': 27}
0 {'peak': 1.6, 'ub': 7, 'lb': 0, 'value': 0.967741935483871, 'time_index': 15, 'gc': 26}
0 {'peak': 1.2, 'ub': 5, 'lb': 0, 'value': 0.9722222222222222, 'time_index': 14, 'gc': 25}
0 {'peak': 1.2461538461538462, 'ub': 4, 'lb': 0, 'value': 0.5078125, 'time_index': 13, '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[xparam] = plotParamDF[xparam].tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[yparam] = plotParamDF[yparam].tolist()
  ax.pcolormesh(X, Y, Z.T, lw=0.01, edgecolors='None', cmap=cmap, norm=norm)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[xparam] = plot

41


In [8]:
run = True
if run:
    start_time = datetime.datetime(2015, 3, 17)
    end_time = datetime.datetime(2015, 3, 17, 12)
    rad, bm = "bks",13

    fdata = FetchData( rad, [start_time, end_time] )
    beams, _ = fdata.fetch_data(by="beam")
    dfx = fdata.convert_to_pandas(beams)
    dfx["sec"] = [x.to_pydatetime().second + x.to_pydatetime().hour*3600 + x.to_pydatetime().minute*60 for x in dfx.time]
    import pickle
    data_dict = pickle.load(open("../data/bks_2015-03-17_scans.pickle", 'rb'))
    data_dict = _filter_by_time(start_time, end_time, data_dict)
    
    tf = TimeFilter(dfx, beam=bm, tw=30, eps=4, min_samples=20)
    tf.run_codes()
    skill = estimate_df_skills(tf.df, tf.df.labels)
    skill.hscore, skill.bhscore = -1.4, 189.5e6
    rti = RangeTimePlot(110, np.unique(np.hstack(data_dict["time"])), "", num_subplots=5)
    rti.addParamPlot(dfx, bm, "Velocity", p_max=100, p_min=-100, p_step=25, xlabel="", zparam="v", label='Velocity [m/s]')
    rti.addParamPlot(dfx, bm, "Power", p_max=30, p_min=3, p_step=3, xlabel="", zparam="p_l", label='Power [dB]')
    rti.addParamPlot(dfx, bm, "Spec. Width", p_max=100, p_min=0, p_step=10, xlabel="", zparam="w_l", label='Spec. Width [m/s]')
    rti.addClusterBase(tf.df, bm, "BCA", label_clusters=True, skill=skill)
    rti.save("figs/time_bks_07.png")
    rti.close()

Read file -  /sd-data/2015/fitacf/bks/20150316.2202.00.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.0000.03.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.0202.00.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.0402.00.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.0602.00.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.0802.00.bks.fitacf.bz2
Read file -  /sd-data/2015/fitacf/bks/20150317.1000.02.bks.fitacf.bz2

 Started converting to beam data.

 Converted to beam data.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

0 [-1  0] 1
1 [-1  1] 1
2 [-1  2  3] 1
3 [-1  4  5] 1
4 [6 7] 1
5 [-1  8  9 10] 1
6 [-1 11 12 13] 1
7 [-1 14 15 16] 1
8 [-1 17 18 19 20] 0
9 [-1 21 22 23] 1
10 [-1 24 25 26] 1
11 [-1 27 28] 1
12 [-1 29 30] 1
13 [-1 31 32] 1
14 [33 34] 1
15 [-1 35 36] 1
16 [37 38] 1
17 [-1 39 40] 1
18 [-1 41 42] 1
19 [-1 43 44] 1
20 [-1 45 46] 1
21 [-1 47 48] 1
22 [-1 49 50] 1
23 [-1 51 52 53] 1
23
0 {'peak': 27.77777777777778, 'ub': 47, 'lb': 13, 'value': 0.9761904761904762, 'time_index': 1, 'gc': 1}
0 {'peak': 28.028350515463917, 'ub': 47, 'lb': 13, 'value': 0.9724310776942355, 'time_index': 0, 'gc': 0}
0 {'peak': 17.869230769230768, 'ub': 33, 'lb': 1, 'value': 0.8125, 'time_index': 2, 'gc': 2}
0 {'peak': 18.162666666666667, 'ub': 41, 'lb': 0, 'value': 0.8680555555555556, 'time_index': 3, 'gc': 4}
0 {'peak': 21.376068376068375, 'ub': 47, 'lb': 0, 'value': 0.8863636363636364, 'time_index': 4, 'gc': 6}
0 {'peak': 35.44392523364486, 'ub': 52, 'lb': 26, 'value': 0.5944444444444444, 'time_index': 5, 'gc': 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[xparam] = plotParamDF[xparam].tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[yparam] = plotParamDF[yparam].tolist()
  ax.pcolormesh(X, Y, Z.T, lw=0.01, edgecolors='None', cmap=cmap, norm=norm)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plotParamDF[xparam] = plot

34
