In [1]:
import os
import re
import datetime as dt
import json
from collections import namedtuple
from collections import Counter
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

%matplotlib inline

Define some helper methods and data structures

In [2]:
GeoExtent = namedtuple('GeoExtent', ['x_min', 'y_max', 'x_max', 'y_min'])
GeoAffine = namedtuple('GeoAffine', ['ul_x', 'x_res', 'rot_1', 'ul_y', 'rot_2', 'y_res'])
GeoCoordinate = namedtuple('GeoCoordinate', ['x', 'y'])
RowColumn = namedtuple('RowColumn', ['row', 'column'])
RowColumnExtent = namedtuple('RowColumnExtent', ['start_row', 'start_col', 'end_row', 'end_col'])

In [3]:
def geospatial_hv(h, v, loc):
    """
    Geospatial extent and 30m affine for a given ARD grid location.
    """
    xmin = loc.x_min + h * 5000 * 30
    xmax = loc.x_min + h * 5000 * 30 + 5000 * 30
    ymax = loc.y_max - v * 5000 * 30
    ymin = loc.y_max - v * 5000 * 30 - 5000 * 30

    return (GeoExtent(x_min=xmin, x_max=xmax, y_max=ymax, y_min=ymin),
            GeoAffine(ul_x=xmin, x_res=30, rot_1=0, ul_y=ymax, rot_2=0, y_res=-30))

In [4]:
def geo_to_rowcol(affine, coord):
    """
    Transform geo-coordinate to row/col given a reference affine.
    
    Yline = (Ygeo - GT(3) - Xpixel*GT(4)) / GT(5)
    Xpixel = (Xgeo - GT(0) - Yline*GT(2)) / GT(1)
    """
    row = (coord.y - affine.ul_y - affine.ul_x * affine.rot_2) / affine.y_res
    col = (coord.x - affine.ul_x - affine.ul_y * affine.rot_1) / affine.x_res

    return RowColumn(row=int(row),
                     column=int(col))

In [5]:
def rowcol_to_geo(affine, rowcol):
    """
    Transform a row/col into a geospatial coordinate given reference affine.
    
    Xgeo = GT(0) + Xpixel*GT(1) + Yline*GT(2)
    Ygeo = GT(3) + Xpixel*GT(4) + Yline*GT(5)
    """
    x = affine.ul_x + rowcol.column * affine.x_res + rowcol.row * affine.rot_1
    y = affine.ul_y + rowcol.column * affine.rot_2 + rowcol.row * affine.y_res

    return GeoCoordinate(x=x, y=y)

In [6]:
def load_cache(file):
    """
    Load the cache file and split the data into the image IDs and values
    """
    data = np.load(file)
    return data['Y'], data['image_IDs']

In [7]:
def find_file(file_ls, string):
    """
    Return the first str in a list of strings that contains string.
    """
    gen = filter(lambda x: string in x, file_ls)
    return next(gen, None)

In [8]:
def imageid_date(image_ids):
    """
    Extract the ordinal day from the ARD image name.
    """
    return np.array([dt.datetime.strptime(d[15:23], '%Y%m%d').toordinal()
                     for d in image_ids])

In [9]:
def mask_daterange(dates):
    """
    Create a mask for values outside of the global BEGIN_DATE and END_DATE.
    """

    return np.logical_and(dates >= BEGIN_DATE.toordinal(), dates <= END_DATE.toordinal())

In [10]:
def find_chipcurve(results_chip, coord):
    """
    Find the results for the specified coordinate.
    """
    with open(results_chip, 'r') as f:
        results = json.load(f)
    
    gen = filter(lambda x: coord.x == x['x'] and coord.y == x['y'], results)
    
    return next(gen, None)

In [11]:
def extract_cachepoint(coord):
    """
    Extract the spectral values from the cache file.
    """

    rowcol = geo_to_rowcol(PIXEL_AFFINE, coord)
    
    data, image_ids = load_cache(find_file(CACHE_INV, 'r{}'.format(rowcol.row)))
    
    dates = imageid_date(image_ids)
           
    return image_ids, data[:, :, rowcol.column], dates
   

In [12]:
def extract_jsoncurve(coord):
    """
    Extract the pyccd information from the json file representing a chip of results.
    """
    pixel_rowcol = geo_to_rowcol(PIXEL_AFFINE, coord)
    pixel_coord = rowcol_to_geo(PIXEL_AFFINE, pixel_rowcol)
    
    chip_rowcol = geo_to_rowcol(CHIP_AFFINE, coord)
    chip_coord = rowcol_to_geo(CHIP_AFFINE, chip_rowcol)
    
    file = find_file(JSON_INV, 'H{:02d}V{:02d}_{}_{}.json'.format(H, V, chip_coord.x, chip_coord.y))
    result = find_chipcurve(file, pixel_coord)
    
    if result.get('result_ok') is True:
        return json.loads(result['result'])

In [13]:
def predicts(days, coef, intercept):
    return (intercept + coef[0] * days +
            coef[1]*np.cos(days*1*2*np.pi/365.25) + coef[2]*np.sin(days*1*2*np.pi/365.25) +
            coef[3]*np.cos(days*2*2*np.pi/365.25) + coef[4]*np.sin(days*2*2*np.pi/365.25) +
            coef[5]*np.cos(days*3*2*np.pi/365.25) + coef[6]*np.sin(days*3*2*np.pi/365.25))

In [14]:
def arcpaste_to_coord(string):
    pieces = string.split()
    
    return GeoCoordinate(x=float(re.sub(',', '', pieces[0])),
                         y=float(re.sub(',', '', pieces[1])))

Setup file locations

In [15]:
JSON_DIR = r'Z:\sites\ms\pyccd-results\H20V14\2017.08.18\json'
JSON_INV = [os.path.join(JSON_DIR, f) for f in os.listdir(JSON_DIR)]
CACHE_DIR = r'Z:\sites\ms\ARD\h20v14\cache'
CACHE_INV = [os.path.join(CACHE_DIR, f) for f in os.listdir(CACHE_DIR)]

In [16]:
arc_paste = '504347 1140109'
coord = arcpaste_to_coord(arc_paste)

CONUS_EXTENT = GeoExtent(x_min=-2565585,
                         y_min=14805,
                         x_max=2384415,
                         y_max=3314805)

H = 20
V = 14
EXTENT, PIXEL_AFFINE = geospatial_hv(H, V, CONUS_EXTENT)
CHIP_AFFINE = GeoAffine(ul_x=PIXEL_AFFINE.ul_x, x_res=3000, rot_1=0, ul_y=PIXEL_AFFINE.ul_y, rot_2=0, y_res=-3000)



In [17]:
print("entered coord= ", coord)
pixel_rowcol = geo_to_rowcol(PIXEL_AFFINE, coord)
pixel_coord = rowcol_to_geo(PIXEL_AFFINE, pixel_rowcol)

chip_rowcol = geo_to_rowcol(CHIP_AFFINE, coord)
chip_coord = rowcol_to_geo(CHIP_AFFINE, chip_rowcol)

# pixel_rowcol is used to find the cache file.  There is one cache file per row of the tile.
print("pixel_rowcol= ", pixel_rowcol)

print("pixel_coord= ", pixel_coord)
print("chip_rowcol= ", chip_rowcol)

# chip_coord is used to find the json file.  There are 2500 chips and json files per tile.  
# The chip_coord gives the upper left coordinate which is used to identify the chip and corresponding json file.
print("chip_coord= ", chip_coord)
print("EXTENT= ", EXTENT)

entered coord=  GeoCoordinate(x=504347.0, y=1140109.0)
pixel_rowcol=  RowColumn(row=2489, column=2331)
pixel_coord=  GeoCoordinate(x=504345, y=1140135)
chip_rowcol=  RowColumn(row=24, column=23)
chip_coord=  GeoCoordinate(x=503415, y=1142805)
EXTENT=  GeoExtent(x_min=434415, y_max=1214805, x_max=584415, y_min=1064805)


In [18]:
# Extract the pyccd results from the appropriate json file
results = extract_jsoncurve(coord)


In [19]:
file = find_file(JSON_INV, 'H{:02d}V{:02d}_{}_{}.json'.format(H, V, chip_coord.x, chip_coord.y))
full_result = find_chipcurve(file, pixel_coord)

In [20]:
for key in full_result.keys():
    print(key)
for key in full_result.keys():
    print(full_result[key])

chip_x
chip_y
algorithm
x
y
chip_update_requested
inputs_md5
inputs_url
result
result_md5
result_ok
result_produced
503415
1142805
lcmap-pyccd:2017.08.18
504345
1140135
2017-10-29T03:49:24.433000
not implemented
http://lcmap-test.cr.usgs.gov/v1/landsat/chips?x=503415&y=1142805&acquired=1982-01-01/2015-12-31&ubid=LANDSAT_5/TM/SRB5&ubid=LANDSAT_8/OLI_TIRS/SRB6&ubid=LANDSAT_4/TM/SRB5&ubid=LANDSAT_7/ETM/SRB5&ubid=LANDSAT_8/OLI_TIRS/BTB10&ubid=LANDSAT_5/TM/BTB6&ubid=LANDSAT_7/ETM/BTB6&ubid=LANDSAT_4/TM/BTB6&ubid=LANDSAT_7/ETM/SRB7&ubid=LANDSAT_5/TM/SRB7&ubid=LANDSAT_4/TM/SRB7&ubid=LANDSAT_8/OLI_TIRS/SRB7&ubid=LANDSAT_4/TM/PIXELQA&ubid=LANDSAT_7/ETM/PIXELQA&ubid=LANDSAT_5/TM/PIXELQA&ubid=LANDSAT_8/OLI_TIRS/PIXELQA&ubid=LANDSAT_4/TM/SRB3&ubid=LANDSAT_5/TM/SRB3&ubid=LANDSAT_8/OLI_TIRS/SRB4&ubid=LANDSAT_7/ETM/SRB3&ubid=LANDSAT_5/TM/SRB1&ubid=LANDSAT_4/TM/SRB1&ubid=LANDSAT_7/ETM/SRB1&ubid=LANDSAT_8/OLI_TIRS/SRB2&ubid=LANDSAT_5/TM/SRB2&ubid=LANDSAT_7/ETM/SRB2&ubid=LANDSAT_8/OLI_TIRS/SRB3&ubid=LAN

In [21]:
# Extract the ARD observations from the appropriate cache file
imageIDs, data, dates = extract_cachepoint(coord)


In [22]:
# BEGIN_DATE = dt.datetime.fromordinal(results["change_models"][0]["start_day"])
# BEGIN_DATE = dt.datetime.fromordinal(dates[0])
# END_DATE = dt.datetime.fromordinal(results["change_models"][-1]["break_day"])
# END_DATE = dt.datetime.fromordinal(dates[len(results["processing_mask"])-1])

BEGIN_DATE = dt.datetime(1982, 1, 1, 0, 0)
END_DATE = dt.datetime(2015, 12, 31, 0, 0)

print(BEGIN_DATE, END_DATE)

1982-01-01 00:00:00 2015-12-31 00:00:00


In [23]:
date_mask = mask_daterange(dates=dates)


In [24]:
# Make a mask based on the ARD QA band to remove fill (value 1)
qa = data[-1]
qa_mask = np.ones_like(qa, dtype=np.bool)
qa_mask[qa == 1] = False
qa_in = qa_mask[date_mask]
qa_out = qa_mask[~date_mask]

print(len(qa_mask))
print(len(qa_in))
print(len(qa_out))

2377
2206
171


Setup geospatial and temporal information

In [25]:
dates_in = dates[date_mask]
dates_out = dates[~date_mask]


In [26]:
test_dates_length = (len(dates) == len(np.unique(dates)))
print("dates = unique dates: ", test_dates_length)
print("Length of processing mask: ", len(results["processing_mask"]))
print("length of dates_in: ", len(dates_in))
print("length of dates_out: ", len(dates_out))
no_fill_dates_in = dates_in[qa_in]
print("length of dates with fill removed: ", len(no_fill_dates_in))


dates = unique dates:  True
Length of processing mask:  2205
length of dates_in:  2206
length of dates_out:  171
length of dates with fill removed:  716


In [27]:
dates_unique = np.unique(dates_in)
print(len(dates_unique))
print(len(dates_in))
print(dt.datetime.fromordinal(dates_in[0]))
print(dt.datetime.fromordinal(dates_in[-1]))
print(len(dates))
print(len(np.unique(dates)))

2206
2206
1982-12-05 00:00:00
2015-12-31 00:00:00
2377
2377


In [41]:
print(dt.datetime.fromordinal(dates_in[-1]))

2015-12-31 00:00:00


In [42]:
print(dt.datetime.fromordinal(dates_in[-2]))

2015-12-24 00:00:00


In [46]:
for ind, d in enumerate(dates_in):
    print(ind, ":", d, " ", dt.datetime.fromordinal(d))

0 : 723884   1982-12-05 00:00:00
1 : 723898   1982-12-19 00:00:00
2 : 723914   1983-01-04 00:00:00
3 : 723916   1983-01-06 00:00:00
4 : 723923   1983-01-13 00:00:00
5 : 724372   1984-04-06 00:00:00
6 : 724388   1984-04-22 00:00:00
7 : 724420   1984-05-24 00:00:00
8 : 724443   1984-06-16 00:00:00
9 : 724450   1984-06-23 00:00:00
10 : 724466   1984-07-09 00:00:00
11 : 724468   1984-07-11 00:00:00
12 : 724516   1984-08-28 00:00:00
13 : 724523   1984-09-04 00:00:00
14 : 724532   1984-09-13 00:00:00
15 : 724548   1984-09-29 00:00:00
16 : 724578   1984-10-29 00:00:00
17 : 724580   1984-10-31 00:00:00
18 : 724587   1984-11-07 00:00:00
19 : 724603   1984-11-23 00:00:00
20 : 724610   1984-11-30 00:00:00
21 : 724619   1984-12-09 00:00:00
22 : 724628   1984-12-18 00:00:00
23 : 724658   1985-01-17 00:00:00
24 : 724660   1985-01-19 00:00:00
25 : 724667   1985-01-26 00:00:00
26 : 724706   1985-03-06 00:00:00
27 : 724724   1985-03-24 00:00:00
28 : 724740   1985-04-09 00:00:00
29 : 724747   1985-04-16

353 : 727970   1994-02-11 00:00:00
354 : 727986   1994-02-27 00:00:00
355 : 728004   1994-03-17 00:00:00
356 : 728020   1994-04-02 00:00:00
357 : 728027   1994-04-09 00:00:00
358 : 728036   1994-04-18 00:00:00
359 : 728059   1994-05-11 00:00:00
360 : 728068   1994-05-20 00:00:00
361 : 728075   1994-05-27 00:00:00
362 : 728084   1994-06-05 00:00:00
363 : 728091   1994-06-12 00:00:00
364 : 728100   1994-06-21 00:00:00
365 : 728116   1994-07-07 00:00:00
366 : 728123   1994-07-14 00:00:00
367 : 728132   1994-07-23 00:00:00
368 : 728139   1994-07-30 00:00:00
369 : 728148   1994-08-08 00:00:00
370 : 728155   1994-08-15 00:00:00
371 : 728164   1994-08-24 00:00:00
372 : 728171   1994-08-31 00:00:00
373 : 728180   1994-09-09 00:00:00
374 : 728196   1994-09-25 00:00:00
375 : 728228   1994-10-27 00:00:00
376 : 728235   1994-11-03 00:00:00
377 : 728251   1994-11-19 00:00:00
378 : 728315   1995-01-22 00:00:00
379 : 728324   1995-01-31 00:00:00
380 : 728347   1995-02-23 00:00:00
381 : 728363   1995-

881 : 731140   2002-10-17 00:00:00
882 : 731146   2002-10-23 00:00:00
883 : 731147   2002-10-24 00:00:00
884 : 731155   2002-11-01 00:00:00
885 : 731156   2002-11-02 00:00:00
886 : 731162   2002-11-08 00:00:00
887 : 731163   2002-11-09 00:00:00
888 : 731164   2002-11-10 00:00:00
889 : 731170   2002-11-16 00:00:00
890 : 731171   2002-11-17 00:00:00
891 : 731172   2002-11-18 00:00:00
892 : 731178   2002-11-24 00:00:00
893 : 731179   2002-11-25 00:00:00
894 : 731180   2002-11-26 00:00:00
895 : 731187   2002-12-03 00:00:00
896 : 731196   2002-12-12 00:00:00
897 : 731204   2002-12-20 00:00:00
898 : 731210   2002-12-26 00:00:00
899 : 731211   2002-12-27 00:00:00
900 : 731212   2002-12-28 00:00:00
901 : 731218   2003-01-03 00:00:00
902 : 731219   2003-01-04 00:00:00
903 : 731220   2003-01-05 00:00:00
904 : 731226   2003-01-11 00:00:00
905 : 731227   2003-01-12 00:00:00
906 : 731228   2003-01-13 00:00:00
907 : 731234   2003-01-19 00:00:00
908 : 731235   2003-01-20 00:00:00
909 : 731236   2003-

1380 : 732908   2007-08-20 00:00:00
1381 : 732914   2007-08-26 00:00:00
1382 : 732915   2007-08-27 00:00:00
1383 : 732916   2007-08-28 00:00:00
1384 : 732922   2007-09-03 00:00:00
1385 : 732924   2007-09-05 00:00:00
1386 : 732931   2007-09-12 00:00:00
1387 : 732938   2007-09-19 00:00:00
1388 : 732939   2007-09-20 00:00:00
1389 : 732940   2007-09-21 00:00:00
1390 : 732946   2007-09-27 00:00:00
1391 : 732947   2007-09-28 00:00:00
1392 : 732948   2007-09-29 00:00:00
1393 : 732954   2007-10-05 00:00:00
1394 : 732956   2007-10-07 00:00:00
1395 : 732963   2007-10-14 00:00:00
1396 : 732970   2007-10-21 00:00:00
1397 : 732979   2007-10-30 00:00:00
1398 : 732986   2007-11-06 00:00:00
1399 : 732988   2007-11-08 00:00:00
1400 : 732995   2007-11-15 00:00:00
1401 : 733002   2007-11-22 00:00:00
1402 : 733011   2007-12-01 00:00:00
1403 : 733027   2007-12-17 00:00:00
1404 : 733034   2007-12-24 00:00:00
1405 : 733036   2007-12-26 00:00:00
1406 : 733043   2008-01-02 00:00:00
1407 : 733050   2008-01-09 0

1912 : 735083   2013-08-03 00:00:00
1913 : 735084   2013-08-04 00:00:00
1914 : 735090   2013-08-10 00:00:00
1915 : 735091   2013-08-11 00:00:00
1916 : 735092   2013-08-12 00:00:00
1917 : 735098   2013-08-18 00:00:00
1918 : 735099   2013-08-19 00:00:00
1919 : 735100   2013-08-20 00:00:00
1920 : 735106   2013-08-26 00:00:00
1921 : 735107   2013-08-27 00:00:00
1922 : 735108   2013-08-28 00:00:00
1923 : 735114   2013-09-03 00:00:00
1924 : 735115   2013-09-04 00:00:00
1925 : 735116   2013-09-05 00:00:00
1926 : 735122   2013-09-11 00:00:00
1927 : 735123   2013-09-12 00:00:00
1928 : 735124   2013-09-13 00:00:00
1929 : 735130   2013-09-19 00:00:00
1930 : 735138   2013-09-27 00:00:00
1931 : 735139   2013-09-28 00:00:00
1932 : 735140   2013-09-29 00:00:00
1933 : 735147   2013-10-06 00:00:00
1934 : 735148   2013-10-07 00:00:00
1935 : 735154   2013-10-13 00:00:00
1936 : 735155   2013-10-14 00:00:00
1937 : 735156   2013-10-15 00:00:00
1938 : 735162   2013-10-21 00:00:00
1939 : 735163   2013-10-22 0

#### Test that the lengths of the processing mask and dates_in arrays are equal, take action if they aren't

In [28]:
if len(dates_in) == len(results["processing_mask"]):
    print("The number of observations is consistent with the length of the PyCCD internal processing mask.\n"
          "No changes to the input observations are necessary.")

if len(np.unique(dates_in)) == len(results["processing_mask"]):
    print("There is a duplicate date occurrence in observations.  Removing duplicate occurrences makes the "
          "number of observations consistent with the length of the PyCCD internal processing mask.")

    dupes = [item for item, count in Counter(dates).items() if count > 1]

    dates, ind, counts = np.unique(dates, return_index=True, return_counts=True)

    print("Duplicate dates: \n\t", dates[:, ind])

    data = data[:, ind]

    date_mask = mask_daterange(dates)

    dates_in = dates[date_mask]

    dates_out = dates[~date_mask]

if len(dates_in) != len(results["processing_mask"]):
    print("There is an inconsistency in the length of the processing mask array, PIXELQA will be used instead")
    
    # get_pqa_mask()

There is an inconsistency in the length of the processing mask array, PIXELQA will be used instead


In [29]:
# rescale the brightness temperature to match the predicted values
temp_thermal_data = np.copy(data[6])
# temp_thermal_data[ temp_thermal_data != -9999 ] = temp_thermal_data[ temp_thermal_data != -9999 ] * 10 - 27315
temp_thermal_data[qa_mask] = temp_thermal_data[qa_mask] * 10 - 27315
data[6] = np.copy(temp_thermal_data)

In [30]:
data_in = data[:, date_mask]
data_out = data[:, ~date_mask]

print(np.shape(data_in))
print(np.shape(data_out))


(8, 2206)
(8, 171)


In [31]:
# Check the processing mask values against PIXELQA assuming they both begin at the same start date
print("length of internal processing mask: ", len(results["processing_mask"]))
print("length of ARD observations: ", len(qa))
print("length of date-masked ARD obs.: ", len(qa_in))

# Undesirable values to have for your Pixel QA at a given observation'
values_to_look_for = [1, 72, 80, 112, 96, 136, 144, 160, 176, 224, 336,
                      386, 834, 898, 1346, 388, 836, 900, 1348,
                      328, 392, 840, 904, 1350, 368, 400, 432, 
                      848, 880, 912, 944, 1352, 352, 368, 416, 
                      432, 480, 864, 880, 928, 944, 992, 834, 
                      836, 840, 848, 864, 880, 898, 900, 904, 
                      912, 928, 944, 992, 1346, 1348, 1350, 1352]

clear_vals = [322, 324, 66, 68]

qa_check = qa[date_mask]

length of internal processing mask:  2205
length of ARD observations:  2377
length of date-masked ARD obs.:  2206


In [32]:
print(qa_check)
print(results["processing_mask"])

[  1   1   1 ...,   1   1 928]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,

In [47]:
counter = 0
for ind, m in enumerate(results["processing_mask"]):
    try:
        for v in values_to_look_for:
            if v == qa_check[ind] and m == 1:
                counter += 1
                print("qa =", v, ".... list index =", ind, "....", m, " .... date =", dt.datetime.fromordinal(dates_in[ind]))
                break
        for v in clear_vals:
            if v == qa_check[ind] and m == 0:
                counter += 1
                print("qa =", v, ".... list index =", ind, "....", m, " .... date =", dt.datetime.fromordinal(dates_in[ind]))
                break
    except IndexError:
        print(ind)
        continue
print(counter)

qa = 66 .... list index = 421 .... 0  .... date = 1996-03-13 00:00:00
qa = 66 .... list index = 735 .... 0  .... date = 2001-02-07 00:00:00
2


In [34]:
counter = 0
for ind, m in enumerate(results["processing_mask"]):
    try:
        if qa_check[ind] == 1 and m is True:
            counter += 1
            print("list index =", ind, "....", m, " .... date =", dt.datetime.fromordinal(dates_in[ind]))
            break
    except IndexError:
        # print(ind)
        continue
print(counter)

0


In [35]:
"""
A duplicate obs. was used at some point by pyccd before index 1414 (2013-04-04)
"""

'\nA duplicate obs. was used at some point by pyccd before index 1414 (2013-04-04)\n'

In [36]:
test_results = list(results["processing_mask"])

del test_results[1487]

len(test_results)

2204

In [37]:
counter = 0
for ind, m in enumerate(test_results):
    try:
        for v in values_to_look_for:
            if v == qa_check[ind] and m is True:
                counter += 1
                print("qa =", v, ".... list index =", ind, "....", m, " .... date =", dt.datetime.fromordinal(dates_in[ind]))
                break
        for v in clear_vals:
            if v == qa_check[ind] and m is False:
                counter += 1
                print("qa =", v, ".... list index =", ind, "....", m, " .... date =", dt.datetime.fromordinal(dates_in[ind]))
                break
    except IndexError:
        print(ind)
        continue
print(counter)

0


In [38]:
for ind, date in enumerate(dates_in):
    print(ind, "...", dt.datetime.fromordinal(date))

0 ... 1982-12-05 00:00:00
1 ... 1982-12-19 00:00:00
2 ... 1983-01-04 00:00:00
3 ... 1983-01-06 00:00:00
4 ... 1983-01-13 00:00:00
5 ... 1984-04-06 00:00:00
6 ... 1984-04-22 00:00:00
7 ... 1984-05-24 00:00:00
8 ... 1984-06-16 00:00:00
9 ... 1984-06-23 00:00:00
10 ... 1984-07-09 00:00:00
11 ... 1984-07-11 00:00:00
12 ... 1984-08-28 00:00:00
13 ... 1984-09-04 00:00:00
14 ... 1984-09-13 00:00:00
15 ... 1984-09-29 00:00:00
16 ... 1984-10-29 00:00:00
17 ... 1984-10-31 00:00:00
18 ... 1984-11-07 00:00:00
19 ... 1984-11-23 00:00:00
20 ... 1984-11-30 00:00:00
21 ... 1984-12-09 00:00:00
22 ... 1984-12-18 00:00:00
23 ... 1985-01-17 00:00:00
24 ... 1985-01-19 00:00:00
25 ... 1985-01-26 00:00:00
26 ... 1985-03-06 00:00:00
27 ... 1985-03-24 00:00:00
28 ... 1985-04-09 00:00:00
29 ... 1985-04-16 00:00:00
30 ... 1985-05-18 00:00:00
31 ... 1985-07-30 00:00:00
32 ... 1985-08-06 00:00:00
33 ... 1985-08-13 00:00:00
34 ... 1985-08-22 00:00:00
35 ... 1985-08-29 00:00:00
36 ... 1985-08-31 00:00:00
37 ... 1985

473 ... 1997-04-01 00:00:00
474 ... 1997-04-10 00:00:00
475 ... 1997-04-17 00:00:00
476 ... 1997-04-24 00:00:00
477 ... 1997-05-03 00:00:00
478 ... 1997-05-10 00:00:00
479 ... 1997-05-12 00:00:00
480 ... 1997-05-19 00:00:00
481 ... 1997-05-28 00:00:00
482 ... 1997-06-04 00:00:00
483 ... 1997-06-13 00:00:00
484 ... 1997-06-20 00:00:00
485 ... 1997-06-27 00:00:00
486 ... 1997-06-29 00:00:00
487 ... 1997-07-06 00:00:00
488 ... 1997-07-13 00:00:00
489 ... 1997-07-15 00:00:00
490 ... 1997-07-22 00:00:00
491 ... 1997-07-29 00:00:00
492 ... 1997-07-31 00:00:00
493 ... 1997-08-14 00:00:00
494 ... 1997-08-16 00:00:00
495 ... 1997-08-23 00:00:00
496 ... 1997-08-30 00:00:00
497 ... 1997-09-01 00:00:00
498 ... 1997-09-08 00:00:00
499 ... 1997-09-15 00:00:00
500 ... 1997-09-17 00:00:00
501 ... 1997-10-03 00:00:00
502 ... 1997-10-10 00:00:00
503 ... 1997-10-17 00:00:00
504 ... 1997-10-19 00:00:00
505 ... 1997-11-02 00:00:00
506 ... 1997-11-04 00:00:00
507 ... 1997-11-20 00:00:00
508 ... 1997-11-27 0

1332 ... 2007-03-04 00:00:00
1333 ... 2007-03-05 00:00:00
1334 ... 2007-03-11 00:00:00
1335 ... 2007-03-12 00:00:00
1336 ... 2007-03-13 00:00:00
1337 ... 2007-03-20 00:00:00
1338 ... 2007-03-21 00:00:00
1339 ... 2007-03-27 00:00:00
1340 ... 2007-03-29 00:00:00
1341 ... 2007-04-05 00:00:00
1342 ... 2007-04-06 00:00:00
1343 ... 2007-04-12 00:00:00
1344 ... 2007-04-21 00:00:00
1345 ... 2007-04-22 00:00:00
1346 ... 2007-04-28 00:00:00
1347 ... 2007-04-29 00:00:00
1348 ... 2007-04-30 00:00:00
1349 ... 2007-05-07 00:00:00
1350 ... 2007-05-08 00:00:00
1351 ... 2007-05-14 00:00:00
1352 ... 2007-05-15 00:00:00
1353 ... 2007-05-23 00:00:00
1354 ... 2007-05-24 00:00:00
1355 ... 2007-05-31 00:00:00
1356 ... 2007-06-01 00:00:00
1357 ... 2007-06-08 00:00:00
1358 ... 2007-06-09 00:00:00
1359 ... 2007-06-15 00:00:00
1360 ... 2007-06-16 00:00:00
1361 ... 2007-06-17 00:00:00
1362 ... 2007-06-23 00:00:00
1363 ... 2007-06-25 00:00:00
1364 ... 2007-07-01 00:00:00
1365 ... 2007-07-11 00:00:00
1366 ... 2007-

2191 ... 2015-11-14 00:00:00
2192 ... 2015-11-20 00:00:00
2193 ... 2015-11-21 00:00:00
2194 ... 2015-11-22 00:00:00
2195 ... 2015-11-29 00:00:00
2196 ... 2015-11-30 00:00:00
2197 ... 2015-12-06 00:00:00
2198 ... 2015-12-07 00:00:00
2199 ... 2015-12-08 00:00:00
2200 ... 2015-12-14 00:00:00
2201 ... 2015-12-15 00:00:00
2202 ... 2015-12-16 00:00:00
2203 ... 2015-12-22 00:00:00
2204 ... 2015-12-24 00:00:00
2205 ... 2015-12-31 00:00:00


In [39]:
pprint(qa)
print(len(qa))

array([1, 1, 1, ..., 1, 1, 1], dtype=int16)
2377


In [40]:
bands = ('blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'thermal')
band_info = {b: {'coefs': [], 'inter': [], 'pred': []} for b in bands}

#mask = np.array(results['processing_mask'][:-1], dtype=bool)
mask = np.array(test_results, dtype=bool)


print('Start Date: {0}\nEnd Date: {1}\n'.format(dt.datetime.fromordinal(dates[0]),
                                                dt.datetime.fromordinal(dates[-1])))

predicted_values = []
prediction_dates = []
break_dates = []
start_dates = []
end_dates = []

# get year values for labeling plots
year1 = str(dt.datetime.fromordinal(dates[0]))[:4]
year2 = str(dt.datetime.fromordinal(dates[-1]))[:4]
years = np.arange(int(year1), int(year2), 2)

for num, result in enumerate(results['change_models']):
    print('Result: {}'.format(num))
    print('Start Date: {}'.format(dt.date.fromordinal(result['start_day'])))
    print('End Date: {}'.format(dt.date.fromordinal(result['end_day'])))
    print('Break Date: {}'.format(dt.date.fromordinal(result['break_day'])))
    print('QA: {}'.format(result['curve_qa']))
    print('Change prob: {}'.format(result['change_probability']))
    
    days = np.arange(result['start_day'], result['end_day'] + 1)
    
    break_dates.append(result['break_day'])
    start_dates.append(result['start_day'])
    end_dates.append(result['end_day'])
    
    for b in bands:
        band_info[b]['inter'] = result[b]['intercept']
        band_info[b]['coefs'] = result[b]['coefficients']
        band_info[b]['pred'] = predicts(days, result[b]['coefficients'], result[b]['intercept'])
    
        intercept = result[b]['intercept']
        coef = result[b]['coefficients']
        prediction_dates.append(days)
        predicted_values.append(predicts(days, coef, intercept))
    

plt.style.use('ggplot')


# dates_plt is the same as dates_masked
# dates_plt = dates[mask]

# ****X-Axis Ticks and Labels****
# list of years
y = [yi for yi in range(1981, 2018, 2)]

# list of datetime objects with YYYY-MM-dd pattern
t = [dt.datetime(yx, 7, 1) for yx in y]

# list of ordinal time objects
ord_time = [dt.datetime.toordinal(tx) for tx in t]

# list of datetime formatted strings
x_labels = [str(dt.datetime.fromordinal(int(L)))[:10] if L != "0.0" and L != "" else "0" for L in ord_time]

total_mask = np.logical_and(mask, qa_in)


for num, b in enumerate(bands):
    fg = plt.figure(figsize=(16,9), dpi=300)
    a1 = fg.add_subplot(2, 1, 1, xlim=(min(dates)-100, max(dates)+500), ylim=(min(data_in[num, total_mask]) - 500, 
                                                                                 max(data_in[num, total_mask]) + 500))
    
    
    # data_plt = data[num, mask]
    
    # Observed values in PyCCD time range
    a1.plot(dates_in[total_mask], data_in[num, total_mask], 'go', ms=7, mec='k', mew=0.5, label="Observations used by PyCCD")
    
    # Observed values outside PyCCD time range
    a1.plot(dates_out[qa_out], data_out[num][qa_out], 'ro', ms=5, mec='k', mew=0.5, label="Observations not used by PyCCD")
    
    # Observed values masked out
    a1.plot(dates_in[~total_mask], data_in[num, ~total_mask], color="0.65", marker="o", linewidth=0, ms=3, 
             label="Observations masked by PyCCD")
    
    a1.set_title(f'{b}')
    """
    # plot model break and start dates
    match_dates = [b for b in break_dates for s in start_dates if b==s]
    
    for ind, e in enumerate(end_dates): 
        if ind == 0:
            a1.axvline(e, color="black", label="End dates")
        
        else:
            a1.axvline(e, color="black")
            
    
    for ind, b in enumerate(break_dates): 
        if ind == 0:
            a1.axvline(b, color='r', label="Break dates")
        
        else:
            a1.axvline(b, color='r')
        
    for ind, s in enumerate(start_dates): 
        if ind ==0:
            a1.axvline(s, color='b', label="Start dates")
        
        else:
            a1.axvline(s, color='b')
            
    for ind, m in enumerate(match_dates):
        if ind == 0:
            a1.axvline(m, color="magenta", label="Break date = Start date")
        
        else:
            a1.axvline(m, color="magenta")

    # Predicted curves
    for c in range(0 , len(results["change_models"])):
        if c == 0:
            a1.plot(prediction_dates[c * len(bands) + num], predicted_values[c * len(bands) + num],
                   "orange", linewidth=2, label="PyCCD model fit")
        
        else:
            a1.plot(prediction_dates[c * len(bands) + num], predicted_values[c * len(bands) + num],
                   "orange", linewidth=2)
    
    """  
    # Add legend
    a1.legend(mode="expand", ncol=4, bbox_to_anchor=(0., -0.45, 1, 0.25), loc=8, borderaxespad=0.)
    
    # Add x-ticks and x-tick_labels 
    a1.set_xticks(ord_time)

    a1.set_xticklabels(x_labels, rotation=70, horizontalalignment="right")



Start Date: 1982-12-05 00:00:00
End Date: 2017-07-06 00:00:00

Result: 0
Start Date: 1983-01-13
End Date: 2015-09-26
Break Date: 2015-09-26
QA: 8
Change prob: 0


ValueError: operands could not be broadcast together with shapes (2204,) (2206,) 