# Note: updates have been made in the script being used for the Climate Contours website

In [1]:
import glob
from datetime import date
from datetime import datetime
import stat
from datetime import timedelta
import random
import sys
import os

In [2]:
fileNameDir = "/glade/scratch/tking/06"

In [19]:
# Goal: build a correct time index first, then work backwards to the old date.
# The old filenames are only rewritten to derive the new filenames and are not
# used for anything else, so how they were originally produced is irrelevant.

# One input index file and one output index file per year.
old = ["{}.txt".format(year) for year in range(2000, 2005)]
new = ["{}new.txt".format(year) for year in range(2000, 2005)]

# Day numbers 1..365 to sample from. (This used to stop at 363, which only
# narrowed the pool of days to pick from; it did not affect the time index math.)
days = list(range(1, 366))

# Reference date that the day counts in start_times are added to.
start = date(1979, 1, 1)

# Per-year day counts added to `start` (together with the sampled day) to build
# the date in the filename. Corrected for leap years (1980, 1984, 1988, 1992,
# 1996 and 2000 each have 366 days) and for an index that was one off.
# Previous values, 21, 22, 23... years * 365 after 1979:
#   [7665, 8030, 8395, 8760, 9125]
start_times = [7669, 8035, 8400, 8765, 9130]

# Filled later: loop position -> set of indices already present in the old file.
s = {}

# Per-year offset in days (converted to 3-hourly steps by the consumer).
# All offsets are currently 0; the notes record what was observed per year.
# TODO: all variable files for the same year are *not* offset by the same
#       amount of time? In the climate contours tool the variables all seemed
#       to line up...
# TODO: ncview shows different start times than the date expected by the
#       'time' variable.
offset = {
    # 2000 -- was -4 days; ivt: dec 27th, 1999, 0h, should be jan 1, 2000, 0h
    #         prw: dec 25, 2000, 3h --- eg: -5.875
    #         in ncview, frame 1/2920 is 31-dec-1999 22:30
    '2000': 0,
    # 2001 -- was -5 days; ivt: dec 26th, 2000, 0h, should be jan 1, 2001, 0h
    #         pr: dec 25, 2001, 1.5h (why 2001?) --- eg: -5.9375
    #         psl: dec 25, 2001, 3h --- eg: -5.875
    #         in ncview, frame 1/2920 is 31-dec-2000 22:30
    '2001': 0,
    # 2002 -- was -5 days; ivt: dec 26th, 2001, 0h, should be jan 1, 2002, 0h
    #         pr: dec 25, 2002, 1.5h (why 2002?)
    #         psl: dec 25, 2002, 3h
    #         in ncview, frame 1/2920 is 31-dec-2001 22:30
    '2002': 0,
    # 2003 -- was -5 days; ivt: dec 26th, 2002, 0h, should be jan 1, 2003, 0h
    #         prw: dec 25, 2003, 3h
    #         in ncview, frame 1/2920 is 31-dec-2002 22:30
    '2003': 0,
}

# 0.0625 days is 1.5 hours offset (although we want whole number indices)

# ncview need to specify leap years "-calendar noleap"
# if specify 'leap', get dec 26th; in ncdump see time:calendar = "noleap"
# ncview is giving correct days
# in our indexing of the files, we want to treat as NOT having leap years

In [20]:
# This should generate correctly aligned index/date pairs!
#
# For each year, read the indices already listed in the old index file, draw 60
# random (day, 3-hourly slot) pairs whose time index is not yet used, and write
# "<index> <filename>" lines to the new index file.

# loop through years 2000-2003: `offset` only has entries for these four years,
# so "2004.txt" (the fifth entry of `old`/`new`) is intentionally not processed
for i in range(4):
    # each line of the old file has an index and a filename; only the index is
    # needed, to make sure the new indices have not been used before
    s[i] = set()
    with open(fileNameDir + "/" + old[i], "r") as fi:
        for x in fi:
            fields = x.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            s[i].add(int(fields[0]))

    # take offset in days for particular year from dictionary and multiply by 8 in order to get 3hr chunk version
    offset_for_year = offset[old[i].split('.')[0]] * 8

    chosen = set()  # time indices picked in this run, so no pair is written twice
    temp = []
    while len(temp) < 60:  # choose random assortment of 60 files
        day = random.choice(days)
        hour = random.randint(1, 8)  # use the index corresponding to 3hourly files
        # time index is the number of days multiplied by 8 because it's 3hourly
        # data, plus hour index (which also represents 3-hourly increments)
        time = (day * 8) + hour
        # skip indices already used in the old file or already drawn above
        if time in s[i] or time in chosen:
            continue
        chosen.add(time)
        temp.append((day, time, hour))
    temp.sort(key=lambda x: x[1])

    # this is the part that actually names the file based on the index...
    # The output file is opened only once the entries are ready, so a failure
    # while reading the old file no longer leaves a truncated new file behind.
    with open(fileNameDir + "/" + new[i], "w") as fw:
        for d, t, h in temp:
            print(start_times[i]+d, d, h, t)
            fname = (start + timedelta(days=start_times[i]+d)).strftime("data-%Y-%m-%d-{}-0.jpg".format(h))
            # h is included within t, so just use h for name of file and then include within t_no_offset for index
            t_no_offset = t + offset_for_year  # files don't actually start on jan 1, so account for indices before jan 1
            print("{} {}".format(t_no_offset, fname))
            fw.write("{} {}\n".format(t_no_offset, fname))
            # These are the correct date names for time described in temp file
            # indices correspond to correct date when using files that don't start at jan 1...

7693 24 6 198
198 data-2000-01-24-6-0.jpg
7699 30 2 242
242 data-2000-01-30-2-0.jpg
7707 38 1 305
305 data-2000-02-07-1-0.jpg
7725 56 5 453
453 data-2000-02-25-5-0.jpg
7737 68 2 546
546 data-2000-03-08-2-0.jpg
7739 70 1 561
561 data-2000-03-10-1-0.jpg
7744 75 6 606
606 data-2000-03-15-6-0.jpg
7744 75 7 607
607 data-2000-03-15-7-0.jpg
7749 80 2 642
642 data-2000-03-20-2-0.jpg
7760 91 1 729
729 data-2000-03-31-1-0.jpg
7761 92 1 737
737 data-2000-04-01-1-0.jpg
7765 96 7 775
775 data-2000-04-05-7-0.jpg
7773 104 7 839
839 data-2000-04-13-7-0.jpg
7775 106 5 853
853 data-2000-04-15-5-0.jpg
7776 107 5 861
861 data-2000-04-16-5-0.jpg
7778 109 6 878
878 data-2000-04-18-6-0.jpg
7779 110 6 886
886 data-2000-04-19-6-0.jpg
7781 112 8 904
904 data-2000-04-21-8-0.jpg
7790 121 7 975
975 data-2000-04-30-7-0.jpg
7791 122 2 978
978 data-2000-05-01-2-0.jpg
7797 128 5 1029
1029 data-2000-05-07-5-0.jpg
7811 142 5 1141
1141 data-2000-05-21-5-0.jpg
7814 145 7 1167
1167 data-2000-05-24-7-0.jpg
7816 147 6 1182
1

In [5]:
# Scratch cell: compare indices produced by the sampling cell against the raw
# `time` values of one netCDF file, to see whether index N really corresponds
# to the date encoded in the generated filename.
# TODO: make sure calculated index actually aligns with nc file...

# index/date from above cell: 34 data-2000-01-08-2-0.jpg (old offset)
#                             18 data-2000-01-02-2-0.jpg (no offset)

# check file in /glade/u/home/tking/scratch/cgnet/high_lat_QC/ivt/polar_ivt
# ncdump -v time windhusavi_3hr_CAM5-1-025degree_All-Hist_est1_v1-0_run002_200001-200012.nc

# Leading values of the file's time variable (presumably pasted from the ncdump
# command above; the trailing "# ..." marks where the paste was cut off).
# Values advance in steps of 0.125, i.e. 3 hours when the unit is days.
time = [7665, 7665.125, 7665.25, 7665.375, 7665.5, 7665.625, 7665.75,
    7665.875, 7666, 7666.125, 7666.25, 7666.375, 7666.5, 7666.625, 7666.75,
    7666.875, 7667, 7667.125, 7667.25, 7667.375, 7667.5, 7667.625, 7667.75,
    7667.875, 7668, 7668.125, 7668.25, 7668.375, 7668.5, 7668.625, 7668.75,
    7668.875, 7669, 7669.125, 7669.25, 7669.375, 7669.5, 7669.625, 7669.75,
    7669.875, 7670, 7670.125, 7670.25, 7670.375, 7670.5, 7670.625, 7670.75,
    7670.875, 7671, 7671.125, 7671.25, 7671.375, 7671.5, 7671.625, 7671.75,
    7671.875, 7672, 7672.125, 7672.25, 7672.375, 7672.5, 7672.625, 7672.75,
    7672.875, 7673, 7673.125, 7673.25, 7673.375, 7673.5, 7673.625, 7673.75,
    7673.875, 7674, 7674.125, 7674.25, 7674.375, 7674.5, 7674.625, 7674.75,
    7674.875, 7675, 7675.125, 7675.25, 7675.375, 7675.5, 7675.625, 7675.75,
    7675.875, 7676, 7676.125, 7676.25, 7676.375, 7676.5, 7676.625, 7676.75,
    7676.875, 7677, 7677.125, 7677.25, 7677.375, 7677.5, 7677.625, 7677.75,
    7677.875, 7678, 7678.125, 7678.25, 7678.375, 7678.5, 7678.625, 7678.75] # ...

# NOTE(review): with Python's date arithmetic, date(1979, 1, 1) + 7670 days is
# 2000-01-01 (consistent with the sampling cell's output "7693 ... 2000-01-24"),
# which puts day 7669 at 1999-12-31 and day 7667 at 1999-12-29. The calendar
# dates quoted in the notes below do not match that -- confirm which calendar
# (Gregorian vs. noleap) each note was computed with.

# time[34] is 7669.25
time[34]
# 7669 days after 1979 is dec 29, 2000, plus 6h, which is NOT 2000-01-08-2...
# in ncview, frame 34/2920 for this file has date 5-jan-2000 1:30
# in ncview, the frame that matches up with 2000-01-08-2 is 59 or 60 (4:30 or 7:30, respectively)
# in ncview, frame 1/2920 is 31-dec-1999 22:30 - THIS DOES NOT MATCH OFFSET THAT I HAD FROM DATE!

# time[18] is 7667.25
time[18]
# 7667.25 days after 1979 is dec 27, 2000 and 6h, which is not 2000-01-02-2
# in ncview, frame 18/2920 for the ivt file has date 3-jan-2000 1:30
# in ncview, the frame that matches with 2000-01-02-2 is 11 (4:30) (or 12?- 7:30)
# neither of these match up....

# print(start_times[i]+d, h, t)
# 7670 6 14
# 14 data-2000-01-01-6-0.jpg
time[14]
time[14+(8*4)-6] # old index + (8 x possible offset) - h

# print(start_times[i]+d, d, h, t)
# 7676 7 2 58
# 58 data-2000-01-07-2-0.jpg

# Rebuild the date part of a filename for one chosen day number.
# NOTE(review): `i` is not assigned in this cell; it is whatever value an
# earlier cell left behind. The recorded output 'data-2003-01-07' corresponds
# to i == 3, although the example quoted just above is for year 2000 -- confirm
# which year was intended.
specified_day = 7
(start + timedelta(days=start_times[i]+specified_day)).strftime("data-%Y-%m-%d")

'data-2003-01-07'

In [6]:
# index = glob.glob('/global/project/projectdirs/ClimateNet/image_scripts/second200_filenames.txt')

In [7]:
# index

# Move old filenames to corrected filenames where index aligns with date

In [8]:
# input old filename --> retrieve what old index would have been --> generate new filename that corresponds with underlying data
# ----------------------------------------------------------
# Reads `offset` (year string -> offset in days) defined in an earlier cell.

old_filename = "data-2003-01-05-00-2_0.nc"

# use old script backwards to back out how index was calculated
# ----------------------------------------------------------
old_start_times = [7665, 8030, 8395, 8760, 9125]
start = date(1979, 1, 1)

# Only the leading "data-YYYY-MM-DD" (15 characters) carries the date. Parsing
# just that part accepts every mask suffix ("_0.nc", "_1.nc", "_2.nc", ...)
# instead of only "_0.nc". The hour field is still required to be "00", since
# the index reconstruction below assumes hour 0.
if not old_filename[15:].startswith("-00-"):
    raise ValueError("expected 'data-YYYY-MM-DD-00-...' but got {!r}".format(old_filename))
old_fname = datetime.strptime(old_filename[:15], "data-%Y-%m-%d")
# should_be_old_fname = ''

# make start into a datetime instead of date & retrieve time delta
td = old_fname - datetime.combine(start, datetime.min.time()) # datetime.date and datetime.datetime inconsistency

# position of the year in the *_start_times lists (2000 -> 0, ..., 2004 -> 4),
# taken from the parsed year rather than from a fixed character of the name
i = old_fname.year - 2000
if not 0 <= i < len(old_start_times):
    raise ValueError("no start time known for year {}".format(old_fname.year))
d = td.days - old_start_times[i]
t = d * 8  # 8 three-hourly steps per day
old_index = t
# todo: figure out if nonzero hours were used!


# use new script backwards to calculate filename from index
# ----------------------------------------------------------
new_filename = 'filename generated by old index'  # placeholder, never filled in

new_start_times = [7669, 8035, 8400, 8765, 9130] # updated for leap years & index was 1 off

# Deliberately NOT stored in `days`: that name is the list of day numbers the
# sampling cell draws from, and overwriting it breaks re-running that cell.
# Integer division is exact here because old_index is a multiple of 8.
old_day_count = old_index // 8  # assuming hours = 0!
days_no_offset = old_day_count - offset[str(old_fname.year)]
h=0
fname_new = (start + timedelta(days=new_start_times[i]+days_no_offset)).strftime("data-%Y-%m-%d-{}-0.jpg".format(h))

print("The date in the new filename,", fname_new+', should be the date of underlying data that corresponds with mask from', old_filename)
print("but this actually lines up with Jan 9th, 2003 13:30?")
# TODO: once retrieve new filename, simultaneously print old/new filename and check if things line up with image/masks

The date in the new filename, data-2003-01-10-0-0.jpg, should be the date of underlying data that corresponds with mask from data-2003-01-05-00-2_0.nc
but this actually lines up with Jan 9th, 2003 13:30?


In [9]:
# old_filenames_qa1_antarctic = ["data-2001-01-01-00-2_0.nc",  "data-2001-05-14-00-2_0.nc",  "data-2001-09-08-00-2_0.nc",
#     "data-2001-01-24-00-2_0.nc",  "data-2001-05-14-00-2_1.nc",  "data-2001-09-09-00-2_0.nc",
#     "data-2001-01-24-00-2_1.nc",  "data-2001-05-28-00-2_0.nc",  "data-2001-09-26-00-2_0.nc",
#     "data-2001-01-24-00-2_2.nc",  "data-2001-06-20-00-2_0.nc",  "data-2001-10-03-00-2_0.nc",
#     "data-2001-02-14-00-2_0.nc",  "data-2001-06-20-00-2_1.nc",  "data-2001-10-06-00-2_0.nc",
#     "data-2001-02-14-00-2_1.nc",  "data-2001-06-23-00-2_0.nc",  "data-2001-10-25-00-2_0.nc",
#     "data-2001-02-17-00-2_0.nc",  "data-2001-06-23-00-2_1.nc",  "data-2001-10-25-00-2_1.nc",
#     "data-2001-02-17-00-2_1.nc",  "data-2001-06-30-00-2_0.nc",  "data-2001-11-04-00-2_0.nc",
#     "data-2001-02-20-00-2_0.nc",  "data-2001-06-30-00-2_1.nc",  "data-2001-11-08-00-2_0.nc",
#     "data-2001-03-01-00-2_0.nc",  "data-2001-07-14-00-2_0.nc",  "data-2001-11-12-00-2_0.nc",
#     "data-2001-03-01-00-2_1.nc",  "data-2001-07-14-00-2_1.nc",  "data-2001-11-22-00-2_0.nc",
#     "data-2001-03-01-00-2_2.nc",  "data-2001-07-23-00-2_0.nc",  "data-2001-12-03-00-2_0.nc",
#     "data-2001-03-01-00-2_3.nc",  "data-2001-07-23-00-2_1.nc",  "data-2001-12-05-00-2_0.nc",
#     "data-2001-03-03-00-2_0.nc",  "data-2001-07-23-00-2_2.nc",  "data-2001-12-23-00-2_0.nc",
#     "data-2001-03-03-00-2_1.nc",  "data-2001-07-23-00-2_3.nc",  "data-2001-12-23-00-2_1.nc",
#     "data-2001-03-03-00-2_2.nc",  "data-2001-08-03-00-2_0.nc",  "data-2002-01-09-00-2_0.nc",
#     "data-2001-03-12-00-2_0.nc",  "data-2001-08-03-00-2_1.nc",  "data-2002-01-10-00-2_0.nc",
#     "data-2001-03-15-00-2_0.nc",  "data-2001-08-03-00-2_2.nc",  "data-2002-01-30-00-2_0.nc",
#     "data-2001-03-15-00-2_1.nc",  "data-2001-08-14-00-2_0.nc",  "data-2002-02-07-00-2_0.nc",
#     "data-2001-03-25-00-2_0.nc",  "data-2001-08-15-00-2_0.nc",  "data-2002-02-17-00-2_0.nc",
#     "data-2001-03-25-00-2_1.nc",  "data-2001-08-21-00-2_0.nc",  "data-2002-02-22-00-2_0.nc",
#     "data-2001-03-25-00-2_2.nc",  "data-2001-08-25-00-2_0.nc",  "data-2002-02-22-00-2_1.nc",
#     "data-2001-04-05-00-2_0.nc",  "data-2001-09-07-00-2_0.nc"]

In [10]:
# # calculate what index would have (incorrectly) been

# # TODO: FINISH THIS SECTION (NOT WORKING PROPERLY YET)

# # These dates/times are all indexed 8 timesteps for each day apart,
# #     so the spacing at least is accurate
# # 'data-1999-12-28-02-0.jpg' # index was 8   # lines up with ____
# # 'data-1999-12-30-02-0.jpg' # index was 24   # lines up with ____
# # 'data-2000-01-02-02-0.jpg' # index was 48   # lines up with ____
# # 'data-2000-01-03-02-0.jpg' # index was 56   # lines up with ____
# # 'data-2000-01-11-02-0.jpg' # index was 120   # lines up with ____
# # 'data-2000-01-15-02-0.jpg' # index was 152   # lines up with ____
# # ^^^ none of the above files were actually used in QC...


# # 56 data-2001-01-02-02-0.jpg
# # 104 data-2001-01-08-02-0.jpg
# # 184 data-2001-01-18-02-0.jpg
# # 192 data-2001-01-19-02-0.jpg
# # 216 data-2001-01-22-02-0.jpg
# # 264 data-2001-01-28-02-0.jpg
# # 288 data-2001-01-31-02-0.jpg
# # 296 data-2001-02-01-02-0.jpg
# # 312 data-2001-02-03-02-0.jpg
# # 336 data-2001-02-06-02-0.jpg

# # 16 data-2001-12-28-02-0.jpg
# # 16 data-2001-12-28-02-0.jpg
# # 72 data-2002-01-04-02-0.jpg
# # 112 data-2002-01-09-00-2_0.nc seems to line up with Jan 12, 2002, 22:30, 3 days after, which should have index 136
# # 128 data-2002-01-11-02-0.jpg # maybe have info below?
# # 240 data-2002-01-25-02-0.jpg
# # 248 data-2002-01-26-02-0.jpg
# # 256 data-2002-01-27-02-0.jpg
# # 264 data-2002-01-28-02-0.jpg
# # 304 data-2002-02-02-02-0.jpg
# # 328 data-2002-02-05-02-0.jpg
# # 368 data-2002-02-10-02-0.jpg
# # 432 data-2002-02-18-02-0.jpg
# # 472 data-2002-02-23-02-0.jpg

# # qa1/antarctic/data-2001-01-01-00-2.h5


# # 2001:
# # 'data-2001-12-23-00-2_0.nc seems to line up with Dec 30th, ~13:30, which is more than 5 days after

# # 2002:
# # qa2/arctic/netcdfs/data-2002-01-05-00-2_0.nc seems to line up with Jan 7, 2002, 19:30, 2 days after
# # qa1/antarctic/netcdfs/data-2002-01-09-00-2_0.nc seems to line up with Jan 12, 2002, 22:30, 3 days after
# # qa1/antarctic/netcdfs/data-2002-01-10-00-2_0.nc mayyyybe lines up with jan 8, 2002, 13:30, 2 days earlier but hard to tell
# # qa2/arctic/netcdfs/data-2002-01-11-00-2_0.nc mayyyybe lines up with Jan 16, 2002, 13:30, 5 days after, but lower right AR is suspicious
# # qa2/arctic/netcdfs/data-2002-01-28-00-2_0.nc doesn't seem to line up well with nearby dates

# # 2003:
# # qa2/antarctic/netcdfs/data-2003-01-05-00-2_1.nc mayyyybe lines up with Jan 8, 7:30, 3 days after, but really questionable
# # qa3/arctic/netcdfs/data-2003-01-05-00-2_0.nc doesn't seem to line up well with nearby dates
# # qa2/antarctic/netcdfs/data-2003-01-15-00-2_1.nc mayyyyybe lines up with January 22, 2003 16:30, 7 days after
# # data-2003-01-05-02-0.jpg lines up with January 9th, 2003 13:30, 4 days after

# # indices are consistent in text files, eg if a file got an index it will get the same index the next time the script is run

# first_file = 'data-2002-01-09-02-0.jpg'
# first_file_year = first_file.split("-")[1]
# first_file_month = first_file.split("-")[2]
# first_file_day = first_file.split("-")[3]

# print("year month day", first_file_year, first_file_month, first_file_day)

# # [7670, 8036, 8401, 8766, 9131]
# if first_file_year == '2000':
#     start_time_used = 7665
#     start_time_actual = start_time_used + 4
# if first_file_year == '2001':
#     start_time_used = 8030
#     start_time_actual = start_time_used + 5
# if first_file_year == '2002':
#     start_time_used = 8395
#     start_time_actual = start_time_used + 5
# if first_file_year == '2003':
#     start_time_used = 8760
#     start_time_actual = start_time_used + 5
# if first_file_year == '2004':
#     start_time_used = 9125
#     start_time_actual = start_time_used + 5

# time_index_used = int(first_file_day) * 8

# print("calculated index used:", time_index_used)
# print("index from text file was 112")
# print("5 days * 8 timesteps/day = 40timesteps = 112-72 so that could be discrepancy in indexing calculated/used")
# print("index that should have been used based on alignment: 136, 3daysx8timesteps=24 indices off")
# print("--------------")

# fname = (date(1979, 1, 1) + timedelta(days=start_time_used+int(first_file_day))).strftime("data-%Y-%m-%d-02-0.jpg")
# print("fname corresponding with index used:", fname)
# print("but this should be jan 12th...")
# print("---------------")

# fname_actual = (date(1979, 1, 1) + timedelta(days=start_time_actual+int(first_file_day))).strftime("data-%Y-%m-%d-02-0.jpg")
# print("fname actual:", fname_actual)

# # maybe corrected dates are just off by 5 for 2000 and 6 for 2001+??? So, for year 2000, add 5 days, and for other years, add 6 days?
# # but then indexing was off by one as well to return the same filetime, so actually +4 and +5


In [11]:
# use index to calculate what date/time should have been


In [12]:
# write out name of corrected file


In [13]:
# for i in range(len(old_filenames))
#     print("mv {} {}".format(old_filenames[i], new_filenames[i]))