### Section 5: Basic Libraries II

---

<div class="alert alert-block alert-warning">
Given a zip file with a subfolder with multiple annotations, where the name convention for each one of them is: 

{DATE}_{TIME}_SN{SATELLITE_NUMBER}_QUICKVIEW_VISUAL_{VERSION}_{UNIQUE_REGION}.txt

where:

- DATE expressed as YYYYMMDD (year, month and day), e.g. 20241201, 20230321 ...
- TIME expressed as HHMMSS (hour, minutes and seconds), e.g. 213430
- SATELLITE_NUMBER an integer that represents the satellite number.
- VERSION provides the version of the pipeline, e.g. "0_1_2", "1_3_1" ...
- UNIQUE_REGION provides a unique location in the form of a string, e.g SATL-2KM-10N_552_4164
</div>

In [92]:
import glob
import os
import re
from datetime import datetime
import numpy as np

<div class="alert alert-block alert-info">
<b>Exercise 1</b> 

How many annotations do you have per month and year? Which month has the most annotation files?
</div>

In [None]:
# File name layout: YYYYMMDD_HHMMSS_SN<number>_QUICKVIEW_VISUAL_<version>_<region>.txt
# Groups 1-3 capture year, month and day; the remaining groups capture the
# time, satellite number, pipeline version and region.
pattern = r'(\d{4})(\d{2})(\d{2})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

# every .txt file inside the annotations folder
annotations = glob.glob('/Users/julia/Desktop/ESADE/python/assignment_4/annotations/*.txt')

# maps a (year, month) tuple of strings to the number of files dated in it
annotations_count = {}

for path in annotations:
    parsed = re.match(pattern, os.path.basename(path))
    if parsed is None:
        # the file name does not follow the naming convention: ignore it
        continue

    # groups 1 and 2 are the year and the month
    period = parsed.group(1, 2)
    annotations_count[period] = annotations_count.get(period, 0) + 1

annotations_count

{('2024', '01'): 27,
 ('2024', '06'): 52,
 ('2024', '04'): 25,
 ('2024', '02'): 45,
 ('2024', '03'): 17,
 ('2024', '05'): 28}

In [None]:
# report the totals ordered by their (year, month) key
print("Annotations per year and month:")
for period in sorted(annotations_count):
    year, month = period
    print(f"{year}-{month}: {annotations_count[period]} annotations")

Annotations per year and month:
2024-01: 27 annotations
2024-02: 45 annotations
2024-03: 17 annotations
2024-04: 25 annotations
2024-05: 28 annotations
2024-06: 52 annotations


In [None]:
# Find the (year, month) key with the highest file count.
# Taking max() over the dictionary items would compare the (year, month) keys
# first and therefore return the latest month rather than the busiest one,
# so the stored counts are used as the comparison key instead.
if annotations_count:
    busiest_year, busiest_month = max(annotations_count, key=annotations_count.get)
    print("Month with the most annotations:", busiest_month)
else:
    # max() raises ValueError on an empty collection
    print("No annotations found")

Month with the most annotations: 06


<div class="alert alert-block alert-info">
<b>Exercise 2</b> 

Create a dictionary where each **key** is a month, and the corresponding **value** is a list containing the names of all the annotations whose date falls in that month. 

a. Save it following the json format, and load it again to check that everything is ok.

b. Save it this time using Pickle.

c. Instead of storing a list of all the annotation names happening that month, let's create for each annotation a dictionary with keys: name and date (using a datetime object).
    
</div>

In [None]:
# Group the annotation file names by month: each key is the two-digit month
# string taken from the file name, each value the list of matching file names.
# NOTE(review): the name `dict` shadows the built-in type; it is kept because
# the JSON and pickle cells read this variable under that name.
dict = {}

# every .txt file inside the annotations folder
annotations = glob.glob('/Users/julia/Desktop/ESADE/python/assignment_4/annotations/*.txt')

# File name layout: YYYYMMDD_HHMMSS_SN<number>_QUICKVIEW_VISUAL_<version>_<region>.txt
pattern = r'(\d{4})(\d{2})(\d{2})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

for path in annotations:
    filename = os.path.basename(path)
    parsed = re.match(pattern, filename)
    if parsed is None:
        # the file name does not follow the naming convention: ignore it
        continue

    # group 2 is the month; start a new list the first time a month is seen
    dict.setdefault(parsed.group(2), []).append(filename)

for month, names in dict.items():
    print(f'{month}:{names}')

01:['20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', '20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3600.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4162.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3602.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_554_4162.txt', '20240101_213601_SN31_QUICKVIEW_VI

In [None]:
import json

# write the month -> file names dictionary to disk as JSON
with open('dict.json', 'w') as f:
    json.dump(dict, f)

# read it back from the same file
with open('dict.json', 'r') as f:
    json_dict = json.load(f)

# The keys are strings and the values are lists of strings, so the JSON
# round trip must give back an equal dictionary; fail loudly if it does not.
assert json_dict == dict, "JSON round trip changed the dictionary"

print(json_dict)

{'01': ['20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', '20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3600.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4162.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3602.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_554_4162.txt', '20240101_213601_SN31_QUICKVIE

In [None]:
import pickle

pickle_path = 'dict.pkl'

# store the dictionary on disk in pickle's binary format
with open(pickle_path, 'wb') as sink:
    pickle.dump(dict, sink)

# read the dictionary back from the same file
with open(pickle_path, 'rb') as source:
    pickle_dict = pickle.load(source)

print(pickle_dict)

{'01': ['20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', '20240130_173903_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_366_3756.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3600.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4162.txt', '20240127_190620_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_500_3602.txt', '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_554_4162.txt', '20240101_213601_SN31_QUICKVIE

In [None]:
# Group the annotations by month, storing for each one a dictionary with the
# file name ("name") and its capture date as a datetime object ("date").
ann_date = {}

# every .txt file inside the annotations folder
annotations = glob.glob('/Users/julia/Desktop/ESADE/python/assignment_4/annotations/*.txt')

# File name layout: YYYYMMDD_HHMMSS_SN<number>_QUICKVIEW_VISUAL_<version>_<region>.txt
pattern = r'(\d{4})(\d{2})(\d{2})_(\d{6})_SN(\d+)_QUICKVIEW_VISUAL_([\d_]+)_([A-Za-z0-9\-_.]+)\.txt'

for path in annotations:
    filename = os.path.basename(path)
    parsed = re.match(pattern, filename)
    if parsed is None:
        # the file name does not follow the naming convention: ignore it
        continue

    # groups 1-3 are year, month and day
    year, month, day = parsed.group(1, 2, 3)

    # only the date part is parsed, so the time of day is always midnight
    date_obj = datetime.strptime(f"{year}{month}{day}", "%Y%m%d")

    # the month is the dictionary key; start a new list on first sight
    ann_date.setdefault(month, []).append({"name": filename, "date": date_obj})

print(ann_date)

{'01': [{'name': '20240102_185527_SN27_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_740_3850.txt', 'date': datetime.datetime(2024, 1, 2, 0, 0)}, {'name': '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3770.txt', 'date': datetime.datetime(2024, 1, 1, 0, 0)}, {'name': '20240101_192856_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_552_4164.txt', 'date': datetime.datetime(2024, 1, 1, 0, 0)}, {'name': '20240102_185954_SN24_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_414_3786.txt', 'date': datetime.datetime(2024, 1, 2, 0, 0)}, {'name': '20240104_220339_SN31_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-10N_556_4178.txt', 'date': datetime.datetime(2024, 1, 4, 0, 0)}, {'name': '20240115_213834_SN28_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_376_3722.txt', 'date': datetime.datetime(2024, 1, 15, 0, 0)}, {'name': '20240126_173752_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_386_3722.txt', 'date': datetime.datetime(2024, 1, 26, 0, 0)}, {'name': '20240101_174301_SN33_QUICKVIEW_VISUAL_1_1_10_SATL-2KM-11N_404_3772.txt', 'dat

<div class="alert alert-block alert-info">
<b>Exercise 3</b> 

Print all the annotations from the second half of 2024, ordered from the oldest to the newest.
</div>

In [None]:
# Bounds of the second half of 2024: 1 July 2024 (inclusive) up to
# 1 January 2025 (exclusive). The upper bound keeps files dated after 2024
# out of the report.
second_half_start = datetime(2024, 7, 1)
second_half_end = datetime(2025, 1, 1)

# (name, date) pairs of the annotations dated inside the second half of 2024,
# collected from the month -> entries dictionary built in exercise 2
files = [
    (annotation["name"], annotation["date"])
    for month_entries in ann_date.values()
    for annotation in month_entries
    if second_half_start <= annotation["date"] < second_half_end
]

# sort once, after everything has been collected, from oldest to newest
files_sorted = sorted(files, key=lambda item: item[1])

# print annotations sorted by dates
if files_sorted:
    for name, _ in files_sorted:
        print(name)
else:
    print("no annotations for the second half of the year 2024")

no annotations for the second half of the year 2024
