In [1]:
import json, boto3, time, pytz, random, os
import pandas as pd
import datetime as dt
from io import StringIO
from tqdm import tqdm


# --- S3 object locations ------------------------------------------------------
BUCKET = "masc12-roundshot-img-storage"
WEBCAM_CSV = "webcams-2024-10-14-v2.csv"
OUTLIER_CSV = "image-size-outliers.csv"

# --- AWS clients --------------------------------------------------------------
# Every client is derived from one named-profile session. The default-credential
# variants that were used here before are kept for reference only:
#   logtable = boto3.resource('dynamodb').Table(LOG_TABLE_NAME)
#   lambda_client = boto3.client('lambda')
#   s3_client = boto3.client('s3')
session = boto3.Session(region_name='eu-north-1', profile_name='exxeta-admin')
s3_client = session.client('s3')
lambda_client = session.client('lambda')

def invokeChildLambda(cam_id:str, cam_name:str, urldate:str, urltime:str, min_filesize:float) -> None:
    """Trigger the image-fetcher Lambda for one camera and one timestamp.

    All arguments are forwarded unchanged in the JSON payload, together with a
    ``wait_seconds`` value drawn uniformly at random from [0, 60]. The Lambda is
    invoked with ``InvocationType='Event'`` and nothing is returned.
    """
    payload = dict(
        cam_id=cam_id,
        cam_name=cam_name,
        urldate=urldate,
        urltime=urltime,
        min_filesize=min_filesize,
        wait_seconds=random.uniform(0, 60),
    )
    request = dict(
        FunctionName='arn:aws:lambda:eu-north-1:454075690551:function:roundshot-img-fetcher',
        InvocationType='Event',
        Payload=json.dumps(payload),
    )
    lambda_client.invoke(**request)

def get_csv_from_s3(bucket:str, csv:str) -> pd.DataFrame:
    """Fetch the object ``csv`` from ``bucket`` and parse it as a ';'-separated CSV.

    The object body is read completely, decoded as UTF-8 and handed to
    ``pandas.read_csv``; the resulting DataFrame is returned.
    """
    raw_bytes = s3_client.get_object(Bucket=bucket, Key=csv)['Body'].read()
    text_buffer = StringIO(raw_bytes.decode('utf-8'))
    return pd.read_csv(text_buffer, sep=';', encoding='utf-8')

In [2]:
def get_date_range_from(start, end):
    """Return the 10-minute timestamps from ``start`` to ``end`` (inclusive) whose
    Europe/Zurich wall-clock time lies in [04:00, 23:00).

    Args:
        start: First candidate timestamp. A naive datetime is interpreted as
            Europe/Zurich wall-clock time; an aware one is converted to it.
        end: Last candidate timestamp, handled like ``start``.

    Returns:
        A list of datetimes localized to Europe/Zurich, in ascending order.
        Empty when ``start`` is later than ``end``.

    The stepping is done on naive wall-clock values and every kept value is
    localized individually. Adding a timedelta to an already localized pytz
    datetime keeps the original UTC offset, so a range that crosses a DST
    change would otherwise carry a stale offset: comparisons against
    ``tz.localize(...)`` thresholds would be off by one hour and the loop would
    stop one hour before ``end``.
    """
    zurich = pytz.timezone('Europe/Zurich')

    def _as_wall_clock(ts):
        # Same naive/aware distinction that pytz's localize() makes.
        if ts.tzinfo is None:
            return ts
        return ts.astimezone(zurich).replace(tzinfo=None)

    current = _as_wall_clock(start)
    last = _as_wall_clock(end)
    step = dt.timedelta(minutes=10)
    window_open, window_close = dt.time(4, 0), dt.time(23, 0)

    daterange = []
    while current <= last:
        # Only times inside the window are localized; the Zurich DST switches
        # happen between 02:00 and 03:00, so no kept value is ambiguous or
        # non-existent.
        if window_open <= current.time() < window_close:
            daterange.append(zurich.localize(current))
        current += step
    return daterange

In [3]:
def get_files_from_s3(bucket_name=None, suffix='.jpg'):
    """List the keys of all objects in an S3 bucket that end with ``suffix``.

    Args:
        bucket_name: Name of the bucket to list. When omitted, the notebook-level
            ``BUCKET`` constant is used instead of a second hard-coded copy of
            the bucket name.
        suffix: Key ending to keep; defaults to ``'.jpg'``.

    Returns:
        A list of matching object keys, in the order the listing yields them.
    """
    if bucket_name is None:
        bucket_name = BUCKET
    bucket = session.resource('s3').Bucket(bucket_name)
    return [obj.key for obj in bucket.objects.all() if obj.key.endswith(suffix)]

### getting some lost images (27.02.)

In [None]:
# Folder with the locally stored images; their file names are collected in
# sorted order and the number of entries is shown as the cell output.
localfolder = "/home/masc12/dev/masc12-mthesis/data/_copy/erlen-kronb-additional/"
imgs = sorted(os.listdir(localfolder))
len(imgs)

100%|██████████| 276/276 [00:00<00:00, 2006287.53it/s]


In [5]:
%%script false --no-raise-error
# For every collected file name, parse its timestamp and request that same
# date/time from the golfclub-erlen camera, pausing one second between calls.
for filename in tqdm(imgs):
    taken_at = dt.datetime.strptime(filename, '%Y-%m-%d_%H-%M-%S.jpg')
    invokeChildLambda(
        "661fcd91e010d0.44975711",
        "golfclub-erlen",
        f"{taken_at:%Y-%m-%d}",
        f"{taken_at:%H-%M-%S}",
        81_621.0,
    )
    time.sleep(1)

100%|██████████| 276/276 [04:58<00:00,  1.08s/it]


### continue here

In [None]:
# invokeChildLambda("608fc16698a1e7.09133428", "kronberg", "2024-08-05", "12-00-00", 20_524.0)
# invokeChildLambda("661fcd91e010d0.44975711", "golfclub-erlen", "2024-08-27", "12-00-00", 81_621.0)

In [None]:
# --- DONE ---
# rorschacherberg
# kronberg 31.7. 16:50
# wiler turm 24.8.
# golfclub-erlen 26.8. 17:00
# romanshorn 28.8.
# altenrhein 29.8.
# elbenalp 29.8.

# wolzenalp?    # too far away (24 km)
# sitterdorf?   # no history available
# schwaegalp?   # kronberg is closer
# schoenenbühl? # subsumed by romanshorn + altenrhein

In [None]:
# %%script false --no-raise-error
# Backfill: for every 10-minute timestamp in the range, invoke the fetcher Lambda
# once per enabled camera whose start threshold has been reached.
start = dt.datetime(2024, 9, 13, 7, 20) # start here
end = dt.datetime(2024, 9, 19, 14, 0)
history = get_date_range_from(start, end)

tz = pytz.timezone('Europe/Zurich')
PAUSE_SECONDS = 3  # pause before each timestamp's batch of invocations

# One row per camera: (cam_id, cam_name, min_filesize, first timestamp to fetch).
# Built once instead of re-localizing every threshold on each loop iteration.
# Commented rows are cameras that are currently not backfilled.
backfill_cams = [
    ("608fc16698a1e7.09133428", "kronberg", 20_524.0, tz.localize(dt.datetime(2024, 7, 30, 6, 0))),
    # ("545b7dcf9cedf1.10734711", "wiler-turm", 66_127.25, tz.localize(dt.datetime(2024, 8, 24, 5, 0))),
    ("661fcd91e010d0.44975711", "golfclub-erlen", 81_621.0, tz.localize(dt.datetime(2024, 8, 26, 17, 10))),
    # ("62863f9fcbb216.97456372", "romanshorn", 32_205.5, tz.localize(dt.datetime(2024, 8, 28, 5, 0))),
    # ("5c8b6fec9dad68.39944594", "altenrhein", 61_358.0, tz.localize(dt.datetime(2024, 8, 29, 5, 0))),
    # ("5a7c518a176f53.54105137", "elbenalp", 18_066.0, tz.localize(dt.datetime(2024, 8, 29, 5, 0))),
]

for day in tqdm(history):
    urldate = f"{day:%Y-%m-%d}"
    urltime = f"{day:%H-%M-%S}"

    time.sleep(PAUSE_SECONDS)

    for cam_id, cam_name, min_filesize, first_ts in backfill_cams:
        if day >= first_ts:
            invokeChildLambda(cam_id, cam_name, urldate, urltime, min_filesize)


# sanity check above

In [None]:
# Fetch the list of '.jpg' object keys in the image bucket once; the
# sanity-check cell below tests expected keys against this listing.
files = get_files_from_s3()

In [None]:
# Sanity check: collect the expected image keys that are absent from `files`.
start = dt.datetime(2024, 2, 23, 4, 0) # start here
end = dt.datetime(2024, 9, 20, 22, 50)
history = get_date_range_from(start, end)

# for rorschacherberg: per-month latest / earliest time of day that is checked.
# Only months 2-9 are listed, which matches the start/end range above.
dayends = {
    2: dt.time(18, 0),
    3: dt.time(19, 30),
    4: dt.time(20, 30),
    5: dt.time(21, 0),
    6: dt.time(21, 20),
    7: dt.time(21, 20),
    8: dt.time(21, 0),
    9: dt.time(20, 0),
}

daystarts = {
    2: dt.time(8, 0),
    3: dt.time(7, 0),
    4: dt.time(6, 30),
    5: dt.time(6, 20),
    6: dt.time(6, 20),
    7: dt.time(6, 10),
    8: dt.time(6, 30),
    9: dt.time(7, 30),
}

tz = pytz.timezone('Europe/Zurich')

# Timestamps inside this window (bounds included) are skipped entirely.
skip_from = tz.localize(dt.datetime(2024, 4, 5, 10, 30))
skip_until = tz.localize(dt.datetime(2024, 4, 15, 17, 50))

# Other cameras: (S3 key prefix, first timestamp from which a key is expected).
# Trailing comments keep the camera id / min_filesize of the disabled re-fetch
# calls (rorschacherberg itself: 53a838bc15a356.87772279 / 677_847.0).
secondary_cams = [
    ("kronberg", tz.localize(dt.datetime(2024, 8, 9, 5, 0))),        # 608fc16698a1e7.09133428 / 12_524.0
    ("wiler-turm", tz.localize(dt.datetime(2024, 9, 3, 5, 0))),      # 545b7dcf9cedf1.10734711 / 40_127.25
    ("golfclub-erlen", tz.localize(dt.datetime(2024, 6, 13, 5, 0))), # 661fcd91e010d0.44975711 / 80_621.0
    ("romanshorn", tz.localize(dt.datetime(2024, 9, 8, 5, 0))),      # 62863f9fcbb216.97456372 / 832_205.5
    ("altenrhein", tz.localize(dt.datetime(2024, 9, 9, 5, 0))),      # 5c8b6fec9dad68.39944594 / 761_358.0
    ("elbenalp", tz.localize(dt.datetime(2024, 9, 9, 5, 0))),        # 5a7c518a176f53.54105137 / 618_066.0
]

# Set membership instead of scanning the full key list for every lookup.
existing_keys = set(files)
missing = []

for day in tqdm(history):
    time_of_day = day.time()
    if time_of_day < daystarts[day.month] or time_of_day > dayends[day.month]:
        continue
    if skip_from <= day <= skip_until:
        continue

    filename = f"{day:%Y-%m-%d_%H-%M-%S}.jpg"

    primary_key = f"rorschacherberg/{filename}"
    if primary_key in existing_keys:
        continue
    missing.append(primary_key)

    # NOTE(review): the other cameras are only checked for timestamps where the
    # rorschacherberg image is missing (this nesting is kept unchanged) - confirm
    # that this is intended and not an indentation slip.
    for cam_name, first_expected in secondary_cams:
        key = f"{cam_name}/{filename}"
        if day >= first_expected and key not in existing_keys:
            missing.append(key)

print("Missing:", len(missing))

In [None]:
# Persist the collected keys, one per line, without a trailing newline.
with open("missings.txt", mode="w") as outfile:
    content = "\n".join(missing)
    outfile.write(content)