# a.txt
S1
  s2
S3
# comment
S4

S5
---
# b.txt
s2
s3
S6
S7
---
# yesterday.txt
s1
s2
s8
s9
---
# output expected
common=2: s2 s3
only_a=3: s1 s4 s5
only_b=2: s6 s7
stale=2: s8 s9
ALERT: stale_count=2 threshold=2


In [None]:
import sys

def read_ids(path):
    """Read a text file of IDs and return them as a normalized set.

    Every line is stripped of surrounding whitespace (which also removes the
    trailing newline) and lower-cased. Blank lines and comment lines, i.e.
    lines whose first non-blank character is ``#``, are skipped.

    Args:
        path: Path of the text file to read, one ID per line.

    Returns:
        A set of stripped, lower-cased ID strings.

    Raises:
        OSError: If the file cannot be opened or read (for example it is
            missing or not readable). The error is deliberately not caught
            here so the caller can decide how to report it.
    """
    with open(path) as f:
        # Strip first: lines yielded by a file object still carry their "\n",
        # so an unstripped blank line is truthy and an unstripped ID would
        # never compare equal to the same ID read from another file.
        stripped = (line.strip() for line in f)
        return {
            entry.lower()
            for entry in stripped
            if entry and not entry.startswith("#")
        }

def main(argv):
    """Compare ID files and print a diff report plus a stale-count verdict.

    Reads three ID files with ``read_ids`` and prints, in order:

    * ``common``  - IDs present in both A and B
    * ``only_a``  - IDs present in A but not in B
    * ``only_b``  - IDs present in B but not in A
    * ``stale``   - IDs present in yesterday's file but not in A

    Each report line has the form ``<label>=<count>: <sorted ids>``. A final
    line starts with ``ALERT`` when the stale count is greater than or equal
    to the threshold, and with ``OK`` otherwise.

    Args:
        argv: Exactly four operands (script name excluded): path to file A,
            path to file B, path to yesterday's file, and an integer
            threshold.

    Raises:
        ValueError: If ``argv`` does not contain exactly four items, or if
            the threshold is not a valid integer.
    """
    if len(argv) != 4:
        raise ValueError("usage: sessdiff.py <a.txt> <b.txt> <yesterday.txt> <threshold>")

    a_path, b_path, y_path, threshold_arg = argv

    # Validate the threshold before touching any file, so a bad value fails
    # immediately instead of after the whole report has been printed.
    try:
        threshold = int(threshold_arg)
    except ValueError:
        raise ValueError(
            f"threshold must be an integer, got {threshold_arg!r}"
        ) from None

    a = read_ids(a_path)
    b = read_ids(b_path)
    y = read_ids(y_path)

    stale = y - a
    report = (
        ("common", a & b),
        ("only_a", a - b),
        ("only_b", b - a),
        ("stale", stale),
    )
    for label, ids in report:
        # sorted() gives a deterministic order; sets have none of their own.
        print(f"{label}={len(ids)}: {' '.join(sorted(ids))}")

    status = "ALERT" if len(stale) >= threshold else "OK"
    # Print the parsed threshold so the line shows the value actually compared.
    print(f"{status}: stale_count={len(stale)} threshold={threshold}")

if __name__ == "__main__":
    # sys.argv[0] is the script path; main() expects only the operands after it.
    cli_operands = sys.argv[1:]
    main(cli_operands)