In [1]:
import csv

def load_lookup_table(filepath):
    '''This function reads the lookup table from a CSV file and creates a dictionary mapping dstport and protocol combinations to their respective tags.'''
    lookup_table = {}
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        reader.fieldnames = [header.strip().replace('\xa0', '') for header in reader.fieldnames]
        for row in reader:

            dstport = row['dstport'].strip()
            protocol = row['protocol'].strip().lower()
            tag = row['tag'].strip().replace('\xa0', '')
            print(f"Tag for row {tag}")
            lookup_table[(dstport, protocol)] = tag
    return lookup_table

def parse_flow_logs(log_filepath, lookup_table):
    '''Processes the flow log entries, maps them to tags based on the lookup table, and calculates counts for tags and unique (port, protocol) combinations.'''
    tag_counts = {}
    port_protocol_counts = {}

    with open(log_filepath, 'r') as f:
        for line in f:
            fields = line.split()
            version = int(fields[0])

            # Determining the dstport and protocol based on the version
            if version == 1:
                # For Version 1: fields[5] is dstport, fields[6] is protocol
                dstport = fields[5].strip()
                protocol = 'tcp' if fields[6] == '6' else 'udp' if fields[6] == '17' else None
            elif version == 2:
                # Version 2: fields[5] is dstport, fields[7] is protocol
                dstport = fields[5].strip()
                print(f"Version 2 dstport {dstport} field {fields[7]}")
                protocol = 'tcp' if fields[7] == '6' else 'udp' if fields[7] == '17' else None
            elif version == 3:
                # Version 3: dstport is fields[9], protocol is fields[10]
                dstport = fields[9].strip()
                print(f"Version 3 dstport {dstport} field {fields[10]}")
                protocol = 'tcp' if fields[10] == '6' else 'udp' if fields[10] == '17' else None
            elif version == 5:
                # Version 5: dstport is fields[4], protocol is fields[5]
                dstport = fields[4].strip()
                protocol = 'tcp' if fields[5] == '6' else 'udp' if fields[5] == '17' else None
            else:
                # For Unsupported version
                continue

            if not protocol:
                continue  # Skip unsupported protocols

            # Finding tag based from the lookup table
            tag = lookup_table.get((dstport, protocol), "Untagged")

            # Update tag counter
            tag_counts[tag] = tag_counts.get(tag, 0) + 1

            # Update port/protocol counts
            port_protocol_key = (dstport, protocol)
            port_protocol_counts[port_protocol_key] = port_protocol_counts.get(port_protocol_key, 0) + 1

    return tag_counts, port_protocol_counts

def write_output(tag_counts, port_protocol_counts, output_filepath):
    '''Writes the results of the analysis (tag counts and port/protocol combination counts) to an output file in a structured format.'''
    with open(output_filepath, 'w') as f:
        # Write Tag Counts
        f.write("Tag Counts:\n\nTag,Count\n")
        for tag, count in sorted(tag_counts.items()):
            f.write(f"{tag},{count}\n")

        f.write("\nPort/Protocol Combination Counts:\n\nPort,Protocol,Count\n")
        for (port, protocol), count in sorted(port_protocol_counts.items()):
            f.write(f"{port},{protocol},{count}\n")


if __name__ == "__main__":
    lookup_table = load_lookup_table("lookup_table.csv")
    tag_counts, port_protocol_counts = parse_flow_logs("flow_log.txt", lookup_table)
    write_output(tag_counts, port_protocol_counts, "output.txt")


Tag for row sv_P1
Tag for row sv_P2
Tag for row sv_P1
Tag for row SV_P3
Tag for row sv_P2
Tag for row sv_P4
Tag for row sv_P5
Tag for row sv_P5
Tag for row email
Tag for row email
Tag for row email
Version 2 dstport 443 field 6
Version 2 dstport 23 field 6
Version 2 dstport 25 field 6
Version 2 dstport 110 field 6
Version 2 dstport 993 field 6
Version 2 dstport 143 field 6
Version 2 dstport 1024 field 6
Version 2 dstport 80 field 6
Version 2 dstport 1030 field 6
Version 2 dstport 56000 field 6
Version 2 dstport 49321 field 6
Version 2 dstport 49152 field 6
Version 2 dstport 49153 field 6
Version 2 dstport 49154 field 6
Version 3 dstport 80 field 6
Version 3 dstport 39812 field 6
