# Preparing your log file
You should be able to use your saved log file as-is. However, if your trunk-recorder log output is colorized, you'll first need to strip the ANSI color codes with `ansi2txt` from `colorized-logs`.

* docker logs trunk-recorder 2> tr.log
* ansi2txt < tr.log > tr.log.log
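* gzip tr.log (if you ran ansi2txt above, compress the stripped file instead so the notebook reads it: gzip -c tr.log.log > tr.log.gz)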

In [231]:
import datetime
import gzip
import re
import pandas as pd

# Parse the gzipped trunk-recorder log into `calldict`: one record per
# (second, talkgroup) pair, holding the call's class, metadata and -- for
# recorded calls -- its duration.
calldict = {}
# Define the regex pattern for our log entries
# line = "[2024-05-09 12:31:45.009426] (info)   [pwcp25]	126C	TG:       1007 (            PWPD West 1)	Freq: 851.962500 MHz	Concluding Recorded Call - Last Update: 4s	Recorder last write:4.72949	Call Elapsed: 12"
# log_pattern = r".*\[(\S+\s\S+)\]\s+\((\S+)\)\s+\[(\S+)\]\s+(\d+).*TG:.*\((.*)\).*Freq:\s+(\d+\.\d+).*MHz\s+(.*)"
# Capture groups of the active pattern:
#   1 = timestamp, 2 = log level, 3 = system, 4 = call number,
#   5 = numeric talkgroup, 6 = frequency (MHz), 7 = everything after "MHz".
# The leading ".*" lets a prefix (such as the docker timestamp in the sample
# line further down) precede the bracketed timestamp.
log_pattern = r".*\[(\S+\s\S+)\]\s+\((\S+)\)\s+\[(\S+)\]\s+(\d+)\S+\s+\S+\s+(\d+).*Freq:\s+(\d+\.\d+).*MHz\s+(.*)"
# If you DO NOT have "talkgroupDisplayFormat": "id_tag" set, you can change the log_pattern to this below to grab the numeric
# talkgroup numbers:
# log_pattern = r".*\[(\S+\s\S+)\]\s+\((\S+)\)\s+\[(\S+)\]\s+(\d+)\S+\s+\S+\s+(\d+).*Freq:\s+(\d+\.\d+).*MHz\s+(.*)"
log_regex = re.compile(log_pattern)

# Second round of regexp.  These harvest data from calldata - the "everything else"
# part of the line (group 7).  They are loop-invariant, so they are built and
# compiled once here instead of once per log line.
regexp_dict = {
    "excluded": r".*(Not recording talkgroup.).*",
    "encrypted": r".*(Not Recording: ENCRYPTED).*",
    "unknown_tg": r".*(TG not in Talkgroup File).*",
    "no_source": r".*(no source covering Freq).*",
    "standard": r".*Call Elapsed:\s+(\d+)",
}
data_regexes = {
    callclass: re.compile(data_pattern)
    for callclass, data_pattern in regexp_dict.items()
}

# The context manager closes the file even if a line fails to parse.
with gzip.open("tr.log.gz", "rt") as logfile:
    for line in logfile:
        # line = "2024-05-09T12:23:26.007469771Z [2024-05-09 12:23:26.005761] (info)   [pwcp25]	16C	TG:       2003 (                PWFD 5B)	Freq: 851.725000 MHz	Concluding Recorded Call - Last Update: 4s	Recorder last write:4.79871	Call Elapsed: 13"
        if not (match := log_regex.match(line)):
            continue

        calldata = match[7]
        talkgroup = int(match[5].strip())
        # Technically this isn't a real unixtimestamp as it's not timezone aware,
        # but we're just using it to create a unique index identifier.
        calldate = datetime.datetime.strptime(match[1], "%Y-%m-%d %H:%M:%S.%f")
        callts = calldate.timestamp()
        # Index for the dict is timestamp(ish) followed by the talkgroup, so a later
        # line for the same talkgroup in the same second replaces the earlier one.
        callindex = f"{int(callts)}{talkgroup}"

        for callclass, data_regex in data_regexes.items():
            if not (datamatch := data_regex.match(calldata)):
                continue

            # Build the whole record in one dict.  Previously the "standard"
            # branch replaced the record with a duration-only dict, which
            # dropped the callclass that had just been stored.
            record = {"callclass": callclass}
            if callclass == "standard":
                duration = int(datamatch[1])
                record["duration"] = duration
                # This log event happens at the end of a call, so we should adjust the calltime
                # back by duration seconds to get to the start.
                calldate = calldate - datetime.timedelta(seconds=duration)
            record.update(
                {
                    "calldate": str(calldate),
                    "loglevel": str(match[2]),
                    "system": str(match[3]),
                    "callnumber": int(match[4]),
                    "talkgroup": talkgroup,
                    "frequency": float(match[6]),
                }
            )
            calldict[callindex] = record

# Turn the parsed records into a DataFrame, one row per calldict entry.
calldf = pd.DataFrame.from_dict(calldict, orient="index")
# Only "Call Elapsed" (standard) log lines yield a duration, so any row that has
# one is a standard call.  Make sure callclass says so even when the record was
# stored without it.  The column check avoids a KeyError when the log contained
# no recorded calls at all.
if "duration" in calldf.columns:
    calldf.loc[calldf["duration"].notna(), "callclass"] = "standard"

# We're going to use ChanList.csv if we have it to convert decimal talkgroups to their
# Alpha Longform.  While this could be in the original log line, we do it here to take care
# of logs which might not have that enabled AND it allows us to see the number value of "unlisted" tg.
try:
    chanlist = pd.read_csv("ChanList.csv")
    # Work on a separate frame so a failure part-way through (for example a
    # missing "Decimal" or "Alpha Tag" column) leaves calldf untouched.
    merged = pd.merge(
        left=calldf,
        right=chanlist,
        left_on="talkgroup",
        right_on="Decimal",
        how="left",
    )
    # Talkgroup was an int for matching; now it becomes a string
    merged["talkgroup"] = merged["talkgroup"].astype("str")
    # And now we merge in the Alpha Tag to talkgroups defined.  Undefined keep their
    # numeric value
    merged.loc[merged["Alpha Tag"].notna(), "talkgroup"] = merged["Alpha Tag"]
except Exception as err:
    # Best effort: the lookup is optional, so report why it failed and carry on.
    print("We couldn't open ChanList so talkgroups will remain numeric.")
    print(f"Reason: {err!r}")
else:
    calldf = merged
# Finally, either way let's sort the columns in the dataframe and dump the extra columns
# from the ChanList merge.  reindex (rather than filter) also creates any expected
# column that is missing, filled with NaN, so the sort below and later cells
# that read "duration" or "callclass" work on sparse or empty logs.
calldf = calldf.reindex(
    columns=[
        "calldate",
        "loglevel",
        "system",
        "callnumber",
        "callclass",
        "talkgroup",
        "frequency",
        "duration",
    ]
)
calldf.sort_values(by="calldate", inplace=True)

In [232]:
import matplotlib.pyplot as plt
import numpy as np

# Optional display tweaks -- uncomment to show more rows / more decimal places.
# pd.set_option("display.max_rows", 999)
# pd.set_option("display.precision", 5)

# Show the first rows of the parsed calls with the index column hidden.
preview = calldf.head()
display(preview.style.hide(axis="index"))

calldate,loglevel,system,callnumber,callclass,talkgroup,frequency,duration
2024-05-09 12:22:31.005216,info,pwcp25,1,standard,PWFD 5B,851.9625,7.0
2024-05-09 12:22:31.009743,info,pwcp25,2,standard,PWPD Central 1,852.9,25.0
2024-05-09 12:22:31.574226,error,pwcp25,0,no_source,PWPD Central 1,0.0,
2024-05-09 12:22:33.317803,info,pwcp25,3,excluded,PWSchBus W,852.725,
2024-05-09 12:22:35.007133,info,pwcp25,4,standard,PWFD 5B,852.7875,14.0


In [251]:
# Headline numbers: the total row count, one sub-frame and count per
# non-recorded call class, and the mean duration over rows that have one.
total_call_count = len(calldf.index)

callclass_column = calldf["callclass"]

excludeddf = calldf[callclass_column == "excluded"]
excluded_call_count = len(excludeddf)

encrypteddf = calldf[callclass_column == "encrypted"]
encrypted_call_count = len(encrypteddf)

unknowndf = calldf[callclass_column == "unknown_tg"]
unknown_talkgroup_count = len(unknowndf)

nosourcedf = calldf[callclass_column == "no_source"]
no_source_count = len(nosourcedf)

# Rows with a duration are the calls that were actually recorded.
duration_column = calldf["duration"]
call_duration_count = duration_column.notna().sum()

# Series.mean() skips missing durations; round to 2 places for display.
average_call_duration = np.round(duration_column.mean(), 2)

display(f"The Total Number of Calls is: {total_call_count}")
display("Of those:")
display(f"{excluded_call_count} were excluded, likely due to the talkgroup being excluded")
display(f"{encrypted_call_count} were encrypted and not recorded")
display(f"{unknown_talkgroup_count} were in an unknown talkgroup and were recorded or not based on your settings")
display(f"{no_source_count } had no source covering the frequency")

display(f"The Average Call Duration is: {average_call_duration} seconds")

# Graph time!
# callcounts = [
#     excluded_call_count,
#     encrypted_call_count,
#     unknown_talkgroup_count,
#     call_duration_count,
# ]
# callcategories = ("Excluded", "Encrypted", "Unknown Talkgroup", "Recorded")
# y_pos = np.arange(len(callcategories))
# bar_colors = ["chocolate", "firebrick", "lavender", "forestgreen"]
# # Create bars
# p = plt.bar(y_pos, callcounts, color=bar_colors)
# plt.bar_label(p)  # This puts the number on the top of each bar
# plt.title("Call Outcome")
# # Create names on the x-axis
# plt.xticks(y_pos, callcategories)
# # Show graphic
# plt.show()

'The Total Number of Calls is: 12705'

'Of those:'

'4973 were excluded, likely due to the talkgroup being excluded'

'1325 were encrypted and not recorded'

'1512 were in an unknown talkgroup and were recorded or not based on your settings'

'12 had no source covering the frequency'

'The Average Call Duration is: 13.21 seconds'

In [252]:
# Count calls per frequency, ignoring the frequency-0 rows that are common
# when trunk-recorder first starts up.
nonzero_frequency = calldf["frequency"].ne(0)
frequencydf = calldf.loc[nonzero_frequency]
frequency_counts = frequencydf["frequency"].value_counts()
display(frequency_counts)

# plt.stem(frequency_counts.index, frequency_counts.values)
# plt.show()

frequency
851.4750    1965
852.7250    1912
851.4500    1857
853.1750    1087
851.7000    1035
851.7250    1024
852.9000    1013
851.9875     954
852.7875     925
851.9625     905
851.1750      19
853.9500       5
853.6250       3
Name: count, dtype: int64