In [97]:
import ibis
from ibis import _
import numpy as np
import pandas as pd
import holoviews as hv
import easier as ezr
from scipy import stats
import maidenhead

hv.extension("bokeh")
from hvplot import pandas
from geopy.distance import geodesic as GD

In [107]:
class Loader:
    """Load one spot file and expose the cleaned FT8 records as ``df``.

    The tunable inputs are class attributes so they can be overridden on a
    subclass or an instance without editing the body of ``df``.
    """

    # File handed to ibis.read_csv.
    DATA_PATH = "./data/20230310T060000.tsv.gz"
    # Minimum snr kept; 0 was the stricter threshold tried earlier.
    MIN_SNR = -25
    # Row cap applied right after the snr filter and BEFORE the FT8 filter,
    # so the returned frame can contain fewer rows than this.
    MAX_ROWS = 100000
    # Senders with fewer spots than this (counted after the FT8 filter) are dropped.
    MIN_SENDER_SPOTS = 100

    @staticmethod
    def _keep_valid_locators(frame, column):
        """Keep rows whose locator in ``column`` is non-null with an even length of 2-8."""
        frame = frame[pd.notnull(frame[column])]
        n_chars = frame[column].str.len()
        return frame[n_chars.between(2, 8) & (n_chars % 2 == 0)]

    @staticmethod
    def _with_lat_lon(frame, column, suffix):
        """Join ``lat_<suffix>`` / ``lon_<suffix>`` columns decoded from the locator in ``column``."""
        coords = pd.DataFrame(
            list(frame[column].apply(maidenhead.to_location)),
            columns=[f"lat_{suffix}", f"lon_{suffix}"],
            index=frame.index,
        )
        return frame.join(coords, how="inner")

    @ezr.cached_container
    def df(self):
        """Return the cleaned spot frame.

        Steps: read ``DATA_PATH``, keep rows with ``snr >= MIN_SNR``, cap at
        ``MAX_ROWS``, keep ``mode == "FT8"``, drop senders with fewer than
        ``MIN_SENDER_SPOTS`` spots, drop rows whose sender or receiver locator
        is missing or malformed, and decode both locators to latitude/longitude.

        Columns returned: timestamp, frequency, sender_callsign,
        receiver_callsign, sender_locator, receiver_locator, lat_s, lon_s,
        lat_r, lon_r, epoch (timestamp floored to the hour) and num_spots
        (constant 1).
        """
        t = ibis.read_csv(self.DATA_PATH)
        t = t[t.snr >= self.MIN_SNR]
        t = t.head(self.MAX_ROWS)

        # Copy so the column assignment below writes to an independent frame.
        df = t.execute()
        df = df[df["mode"] == "FT8"].copy()

        # Only the timestamp is derived here; every other source column that is
        # not in the selection below is discarded anyway.
        df["timestamp"] = ezr.pandas_utc_seconds_to_time(df.flowStartSeconds)
        df.columns = ezr.slugify(df.columns, kill_camel=True)
        df = df[
            [
                "timestamp",
                "frequency",
                "sender_callsign",
                "receiver_callsign",
                "sender_locator",
                "receiver_locator",
            ]
        ]

        # Keep only senders that were spotted often enough.
        spots_per_sender = df.sender_callsign.value_counts()
        busy_senders = spots_per_sender[spots_per_sender >= self.MIN_SENDER_SPOTS]
        df = df[df.sender_callsign.isin(busy_senders.index)]

        # Same order as before: receiver locators are filtered first, then sender.
        df = self._keep_valid_locators(df, "receiver_locator")
        df = self._keep_valid_locators(df, "sender_locator")

        df = self._with_lat_lon(df, "sender_locator", "s")
        df = self._with_lat_lon(df, "receiver_locator", "r")

        df["epoch"] = df.timestamp.dt.floor("1h")
        df["num_spots"] = 1
        return df


# Build the loader and display its cleaned frame. `df` is wrapped in
# ezr.cached_container — presumably computed on first access and then reused;
# confirm against the ezr documentation.
loader = Loader()
loader.df

Unnamed: 0,timestamp,frequency,sender_callsign,receiver_callsign,sender_locator,receiver_locator,lat_s,lon_s,lat_r,lon_r,epoch,num_spots
1,2023-03-09 23:42:11,14075157,KE6V,WA5VAH,DM13,DN13SO,33.0,-118.0,43.583333,-116.50,2023-03-09 23:00:00,1
3,2023-03-09 23:42:12,14075835,W8ATE,WA5VAH,EN81,DN13SO,41.0,-84.0,43.583333,-116.50,2023-03-09 23:00:00,1
4,2023-03-09 23:42:12,14076221,NY6DX,WA5VAH,FN30,DN13SO,40.0,-74.0,43.583333,-116.50,2023-03-09 23:00:00,1
5,2023-03-09 23:42:12,14075997,W3YNI,WA5VAH,FN00,DN13SO,40.0,-80.0,43.583333,-116.50,2023-03-09 23:00:00,1
6,2023-03-09 23:42:12,14074404,W2GLH,WA5VAH,FM29,DN13SO,39.0,-76.0,43.583333,-116.50,2023-03-09 23:00:00,1
...,...,...,...,...,...,...,...,...,...,...,...,...
99993,2023-03-09 23:43:59,28074590,NG4C,W4ARK,FM16,EM50VP,36.0,-78.0,30.625000,-88.25,2023-03-09 23:00:00,1
99995,2023-03-09 23:43:56,14075848,W4NEG,KE2AWI,EM84,FN20,34.0,-84.0,40.000000,-76.00,2023-03-09 23:00:00,1
99996,2023-03-09 23:43:56,14076501,WP4V,KE2AWI,FK68,FN20,18.0,-68.0,40.000000,-76.00,2023-03-09 23:00:00,1
99998,2023-03-09 23:43:56,14077089,EA8UP,KE2AWI,IL18,FN20,28.0,-18.0,40.000000,-76.00,2023-03-09 23:00:00,1


In [108]:
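# NOTE: disabled — Loader.df does not keep the `snr` column, so `snr` must be
# added to its column selection before this line can run.
# loader.df.snr.value_counts().sort_index().hvplot.bar().options(width=1000)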

In [109]:
# Accumulator filled by doit(): one single-column ("dist") frame per group that
# has at least five distinct receivers. Re-created here so that re-running this
# cell starts from an empty list.
df_list = []


def doit(batch):
    """Record every pairwise receiver distance for one group of spots.

    Parameters
    ----------
    batch : pandas.DataFrame
        Rows for a single group; must contain ``receiver_callsign``, ``lat_r``
        and ``lon_r``.

    Returns
    -------
    pandas.DataFrame
        ``batch`` with duplicate ``receiver_callsign`` rows removed.

    Side effects
    ------------
    When the group has at least five distinct receivers, appends a one-column
    frame (``dist``, in miles) to the module-level ``df_list``. The frame holds
    all ordered receiver pairs, including each receiver paired with itself
    (distance 0), so the caller is expected to filter those out.
    """
    batch = batch.drop_duplicates(["receiver_callsign"])
    # Groups with fewer than five distinct receivers contribute nothing.
    if len(batch) < 5:
        return batch

    receivers = batch[["lat_r", "lon_r"]].rename(
        columns={"lat_r": "lat", "lon_r": "lon"}
    )

    # Explicit cross join. The earlier merge on "num_spots" only produced all
    # pairs because that column happened to be constant.
    pairs = receivers.merge(receivers, how="cross", suffixes=("_1", "_2"))
    pairs["dist"] = [
        GD((lat1, lon1), (lat2, lon2)).mi
        for (lat1, lon1, lat2, lon2) in zip(
            pairs.lat_1, pairs.lon_1, pairs.lat_2, pairs.lon_2
        )
    ]
    df_list.append(pairs[["dist"]])
    return batch


# Work on the first 10,000 cleaned spots only.
df = loader.df.head(10000)

# doit() is called purely for its side effect (appending to df_list), so walk
# the groups directly instead of using groupby.apply, which would also build
# and discard a concatenated result. The loop variable is deliberately not
# named `_`, because that name is the ibis deferred-expression helper here.
for group_key, batch in df.groupby(by=["sender_callsign", "epoch"]):
    doit(batch)

dfd = pd.concat(df_list, axis=0, ignore_index=True, sort=False)
# Drop zero distances: doit() pairs every receiver with every receiver, so each
# one is also paired with itself (receivers at identical coordinates go too).
dfd = dfd[dfd.dist > 0]
dfd

Unnamed: 0,dist
1,4688.605944
2,186.456033
3,556.520403
4,964.954041
5,420.591497
...,...
431740,1152.650060
431741,852.760856
431742,1368.417345
431743,477.883127


In [110]:
# Reference area used to normalise the circle areas below. `dist` comes from
# geodesic(...).mi, so this is in square miles.
# NOTE(review): which country or region this figure refers to is not recorded — confirm.
a_country = 253676.068

# Area of a circle whose radius is the pair distance, in units of a_country.
dfd = dfd.assign(area=np.pi * dfd["dist"] ** 2 / a_country)
dfd.head()

Unnamed: 0,dist,area
1,4688.605944,272.243703
2,186.456033,0.43055
3,556.520403,3.835593
4,964.954041,11.531442
5,420.591497,2.190743


In [111]:
# Histogram of the pairwise receiver distances, using 300 bins.
dfd["dist"].hvplot.hist(bins=300)

In [118]:
# Alternative view kept for reference:
# ezr.ecdf(dfd.query('area < 100').area, N=10000, plot=True)

# Rescale distance to the unit named on the x axis below.
# NOTE(review): 69 presumably approximates miles per degree of latitude, i.e.
# the height of one Maidenhead grid square — confirm.
s = dfd["dist"] / 69
# Optional range restriction, currently disabled:
# s = s[s.between(1, 150)]

# Inverse ECDF with a logarithmic y axis and relabelled dimensions.
ecdf_plot = ezr.ecdf(s, N=10000, plot=True, inverse=True)
ecdf_plot = ecdf_plot.options(logx=False, logy=True)
ecdf_plot.redim(x="Maidenhead grid lengths", y="Percent spots outside that radius")



# Takeaway
* About half of all pairs of spots at 0 dB or better lie outside a 12-grid-square radius of each other
* This grows to about a 17-grid-square radius if I instead use a threshold of -25 dB or better
* Call this the half-length, $h$ (the distribution does appear to be roughly exponential)
* I think what I want is this: if I make two contacts that are $x$ half-lengths apart, I double my points
* I want points to be based on area
* $$\pi\int_{0}^{\infty}r^2f(r)dr$$
* $$\pi\int_{0}^{\infty}r^2\left[\frac{2^{-r/h}}{r^2}\right]dr$$
* $$\pi\int_{0}^{\infty}r^2\left[\frac{e^{-r/\lambda}}{r^2}\right]dr$$
* $$\lambda = \frac{h}{\ln(2)}$$


# Ideas
Instead of all this math, maybe what I should do is build a Maidenhead grid and watch points accumulate in a Streamlit app.