In [None]:
import os
import re
import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import contextily as ctx
import matplotlib.pyplot as plt

In [None]:
# Measurement data set; the path is relative to the notebook's working directory.
DATA_FILE = 'SRFG-v1.fth'
df = pd.read_feather(DATA_FILE)

In [None]:
# Display the column labels of the loaded frame.
df.columns

In [None]:
# Number of rows (samples) in the frame.
df.shape[0]

# Overview

In [None]:
# One histogram per remaining column; `newpos` is left out of the overview.
df.drop(columns="newpos").hist(bins=101, figsize=(25, 15));

In [None]:
# Turn the long/lat columns into point geometries (EPSG:4326) and draw them
# on top of an OpenStreetMap basemap.
points = gpd.points_from_xy(df['long'], df['lat'])
gdf = gpd.GeoDataFrame(df, geometry=points, crs='EPSG:4326')
ax = gdf.plot(marker='.', alpha=0.1, figsize=(25, 15))
ctx.add_basemap(ax, crs=gdf.crs, source=ctx.providers.OpenStreetMap.Mapnik)

# Location

In [None]:
# Median datarate per longitude bin (longitude rounded to 3 decimals).
long_bin = df.long.round(3)
df['datarate'].groupby(long_bin).median().plot();

In [None]:
# Datarate distribution per coarse longitude bin (rounded to 2 decimals).
long_bin = df.long.round(2)
ax = df.boxplot(column='datarate', by=long_bin, rot=90)
# Blank out the automatically generated figure title and x-axis label.
plt.suptitle('')
plt.gca().xaxis.set_label_text('');

# Directions and time

In [None]:
# Histogram of the `dlong` column using 101 bins.
df["dlong"].hist(bins=101);
# Note: 0.0005/360 * earth circumference * cos(47°)/s = 136.7 km/h
# i.e. assuming `dlong` is the longitude change in degrees between one-second
# samples (TODO confirm), a value of 0.0005 corresponds to about 137 km/h at 47° latitude.

In [None]:
# Number of samples recorded in each hour of the day.
hour_of_day = df["time"].dt.hour
df["time"].groupby(hour_of_day).count().plot.bar();

In [None]:
# Samples per hour of day, split by the sign of `dlong`, drawn as overlaid bars.
hour_of_day = df["time"].dt.hour
count_neg = df["time"][df.dlong < 0].groupby(hour_of_day).count()
count_pos = df["time"][df.dlong > 0].groupby(hour_of_day).count()
# Bar plots place bars by position, not by index value: both series must share
# the same hour index, otherwise the overlaid bars (and tick labels) misalign
# whenever one direction has no samples in some hour.
hours = count_neg.index.union(count_pos.index)
ax = count_neg.reindex(hours, fill_value=0).plot(kind="bar", color='blue', alpha=0.7, label="dlong < 0")
ax = count_pos.reindex(hours, fill_value=0).plot(ax=ax, kind="bar", color='orange', alpha=0.7, label="dlong > 0")
ax.legend();

In [None]:
# Mean datarate per longitude bin (3 decimals), one line per sign of `dlong`.
long_bin = df.long.round(3)
decreasing_long = df[df.dlong < 0]
increasing_long = df[df.dlong > 0]
ax = decreasing_long.groupby(long_bin)['datarate'].mean().plot()
ax = increasing_long.groupby(long_bin)['datarate'].mean().plot(ax=ax);

In [None]:
# RSSI against longitude; low alpha keeps dense regions readable.
ax = df.plot(kind="scatter", x="long", y="rssi", marker='.', color='blue', alpha=0.1)

In [None]:
# Heatmap of the mean datarate per (day since the first sample, longitude
# rounded to 3 decimals).
start = df.time.min()
day = (df.time - start).dt.days
long_bin = df.long.round(3)
# Keys are (day, rounded longitude) tuples. Deliberately not called `map`,
# which would shadow the built-in for every later cell.
mean_datarate = df.groupby([day, long_bin])['datarate'].mean().to_dict()
X = range(day.min(), day.max() + 1)
Y = sorted(long_bin.unique())
# Rows follow Y, columns follow X; combinations without samples stay NaN.
Z = np.array([[mean_datarate.get((x, y), np.nan) for x in X] for y in Y])
plt.pcolormesh(X, Y, Z, shading='auto')
plt.colorbar(label='mean datarate')
plt.xlabel('days since first sample')
plt.ylabel('longitude (rounded to 3 decimals)')
plt.gcf().set_size_inches(25, 15)

# Correlations

In [None]:
# Pairwise correlation of the numeric (and boolean) columns only.
# pandas >= 2.0 no longer drops non-numeric columns (e.g. the datetime `time`
# column) automatically, so select them explicitly; this works on any version.
df.select_dtypes(include=['number', 'bool']).corr()

# Gaps in measurements

In [None]:
# Fraction of rows whose time difference to the previous row is exactly
# 1, 2 or 3 seconds, and the fraction with a difference larger than 3 seconds.
gaps = df["time"].diff()
for seconds in (1, 2, 3):
    print(f"{seconds} second interval:", (gaps == datetime.timedelta(seconds=seconds)).mean())
print(">3 second interval:", (gaps > datetime.timedelta(seconds=3)).mean())

# Data rate scatter plots

In [None]:
# One scatter plot of datarate against each of the listed columns.
datarate_x_columns = [
    'lat', 'long', 'ele', 'rsrq', 'sinr', 'signal', 'pci',
    'rssi', 'netmode', 'rsrp', 'dlong', 'dlat', 'line',
]
for column in datarate_x_columns:
    df.plot.scatter(x=column, y="datarate", marker='.', alpha=0.01)

# Signal strength scatter plots

In [None]:
# Scatter-plot matrix of the signal metrics: histograms on the diagonal,
# pairwise scatter plots (row metric on y, column metric on x) elsewhere.
t = ["rssi", "rsrq", "rsrp", "signal", "sinr"]
# Size the figure once here instead of resizing it from every plot call.
fig, axs = plt.subplots(len(t), len(t), figsize=(25, 15))
for i, row_metric in enumerate(t):
    for j, col_metric in enumerate(t):
        if i == j:
            df[row_metric].hist(ax=axs[i, j], bins=101)
        else:
            df.plot.scatter(ax=axs[i, j], x=col_metric, y=row_metric, marker='.', alpha=0.01)

# Cell ids

In [None]:
# Time span over which each cell_id was observed (last sighting minus first sighting).
id_times = df.groupby("cell_id")["time"]
durations = id_times.max() - id_times.min()
# Keep only cell_ids whose span exceeds 8 days. The threshold is the same as the
# former string '8 00:00:00', now spelled as an explicit Timedelta.
long_duration_cells = durations[durations > pd.Timedelta(days=8)]
ax = df[df.cell_id.isin(long_duration_cells.index)].plot.scatter(x='time', y="cell_id", marker='.', alpha=0.01, figsize=(25,15))