# Client Version History

## Investigate the Observations Database created by the scheduling-bazaar scripts.

In [1]:
%load_ext sql

In [2]:
%sql sqlite:////mnt/home/c4/satnogs/data/all/observations.db

In [3]:
%%sql
PRAGMA table_info([observations]);

 * sqlite:////mnt/home/c4/satnogs/data/all/observations.db
Done.
 * sqlite:////mnt/home/c4/satnogs/data/all/observations.db
Done.


In [4]:
%%sql
SELECT
    ground_station,
    station_name,
    COUNT(id) as count
FROM observations
WHERE
    start >= '2023-04-04' AND
    start < '2023-04-05'
GROUP BY ground_station
ORDER BY count DESC
LIMIT 1000;

In [5]:
print(f'There have been {len(_)} active stations on 2023-04-04.')

In [6]:
%%sql
SELECT
    status,
    COUNT(id) as count
FROM observations
WHERE
    start >= '2023-04-04' AND
    start < '2023-04-05'
GROUP BY status
LIMIT 1000;

In [7]:
%%sql
SELECT
    client_version,
    COUNT(1) as count
FROM observations
WHERE
    start >= '2023-04-04' AND
    start < '2023-04-05' AND
    status == 'good'
GROUP BY client_version;

In [8]:
versions = {}
versions['2023-04-04'] = _

In [9]:
%%sql
SELECT
    client_version,
    ground_station,
    station_name
FROM observations
WHERE
    start >= '2023-01-04' AND
    start < '2023-01-05' AND
    status == 'good'
GROUP BY ground_station;

## Sample client version of each active station for one day of the month

In [1]:
import sqlite3
from collections import defaultdict
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import pytz

In [2]:
connection = sqlite3.connect("/mnt/home/c4/satnogs/data/all/observations.db")
cursor = connection.cursor()

In [8]:
statement = """
SELECT
    client_version,
    ground_station,
    station_name
FROM observations
WHERE
    start >= '{}' AND
    start < '{}' AND
    status == 'good'
GROUP BY ground_station;
"""

dates = [datetime(2020, i, 4) for i in range(8,12)]
dates.extend([datetime(2021, i, 4) for i in range(1,12)])
dates.extend([datetime(2022, i, 4) for i in range(1,12)])
dates.extend([datetime(2023, 1, 4),
              datetime(2023, 2, 4),
              datetime(2023, 3, 4),
              datetime(2023, 4, 4)])

version_history = defaultdict(dict)

for date in dates:
    date2 = date + timedelta(days=1)
    rows = cursor.execute(statement.format(date.strftime('%Y-%m-%d'), date2.strftime('%Y-%m-%d'))).fetchall()
    versions = defaultdict(int)
    for entry in rows:
        versions[entry[0]] += 1

    version_history[date] = versions

df = pd.DataFrame(version_history)

version_history

defaultdict(dict,
            {datetime.datetime(2020, 8, 4, 0, 0): defaultdict(int,
                         {'1.0': 15,
                          '1.3.4+3.gbf82932': 2,
                          '1.3.2': 33,
                          '1.3.1': 43,
                          '1.3.4': 77,
                          '0.9': 27,
                          '1.2': 8,
                          '1.3.1+11.g3e0cfc3.dirty': 2,
                          '1.1.2': 5,
                          '0.8': 4,
                          '0.7': 4,
                          '0.9.1': 8,
                          '1.3.2+2.ge5f003c': 1,
                          '': 3,
                          '1.0.dev+177.g0aa8d19': 1,
                          '1.3.4+3.gbf82932.dirty': 1,
                          '1.1': 1,
                          '1.3.1+4.gddef9bf': 1}),
             datetime.datetime(2020, 9, 4, 0, 0): defaultdict(int,
                         {'1.3.4': 102,
                          '1.3.4+4.g8fece37': 1,
  

In [9]:
reference_date = '2023-04-04'
threshold = 0.5

df_sorted = df.sort_values(by=reference_date, ascending=False)
df_short = df_sorted[df_sorted[reference_date].rank(pct=True) > threshold]

# Hack....
df_others_row = pd.DataFrame(df_sorted[df_sorted.rank(pct=True) <= threshold].sum().transpose(), columns=['others']).transpose()

df_short = pd.concat([df_short, df_others_row])
df_short

Unnamed: 0,2020-08-04,2020-09-04,2020-10-04,2020-11-04,2021-01-04,2021-02-04,2021-03-04,2021-04-04,2021-05-04,2021-06-04,...,2022-06-04,2022-07-04,2022-08-04,2022-09-04,2022-10-04,2022-11-04,2023-01-04,2023-02-04,2023-03-04,2023-04-04
1.8.1,,,,,,,,,,,...,,,,,19.0,30.0,57.0,56.0,62.0,71.0
1.6,,,,,,,,,,,...,84.0,86.0,67.0,83.0,48.0,40.0,64.0,45.0,54.0,55.0
1.4,,,,73.0,135.0,137.0,142.0,167.0,161.0,126.0,...,13.0,12.0,9.0,15.0,11.0,12.0,18.0,13.0,20.0,20.0
1.5.1,,,,,,,,,,17.0,...,4.0,7.0,2.0,3.0,2.0,1.0,4.0,5.0,5.0,6.0
1.7,,,,,,,,,,,...,11.0,12.0,8.0,13.0,7.0,4.0,5.0,4.0,3.0,5.0
1.0,15.0,11.0,12.0,9.0,9.0,9.0,8.0,9.0,7.0,7.0,...,1.0,4.0,1.0,2.0,,1.0,5.0,1.0,4.0,5.0
1.3.4,77.0,102.0,125.0,52.0,24.0,20.0,16.0,17.0,19.0,16.0,...,3.0,2.0,1.0,1.0,1.0,,3.0,2.0,4.0,5.0
1.8.1+27.gffccea9,,,,,,,,,,,...,,,,,,,4.0,4.0,4.0,4.0
0+unknown,,,,,,,,,,,...,,,,,,,1.0,2.0,3.0,4.0
others,12.0,12.0,12.0,14.0,14.0,17.0,18.0,16.0,13.0,18.0,...,11.0,8.0,6.0,7.0,8.0,6.0,14.0,13.0,12.0,23.0


## Load Release dates from satnogs-client git history

Extract the release dates from git history with the following command:
```bash
TZ=UTC0 git for-each-ref --sort=creatordate --format "%(refname), %(creatordate:iso)" refs/tags > /mnt/home/c4/satnogs/python/scheduling-bazaar/satnogs-client-releases.csv
```

In [5]:
release_dates = []
with open('../satnogs-client-releases.csv') as fdata:
    for line in fdata:
        ref, date_str = line.split(',')
        version = ref[10:]

        if '+' in date_str or '-' in date_str:
            # Handle timezones
            sign = '+' if '+' in date_str else '-'
            date_lcl, tz = date_str.strip().split(f' {sign}')
            date_str_iso = f'{date_lcl}{sign}{tz[:2]}:{tz[2:]}'
        else:
            date_str_iso = date_str.strip()

        date = datetime.fromisoformat(date_str_iso).astimezone(pytz.utc).replace(tzinfo=None)
        release_dates.append([version, date])

        # print(f'{version:9s} {date}')

df_versions = pd.DataFrame.from_records(release_dates, columns=['tag', 'release_date'], index='tag')
df_versions

Unnamed: 0_level_0,release_date
tag,Unnamed: 1_level_1
0.1,2015-01-19 22:20:39
v0.1,2015-01-19 22:20:39
0.2,2015-03-11 20:13:30
v0.2,2015-03-11 20:13:30
0.2pypi,2015-03-14 18:23:30
0.2.1,2015-05-03 17:16:10
v0.2.1,2015-05-03 17:16:10
0.2.2,2015-05-12 18:09:12
v0.2.2,2015-05-12 18:09:12
0.2.2pypi,2015-05-22 20:15:45


## Store df_short and df_versions

In [10]:
df_short.to_parquet('df_short.gzip')
df_versions.to_parquet('df_versions.gzip')