# globe at night data processing

The purpose of this notebook is to bring all globe at night csvs into a single dataframe.
We want them in a single dataframe so that a predictive model an be trained on it.


In [104]:
import math
from pathlib import Path

import astropy.units as u
import numpy as np
import pandas as pd
from astroplan import Observer
from astropy.coordinates import EarthLocation
from astropy.time import Time

SQM_OBS_TYPE = "SQM"
MAX_SQM = 22
MIN_SQM = 16

cwd = Path.cwd()
data_path = cwd / "data" / "globe_at_night"
dataframes = [
    pd.read_csv(p, on_bad_lines="skip")
    for p in data_path.glob("*.csv")
    if p.stem != "GaN2022"
]
df = pd.concat(dataframes, ignore_index=True)
df = df.drop(columns=["ID", "ObsID", "LocalDate", "LocalTime", "Constellation"])
df = df.dropna(subset=["SQMReading", "CloudCover", "Elevation(m)"], how="any", axis=0)
df = df[df["ObsType"] == SQM_OBS_TYPE]
df = df[df["SQMReading"] <= MAX_SQM]
df = df[df["SQMReading"] >= MIN_SQM]
df = df.reset_index()
df["UTDatetime"] = pd.to_datetime(
    df["UTDate"] + " " + df["UTTime"], format="%Y-%m-%d %H:%M"
)
df["UTTimeHour"] = np.sin(2 * np.pi * df["UTDatetime"].dt.hour / 24)
df.info

<bound method DataFrame.info of         index ObsType  Latitude  Longitude  Elevation(m)      UTDate UTTime  \
0          39     SQM   34.2365 -110.08400     1964.3800  2019-01-02  02:28   
1          41     SQM   33.3369 -111.42500      561.7730  2019-01-02  03:10   
2         110     SQM   38.8878 -119.82000     1466.4800  2019-01-03  07:14   
3         130     SQM   45.7688    1.05404      333.9360  2019-01-02  22:30   
4         170     SQM   33.5127 -112.45900      347.5930  2019-01-04  06:05   
...       ...     ...       ...        ...           ...         ...    ...   
14856  246181     SQM   53.7430   -1.58675      113.4350  2017-12-18  20:51   
14857  246201     SQM   38.8878 -119.82000     1466.4800  2017-12-19  11:00   
14858  246208     SQM   37.8585 -122.14400      345.8890  2017-12-21  07:25   
14859  246216     SQM   47.6102   20.72810       91.7003  2017-12-18  18:30   
14860  246217     SQM   47.6102   20.72810       91.7585  2017-11-15  19:59   

       LimitingMag 

In [105]:
def get_moon_altaz(datetime, lat, lon):
    time = Time(datetime)
    location = EarthLocation.from_geodetic(lon * u.degree, lat * u.degree)
    observer = Observer(location=location)
    return observer.moon_altaz(time)


def get_moon_alt(datetime, lat, lon):
    altaz = get_moon_altaz(datetime, lat, lon)
    return altaz.alt.value


def get_moon_az(datetime, lat, lon):
    altaz = get_moon_altaz(datetime, lat, lon)
    return altaz.az.value


df["MoonAlt"] = df.apply(
    lambda x: get_moon_alt(x["UTDatetime"], x["Latitude"], x["Longitude"]), axis=1
)
df["MoonAz"] = df.apply(
    lambda x: get_moon_az(x["UTDatetime"], x["Latitude"], x["Longitude"]), axis=1
)

In [106]:
def get_oktas_from_description(description: str) -> int:
    match description:
        case "0" | "clear":
            return 0
        case "25" | "1/4 of sky":
            return 2
        case "50" | "1/2 of sky":
            return 4
        case "75" | "over 1/2 of sky":
            return 6
        case _:
            return 8


df["CloudCover"] = df["CloudCover"].map(get_oktas_from_description)

In [107]:
output_file_path = Path.cwd() / "data" / "gan.csv"
df.to_csv(output_file_path, index=False)

In [108]:
correlations = {}
for column in df.columns:
    try:
        df[column] = pd.to_numeric(df[column], errors="coerce")
        if df[column].dtype == "float64" or df[column].dtype == "int64":
            correlation = df["SQMReading"].corr(df[column])
            if not math.isnan(correlation):
                correlations[column] = correlation
    except ValueError:
        pass
correlations

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


{'index': -0.007313246819735647,
 'Latitude': -0.038435703339897494,
 'Longitude': 0.09874163511112802,
 'Elevation(m)': 0.25910230694150344,
 'LimitingMag': 0.07848802762062192,
 'SQMReading': 1.0,
 'SQMSerial': 0.0016856163982963998,
 'CloudCover': -0.2456437264534707,
 'LocationComment': 1.0,
 'UTDatetime': 0.27864893916608474,
 'UTTimeHour': 0.09087558395241287,
 'MoonAlt': -0.028142898215313364,
 'MoonAz': -0.002344329029128206}