In [1]:
from pathlib import Path

import lightkurve as lk
import numpy as np
import pandas as pd
from astropy.io import fits

# Metadata

## Načtení

In [2]:
# Load the table of positive examples; the displayed frame has `host` and `period` columns.
positive_csv = "positive.csv"
pos = pd.read_csv(positive_csv)
pos

Unnamed: 0,host,period
0,Kepler-8,3.522
1,Kepler-10,0.837
2,Kepler-10,45.294


## Seskupení dle systémů

In [3]:
# Collect every period listed for a host into one list, keyed by the host name.
periods_by_host = pos.groupby("host")["period"]
grouped_pos = periods_by_host.apply(list).to_dict()
grouped_pos

{'Kepler-10': [0.837, 45.294], 'Kepler-8': [3.522]}

## Doplnění negativních period

In [4]:
# Build, for every host, an alternating list of candidate periods:
# a negative example (planet=0) before each known planet period (planet=1),
# plus one trailing negative example beyond the last known period.
grouped = []

for host, periods in grouped_pos.items():
    candidates = []
    prev_period = 0  # the first planet is compared against 0

    # Assumes `periods` is in ascending order (as in positive.csv) — TODO confirm.
    for planet_period in periods:
        # NOTE(review): this is half of the gap to the previous period, not the
        # midpoint between the two periods; for the first planet it equals
        # period / 2. Confirm that this is the intended negative period.
        # The 0.5 * 1.13 floor presumably keeps the later +/-10 % search window
        # above 0.5 — verify against the periodogram cell.
        negative_period = max((planet_period - prev_period) / 2, 0.5 * 1.13)

        candidates += [
            {"period": negative_period, "planet": 0},
            {"period": planet_period, "planet": 1},
        ]
        prev_period = planet_period

    # Trailing negative example at 1.67x the last listed period.
    candidates.append({"period": periods[-1] * 1.67, "planet": 0})
    grouped.append({"host": host, "periods": candidates})

grouped

[{'host': 'Kepler-10',
  'periods': [{'period': 0.565, 'planet': 0},
   {'period': 0.837, 'planet': 1},
   {'period': 22.228499999999997, 'planet': 0},
   {'period': 45.294, 'planet': 1},
   {'period': 75.64097999999998, 'planet': 0}]},
 {'host': 'Kepler-8',
  'periods': [{'period': 1.761, 'planet': 0},
   {'period': 3.522, 'planet': 1},
   {'period': 5.88174, 'planet': 0}]}]

# Trénovací množina

## Stažení TPS a uložení křivek

In [5]:
# Load one stitched, flattened light curve per host, using .cache/lc/<host>.fits
# as an on-disk cache so the download only happens once.
lcs = {}

for item in grouped:
    host = item["host"]
    lc_path = f".cache/lc/{host}.fits"

    if Path(lc_path).exists():
        # Cache hit. A damaged cache file now raises here instead of silently
        # triggering a fresh download.
        lc = lk.read(lc_path)
    else:
        tps = lk.search_targetpixelfile(host, mission="Kepler", exptime="long").download_all()
        if tps is None:
            # download_all() yields None when the search has no results.
            raise ValueError(f"No Kepler long-cadence target pixel files found for {host!r}")

        # One light curve per target pixel file, extracted with the pipeline aperture.
        lcc = [tp.to_lightcurve(aperture_mask=tp.pipeline_mask) for tp in tps]
        lc = lk.LightCurveCollection(lcc).stitch().flatten(window_length=501).remove_outliers()

        # Make sure the cache directory exists before writing.
        Path(lc_path).parent.mkdir(parents=True, exist_ok=True)
        lc.to_fits(lc_path)

    lcs[host] = lc

lcs.keys()

dict_keys(['Kepler-10', 'Kepler-8'])

## Hledání period a uložení metadat o tranzitech

In [6]:
# Refine every candidate period with a BLS periodogram and cache the resulting
# table in .cache/grouped.csv so the search is only run once.
grouped_cache = Path(".cache/grouped.csv")

if grouped_cache.exists():
    # Cache hit. An unreadable cache file now raises instead of silently
    # re-running the whole period search.
    grouped_df = pd.read_csv(grouped_cache)
else:
    records = []

    for item in grouped:
        lc = lcs[item["host"]]

        for per in item["periods"]:
            guess = per["period"]
            # Search within +/-10 % of the initial guess, never below 0.5.
            trial_periods = np.linspace(max(0.5, guess * 0.9), guess * 1.1, 10000)
            pdg = lc.to_periodogram("bls", period=trial_periods)

            # The candidate dicts in `grouped` are updated in place with the
            # refined values, as before.
            per["period"] = pdg.period_at_max_power.value
            per["dur"] = pdg.duration_at_max_power.value
            per["t0"] = pdg.transit_time_at_max_power.value

            records.append({
                "host": item["host"],
                "period": per["period"],
                "dur": per["dur"],
                "t0": per["t0"],
                "planet": per["planet"],
            })

    grouped_df = pd.DataFrame(records)
    # Make sure the cache directory exists before writing.
    grouped_cache.parent.mkdir(parents=True, exist_ok=True)
    grouped_df.to_csv(grouped_cache, index=False)

grouped_df

Unnamed: 0,host,period,dur,t0,planet
0,Kepler-10,0.558324,0.05,120.974147,0
1,Kepler-10,0.837498,0.1,120.684147,1
2,Kepler-10,22.647417,0.25,138.664147,0
3,Kepler-10,45.293774,0.25,138.679147,1
4,Kepler-10,76.074803,0.33,151.489147,0
5,Kepler-8,1.761255,0.15,121.109794,0
6,Kepler-8,3.522511,0.15,121.109794,1
7,Kepler-8,5.748718,0.33,124.564794,0


## Seskupení metadat o tranzitech

In [7]:
# Rebuild the per-host structure from the flat transit table:
# one {"host", "periods"} entry per host, in order of first appearance.
by_host = {}

for _, row in grouped_df.iterrows():
    host_name = row["host"]
    entry = by_host.setdefault(host_name, {"host": host_name, "periods": []})

    transit = {"per": row["period"], "dur": row["dur"], "t0": row["t0"], "planet": row["planet"]}
    entry["periods"].append(transit)

grouped = list(by_host.values())
grouped

[{'host': 'Kepler-10',
  'periods': [{'per': 0.5583241912095604,
    'dur': 0.05,
    't0': 120.9741465105713,
    'planet': 0},
   {'per': 0.8374980399019951,
    'dur': 0.1,
    't0': 120.6841465105713,
    'planet': 1},
   {'per': 22.647417028351416,
    'dur': 0.25,
    't0': 138.6641465105713,
    'planet': 0},
   {'per': 45.293773518675934,
    'dur': 0.25,
    't0': 138.6791465105713,
    'planet': 1},
   {'per': 76.07480271143555,
    'dur': 0.33,
    't0': 151.4891465105713,
    'planet': 0}]},
 {'host': 'Kepler-8',
  'periods': [{'per': 1.7612553577678884,
    'dur': 0.15,
    't0': 121.1097940620678,
    'planet': 0},
   {'per': 3.522510715535777,
    'dur': 0.15,
    't0': 121.1097940620678,
    'planet': 1},
   {'per': 5.74871779878994,
    'dur': 0.33,
    't0': 124.56479406206782,
    'planet': 0}]}]

## Sestavení lokálních a globálních pohledů

In [11]:
def _rescale_view(view):
    """Normalise a binned light curve and rescale it by its deepest point.

    The curve is normalised and shifted by -1, divided by the absolute value
    of its minimum flux, then mapped through ``x * 2.0 + 1``. The minimum is
    taken with ``np.nanmin`` so that a single empty (NaN) bin does not turn
    the whole view into NaN.
    """
    centered = view.normalize() - 1
    depth = np.abs(np.nanmin(centered.flux))
    return (centered / depth) * 2.0 + 1


# Build the model inputs: for every candidate period a "global view" (whole
# folded curve, 2001 bins) and a "local view" (window around phase 0, 201 bins),
# plus a one-hot label [not planet, planet].
lv_input, gv_input, output = [], [], []

for item in grouped:
    lc = lcs[item["host"]]

    for period in item["periods"]:
        per, dur, t0 = period["per"], period["dur"], period["t0"]
        fold = lc.fold(per, epoch_time=t0).remove_outliers(sigma=10, sigma_lower=20)
        gv = _rescale_view(fold.bin(n_bins=2001))

        # NOTE(review): fractional_duration is dur / per (a fraction of the
        # period), while fold() is called without normalize_phase, so the phase
        # values are presumably in time units — confirm the window width.
        fractional_duration = dur / per
        phase_mask = (fold.phase.value > -4 * fractional_duration) & (fold.phase.value < 4 * fractional_duration)
        lc_zoom = fold[phase_mask]
        lv = _rescale_view(lc_zoom.bin(n_bins=201))

        period["gv"], period["lv"] = gv, lv
        lv_input.append(lv.flux.value)
        gv_input.append(gv.flux.value)

        # One-hot label; int() guards against the label arriving as a
        # non-integer scalar after the round trip through the DataFrame.
        out = np.zeros(2,)
        out[int(period["planet"])] = 1
        output.append(out)

lv_input, gv_input, output = np.array(lv_input), np.array(gv_input), np.array(output)