**Purpose:** To download raw Stellar Lightcurves containing Exoplanet signals for a single Planet.

**Goals:**
1. Understand, through EDA, the distribution of Transit Event parameters for known Exoplanets.
2. Identify and avoid Stellar Lightcurves containing outlier Transit Events, which cannot easily be handled by an ML model. This requires studying the normal ranges of the Transit parameters through EDA techniques.
3. Create the initial list of Exoplanetary Lightcurves (a Lightcurve of a Star confirmed to have an Exoplanet with known Transit parameters) to be considered for use by the ML model.

**Input:** List of Lightcurves, identified by Host star name, that contain Exoplanet signals
**Output:** Collection of CSV Files each containing a Lightcurve, i.e. a series of Flux observations at regular intervals for a specific Exoplanet transiting in front of a star.

In [None]:
###################################################################################
# Download and store Kepler Lightcurves with known Exoplanet Transits in Raw format
###################################################################################
!pip install lightkurve astroquery pandas numpy tqdm
!bash pip install --upgrade astroquery

In [None]:
# Standard library
import os
import time  # was `import timels`, a typo that raises ModuleNotFoundError
import timeit  # used by the timing code in later cells
from pathlib import Path

# Third-party
import lightkurve as lk
import numpy as np
import pandas as pd
import requests
from astropy.table import Table
from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive
from astroquery.mast import Observations

# Root folder under which every output of this notebook is written.
# NOTE(review): the path looks like a mounted Google Drive in Colab — confirm
# the drive is mounted before running the cells that write files.
OUTPUT_ROOT = Path("/content/drive/MyDrive/Berkeley_AIML/Capstone/lightcurves")
# Folder that receives one raw-lightcurve CSV per planet.
RAW_CURVES_PATH = OUTPUT_ROOT / "raw_exoplanet_curves"
# Folder that receives supporting tables (e.g. the exoplanet list).
ANCILLARY = OUTPUT_ROOT / "ancillary"

In [None]:
# These imports repeat the ones in the setup cell above.
# Standard library
import os
import time  # was `import timels`, a typo that raises ModuleNotFoundError

# Third-party
import numpy as np
import pandas as pd
import requests
from astropy.table import Table
from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive
from astroquery.mast import Observations



In [None]:
# Obtain ALL Kepler Stellar Targets
import timeit

# NOTE(review): `kepler_stellar_df` is not created in any visible cell; it must
# already exist when this cell runs — confirm where it is built.
output_file = os.path.join(OUTPUT_ROOT, "kepler_stellar_targets.csv")
print('Saving Kepler Stellar Targets...')
# timeit.default_timer() reads a clock. timeit.timeit() with no arguments
# instead benchmarks an empty statement, so differences of its results are
# not elapsed times.
start = timeit.default_timer()
kepler_stellar_df.to_csv(output_file, index=True)
end = timeit.default_timer()
print('Successfully Saved Kepler Stellar Targets')
# Elapsed wall-clock time of the save, in seconds.
print(end - start)

In [None]:
############################################################################
# Obtain ancillary table containing a cross reference of Planet Identifiers
############################################################################
print("Querying Nasa Exoplanet Archive for Kepler Name Cross Reference to KepIds")
# timeit.default_timer() reads a clock; timeit.timeit() with no arguments only
# benchmarks an empty statement and cannot be used to time this query.
start = timeit.default_timer()
kepler_names = NasaExoplanetArchive.query_criteria(
    table="keplernames",
    select=" kepid, koi_name, kepler_name, pl_name",
)
end = timeit.default_timer()
print("Finished")
# Elapsed wall-clock time of the query, in seconds.
print(end - start)

print("Convert to Dataframe...")
kepler_names_df = kepler_names.to_pandas()
# A bare `.head()` in the middle of a cell is not displayed, so print it.
print(kepler_names_df.head())

output_file = os.path.join(OUTPUT_ROOT, "kepler_names.csv")
print('Saving Kepler Names...')
start = timeit.default_timer()
kepler_names_df.to_csv(output_file, index=True)
end = timeit.default_timer()
# The original message said "Kepler Stellar Targets", copied from another cell.
print('Successfully Saved Kepler Names')
# Elapsed wall-clock time of the save, in seconds.
print(end - start)

In [None]:
############################################################################
# Download Raw lightcurves for Confirmed Exoplanets
# Step1: Query NASA Exoplanet Archive to obtain list of host stars with confirmed Exoplanets
############################################################################

import timeit

print("Querying Nasa Exoplanet Archive for CONFIRMED exoplanets")
# timeit.default_timer() reads a clock; timeit.timeit() with no arguments only
# benchmarks an empty statement and cannot be used to time this query.
start = timeit.default_timer()
exoplanet_qtable = NasaExoplanetArchive.query_criteria(
    table="pscomppars",
    select="hostname,  pl_name,  disc_year, pl_orbper, pl_trandur, pl_trandep",
    where="sy_pnum=1 and tran_flag=1 and controv_flag = 0 and trim(pl_name) like'Kepler%'",
)
end = timeit.default_timer()
print("Finished")
# Elapsed wall-clock time of the query, in seconds.
print(end - start)

# Work with the result as a DataFrame ordered by planet name.
df_exoplanets_all = exoplanet_qtable.to_pandas()
df_exoplanets_all.sort_values("pl_name", inplace=True)
max_planets = len(df_exoplanets_all)
print(f"Found {max_planets} Exoplanets")

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(ANCILLARY, exist_ok=True)
output_file = os.path.join(ANCILLARY, "raw_exoplanet_list.csv")
df_exoplanets_all.to_csv(output_file, index=True)
output_file = None


In [None]:
#####################################################
# Helper Function to save a Lightcurve as a CSV File
#####################################################
def save_raw_data(name, lcc, search_result, output_dir):
    """Flatten a set of lightcurves into one CSV file.

    Each lightcurve in ``lcc`` is paired, in order, with a row of
    ``search_result.table``. Every observation becomes one CSV row holding
    the paired row's ``mission`` and ``year`` (``'Unknown'`` when absent)
    together with the observation's time, flux, flux error, quality flag and
    cadence number. When a lightcurve has no flux errors, that column is left
    empty for its rows.

    Args:
        name: Label used to build the file name; spaces become underscores.
        lcc: Iterable of lightcurve objects exposing ``time``, ``flux``,
            ``flux_err``, ``quality`` and ``cadenceno``.
        search_result: Object whose ``table`` yields one metadata row per
            lightcurve; each row must support ``.get(key, default)``.
        output_dir: Folder in which ``<name>_rawcurves.csv`` is written.

    Returns:
        None. The result is the file written to disk.
    """
    rows = []
    for curve, info in zip(lcc, search_result.table):
        # Metadata shared by every observation of this lightcurve.
        mission_label = info.get('mission', 'Unknown')
        obs_year = info.get('year', 'Unknown')

        times = curve.time.value
        fluxes = curve.flux.value
        if curve.flux_err is None:
            # Pad with None so the columns still line up one-to-one.
            flux_errors = [None] * len(fluxes)
        else:
            flux_errors = curve.flux_err.value

        samples = zip(times, fluxes, flux_errors, curve.quality, curve.cadenceno)
        rows.extend(
            {
                'mission': mission_label,
                'year': obs_year,
                'time': t_val,
                'flux': f_val,
                'flux_err': fe_val,
                'quality': q_val,
                'cadenceno': c_val,
            }
            for t_val, f_val, fe_val, q_val, c_val in samples
        )

    # One file per target, named after it.
    csv_name = f"{name.replace(' ', '_')}_rawcurves.csv"
    pd.DataFrame(rows).to_csv(os.path.join(output_dir, csv_name), index=False)
    print(f"✅ Saved lightcurve data for {name}")
    return None


In [None]:
#################################################################################################
# Download Raw lightcurves for Confirmed Exoplanets
# Step2: For each Host Star, obtain raw lightcurves for quarters 4-7 and store as CSV in a folder
#################################################################################################

quarters = [4, 5, 6, 7]
numrows = len(df_exoplanets_all)

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(RAW_CURVES_PATH, exist_ok=True)

# `i` is the planet's position in the list, so "i of numrows" stays accurate
# even when earlier planets are skipped because their file already exists.
for i, planet_name in enumerate(df_exoplanets_all["pl_name"], start=1):
    output_file = os.path.join(RAW_CURVES_PATH, f"{planet_name.replace(' ', '_')}_rawcurves.csv")
    if os.path.exists(output_file):
        # Already downloaded on a previous run; do not fetch it again.
        print(f"The file '{output_file}' already exists.")
        continue

    # A failure for one planet is reported and must not stop the whole run.
    try:
        print(f"Processing Planet {i} of {numrows}: Name {planet_name}")
        print("-------------------------------------------")
        # timeit.default_timer() reads a clock; timeit.timeit() with no
        # arguments only benchmarks an empty statement.
        start = timeit.default_timer()
        search_result = lk.search_lightcurve(f"{planet_name}", mission="Kepler", cadence='long', quarter=quarters)
        elapsed = (timeit.default_timer() - start) / 60  # minutes
        print(f"Time Elapsed: {elapsed}")

        print("Downloading raw lighcurves for  ", planet_name)
        start = timeit.default_timer()
        lc_collection = search_result.download_all()
        elapsed = (timeit.default_timer() - start) / 60  # minutes
        print(f"Time Elapsed: {elapsed}")

        # download_all() may hand back None when nothing was found (see the
        # lightkurve docs), so count defensively before calling len().
        num_curves = len(lc_collection) if lc_collection is not None else 0
        print(num_curves, " lighcurves found")

        # Only keep targets that have a lightcurve for every requested quarter.
        if num_curves >= len(quarters):
            save_raw_data(planet_name, lc_collection, search_result, RAW_CURVES_PATH)
            print("-------------------------------------------")
            print("")
        else:
            print(f"No light curves found for {planet_name}")

    except Exception as e:
        print(f"Error processing Planet Name {planet_name}: {e}")