### Classifying Habitable and Non-Habitable Exoplanets

This notebook explores the Habitable Worlds Catalog (HWC) data, as of 21 Feb 2025, from https://phl.upr.edu/hwc/data.

Reference: *Machine Learning for Physics and Astronomy* lecture notebook, copyright Viviana Acquaviva (2023).

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Data Preparation
# Build the path with os.path.join instead of a hard-coded backslash literal:
# "\h" is an invalid escape sequence (Python warns about it), and a backslash
# separator only resolves on Windows. os.path.join picks the right separator
# for the current platform and yields the same string as before on Windows.
csv_path = os.path.join("datasets", "hwc.csv")

# Load the catalog into a DataFrame and preview the first five rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,P_NAME,P_DETECTION,P_DISCOVERY_FACILITY,P_YEAR,P_UPDATE,P_MASS,P_MASS_ERROR_MIN,P_MASS_ERROR_MAX,P_MASS_LIMIT,P_MASS_ORIGIN,...,S_ABIO_ZONE,S_TIDAL_LOCK,P_HABZONE_OPT,P_HABZONE_CON,P_TYPE_TEMP,P_HABITABLE,P_ESI,S_CONSTELLATION,S_CONSTELLATION_ABR,S_CONSTELLATION_ENG
0,OGLE-2016-BLG-1227L b,Microlensing,OGLE,2020,2020-04-02,250.0,-120.0,413.0,0,Mass,...,,0.214133,0,0,,0,,Scorpius,Sco,Scorpion
1,Kepler-276 c,Transit,Kepler,2013,2018-09-25,16.6,-3.6,4.4,0,Mass,...,2.097783,0.31698,0,0,Hot,0,0.272032,Cygnus,Cyg,Swan
2,Kepler-829 b,Transit,Kepler,2016,2019-04-16,5.1,,,0,M-R relationship,...,1.756317,0.459559,0,0,Hot,0,0.254763,Lyra,Lyr,Lyre
3,K2-283 b,Transit,K2,2018,2019-09-05,12.2,,,0,M-R relationship,...,0.568374,0.44376,0,0,Hot,0,0.193906,Pisces,Psc,Fishes
4,Kepler-477 b,Transit,Kepler,2016,2019-04-16,4.94,,,0,M-R relationship,...,0.768502,0.38615,0,0,Hot,0,0.276721,Lyra,Lyr,Lyre


In [4]:
# Show the column labels of the loaded frame (pandas elides the middle of long indexes).
df.columns

Index(['P_NAME', 'P_DETECTION', 'P_DISCOVERY_FACILITY', 'P_YEAR', 'P_UPDATE',
       'P_MASS', 'P_MASS_ERROR_MIN', 'P_MASS_ERROR_MAX', 'P_MASS_LIMIT',
       'P_MASS_ORIGIN',
       ...
       'S_ABIO_ZONE', 'S_TIDAL_LOCK', 'P_HABZONE_OPT', 'P_HABZONE_CON',
       'P_TYPE_TEMP', 'P_HABITABLE', 'P_ESI', 'S_CONSTELLATION',
       'S_CONSTELLATION_ABR', 'S_CONSTELLATION_ENG'],
      dtype='object', length=118)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# Counts below the row total indicate missing values in that column.
df.describe()

Unnamed: 0,P_YEAR,P_MASS,P_MASS_ERROR_MIN,P_MASS_ERROR_MAX,P_MASS_LIMIT,P_RADIUS,P_RADIUS_ERROR_MIN,P_RADIUS_ERROR_MAX,P_RADIUS_LIMIT,P_PERIOD,...,S_HZ_CON0_MAX,S_HZ_CON1_MIN,S_HZ_CON1_MAX,S_SNOW_LINE,S_ABIO_ZONE,S_TIDAL_LOCK,P_HABZONE_OPT,P_HABZONE_CON,P_HABITABLE,P_ESI
count,5599.0,5592.0,2506.0,2506.0,5599.0,5592.0,3847.0,3847.0,5599.0,5350.0,...,5366.0,5366.0,5366.0,5366.0,5376.0,5595.0,5599.0,5599.0,5599.0,5358.0
mean,2016.188962,442.521283,-127.77752,174.465166,0.022861,5.715494,-0.432279,0.544259,-0.000714,79940.82,...,2.301153,1.239781,2.301153,3.596167,1.0144709999999998e+35,0.42579,0.047151,0.033577,0.019825,0.257089
std,4.527714,2369.505048,383.670428,625.151645,0.160982,5.33145,0.960669,1.568228,0.026721,5498596.0,...,4.587874,2.390133,4.587874,6.741964,5.259131e+36,0.077064,0.211981,0.180155,0.184617,0.131622
min,1992.0,0.02,-6038.7397,0.0,-1.0,0.31,-32.506,0.0,-1.0,0.09070629,...,0.00191,0.000911,0.00191,0.002434,4.356319e-05,0.029269,0.0,0.0,0.0,0.023896
25%,2014.0,4.04,-73.1009,2.7125,0.0,1.78,-0.48,0.13,0.0,4.465382,...,1.060687,0.568834,1.060687,1.615711,0.4781093,0.391256,0.0,0.0,0.0,0.186192
50%,2016.0,8.75,-19.0698,20.0,0.0,2.78,-0.22,0.28,0.0,11.56951,...,1.561742,0.854147,1.561742,2.502441,1.361846,0.44376,0.0,0.0,0.0,0.268055
75%,2020.0,162.09249,-2.54263,85.81367,0.0,11.9,-0.11,0.5815,0.0,41.50555,...,2.3647,1.299225,2.3647,3.873593,2.556535,0.470516,0.0,0.0,0.0,0.301299
max,2024.0,89700.0,0.0,12395.308,1.0,77.342,0.0,68.91,0.0,402000000.0,...,120.34883,67.331558,120.34883,214.46862,2.726899e+38,1.003328,1.0,1.0,2.0,0.968362


In [6]:
# Count the non-null entries of every column within each P_HABITABLE class.
# Columns with no missing values (e.g. P_NAME) give the class sizes; in the recorded
# output these are heavily imbalanced (5529 / 29 / 41).
# NOTE(review): P_HABITABLE takes three values (0, 1, 2); their meaning is not defined
# in this notebook — confirm against the HWC documentation before modelling.
df.groupby("P_HABITABLE").count()

Unnamed: 0_level_0,P_NAME,P_DETECTION,P_DISCOVERY_FACILITY,P_YEAR,P_UPDATE,P_MASS,P_MASS_ERROR_MIN,P_MASS_ERROR_MAX,P_MASS_LIMIT,P_MASS_ORIGIN,...,S_SNOW_LINE,S_ABIO_ZONE,S_TIDAL_LOCK,P_HABZONE_OPT,P_HABZONE_CON,P_TYPE_TEMP,P_ESI,S_CONSTELLATION,S_CONSTELLATION_ABR,S_CONSTELLATION_ENG
P_HABITABLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5529,5529,5529,5529,5529,5522,2471,2471,5529,5529,...,5296,5306,5525,5529,5529,5295,5288,5529,5529,5529
1,29,29,29,29,29,29,16,16,29,29,...,29,29,29,29,29,29,29,29,29,29
2,41,41,41,41,41,41,19,19,41,41,...,41,41,41,41,41,41,41,41,41,41
