In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Kepler cumulative KOI table into a DataFrame.
# `comment="#"` tells the parser to ignore anything following a '#' character.
csv_path = "data/kelper_cumulative_2024.05.30_19.47.22.csv"
df = pd.read_csv(csv_path, comment="#")
df.head()

Unnamed: 0,kepid,kepler_name,koi_disposition,koi_period,koi_eccen,koi_impact,koi_duration,koi_depth,koi_ror,koi_srho,...,koi_tce_plnt_num,koi_steff,koi_slogg,koi_smet,koi_srad,koi_smass,koi_sage,ra,dec,koi_kepmag
0,10797460,Kepler-227 b,CONFIRMED,9.488036,0.0,0.146,2.9575,615.8,0.022344,3.20796,...,1.0,5455.0,4.467,0.14,0.927,0.919,,291.93423,48.141651,15.347
1,10797460,Kepler-227 c,CONFIRMED,54.418383,0.0,0.586,4.507,874.8,0.027954,3.02368,...,2.0,5455.0,4.467,0.14,0.927,0.919,,291.93423,48.141651,15.347
2,10811496,,CANDIDATE,19.89914,0.0,0.969,1.7822,10829.0,0.154046,7.29555,...,1.0,5853.0,4.544,-0.18,0.868,0.961,,297.00482,48.134129,15.436
3,10848459,,FALSE POSITIVE,1.736952,0.0,1.276,2.40641,8079.2,0.387394,0.2208,...,1.0,5805.0,4.564,-0.52,0.791,0.836,,285.53461,48.28521,15.597
4,10854555,Kepler-664 b,CONFIRMED,2.525592,0.0,0.701,1.6545,603.3,0.024064,1.98635,...,1.0,6031.0,4.438,0.07,1.046,1.095,,288.75488,48.2262,15.509


In [3]:
# Summarize the frame: row count, per-column non-null counts (to spot
# missing values), and each column's dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9564 entries, 0 to 9563
Data columns (total 29 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   kepid             9564 non-null   int64  
 1   kepler_name       2745 non-null   object 
 2   koi_disposition   9564 non-null   object 
 3   koi_period        9564 non-null   float64
 4   koi_eccen         9201 non-null   float64
 5   koi_impact        9201 non-null   float64
 6   koi_duration      9564 non-null   float64
 7   koi_depth         9201 non-null   float64
 8   koi_ror           9201 non-null   float64
 9   koi_srho          9243 non-null   float64
 10  koi_prad          9201 non-null   float64
 11  koi_sma           9201 non-null   float64
 12  koi_incl          9200 non-null   float64
 13  koi_teq           9201 non-null   float64
 14  koi_insol         9243 non-null   float64
 15  koi_dor           9201 non-null   float64
 16  koi_model_snr     9201 non-null   float64


In [4]:
# Class balance: number of rows per disposition label.
df["koi_disposition"].value_counts()

koi_disposition
FALSE POSITIVE    4839
CONFIRMED         2743
CANDIDATE         1982
Name: count, dtype: int64

In [8]:
# Keep only rows whose disposition is one of the two labels listed below.
# NOTE: despite its name, `confirmed` holds both CONFIRMED and
# FALSE POSITIVE rows.
has_kept_label = df["koi_disposition"].isin(["CONFIRMED", "FALSE POSITIVE"])
confirmed = df[has_kept_label]

In [9]:
# Replace disposition values with planet flag
# `condition` is True where the row's disposition is "CONFIRMED", else False.
# It is built with a vectorized comparison and converted back to a plain list
# of bools so its type matches what this cell has always produced.
condition = confirmed["koi_disposition"].eq("CONFIRMED").tolist()

# `DataFrame.insert` raises ValueError when the column already exists, so guard
# the call: this keeps the cell safe to re-run without re-creating `confirmed`.
if "planet" not in confirmed.columns:
    confirmed.insert(2, "planet", condition)

# Modelling frame: drop the original label and the two identifier columns,
# leaving `planet` as the first column.
data = confirmed.drop(columns=["koi_disposition", "kepid", "kepler_name"])

In [10]:
# Preview the first rows of `data` to confirm the `planet` flag is present
# and the disposition/identifier columns have been dropped.
data.head()

Unnamed: 0,planet,koi_period,koi_eccen,koi_impact,koi_duration,koi_depth,koi_ror,koi_srho,koi_prad,koi_sma,...,koi_tce_plnt_num,koi_steff,koi_slogg,koi_smet,koi_srad,koi_smass,koi_sage,ra,dec,koi_kepmag
0,True,9.488036,0.0,0.146,2.9575,615.8,0.022344,3.20796,2.26,0.0853,...,1.0,5455.0,4.467,0.14,0.927,0.919,,291.93423,48.141651,15.347
1,True,54.418383,0.0,0.586,4.507,874.8,0.027954,3.02368,2.83,0.2734,...,2.0,5455.0,4.467,0.14,0.927,0.919,,291.93423,48.141651,15.347
3,False,1.736952,0.0,1.276,2.40641,8079.2,0.387394,0.2208,33.46,0.0267,...,1.0,5805.0,4.564,-0.52,0.791,0.836,,285.53461,48.28521,15.597
4,True,2.525592,0.0,0.701,1.6545,603.3,0.024064,1.98635,2.75,0.0374,...,1.0,6031.0,4.438,0.07,1.046,1.095,,288.75488,48.2262,15.509
5,True,11.094321,0.0,0.538,4.5945,1517.5,0.036779,0.67324,3.9,0.0992,...,1.0,6046.0,4.486,-0.08,0.972,1.053,,296.28613,48.22467,15.714


In [11]:
# Correlate every column with every other, then keep only the column of
# correlations against the `planet` flag, ordered from most positive to
# most negative.
corr_matrix = data.corr()
planet_corr = corr_matrix["planet"]
correlations = planet_corr.sort_values(ascending=False)
print('Correlations:\n', correlations)

Correlations:
 planet              1.000000
koi_count           0.483403
koi_smet            0.329277
koi_incl            0.314119
koi_tce_plnt_num    0.250940
koi_slogg           0.182505
dec                 0.116421
koi_kepmag          0.057633
koi_prad           -0.024645
koi_insol          -0.039192
koi_ror            -0.065574
koi_srho           -0.067344
koi_impact         -0.076338
koi_srad           -0.085410
koi_dor            -0.118603
koi_sma            -0.120998
koi_period         -0.160646
koi_duration       -0.162690
ra                 -0.168657
koi_smass          -0.192160
koi_model_snr      -0.210710
koi_steff          -0.222819
koi_depth          -0.241666
koi_teq            -0.277111
koi_num_transits   -0.306172
koi_eccen                NaN
koi_sage                 NaN
Name: planet, dtype: float64
