In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score


In [2]:
try:
    df = pd.read_csv(r'C:\Users\HP\Desktop\hackathon\NASA\k2pandc_2025.09.25_10.44.09.csv')
except FileNotFoundError:
    print("Error: Dataset file not found. Please update the file path in the script.")
    exit()

In [3]:
df

Unnamed: 0,loc_rowid,pl_name,hostname,default_flag,disposition,disp_refname,sy_snum,sy_pnum,discoverymethod,disc_year,...,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2,rowupdate,pl_pubdate,releasedate
0,1,BD+20 594 b,BD+20 594,0,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2018-03,2018-02-15
1,2,BD+20 594 b,BD+20 594,0,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2016-10,2016-07-28
2,3,BD+20 594 b,BD+20 594,1,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2017-03,2018-04-26
3,4,EPIC 201111557.01,EPIC 201111557,0,CANDIDATE,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-02-15,2018-03,2018-02-15
4,5,EPIC 201111557.01,EPIC 201111557,1,CANDIDATE,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-08-02,2018-08,2018-08-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3987,3988,WASP-85 A b,WASP-85 A,0,CONFIRMED,Mo&#x10D;nik et al. 2016,2,1,Transit,2016,...,-0.010,8.733,0.026,-0.026,10.62410,0.000959,-0.000959,2019-09-05,2019-09,2019-09-05
3988,3989,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2018-09-04,2018-11,2018-09-06
3989,3990,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2022-05-23,2021-12,2022-05-23
3990,3991,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2025-09-17,2017-07,2025-08-28


In [4]:
# Rename the 'disposition' column to your target name 'koi_disposition'
if 'disposition' in df.columns:
    df.rename(columns={'disposition': 'koi_disposition'}, inplace=True)

if 'koi_disposition' not in df.columns:
    print("Error: The target column 'koi_disposition' was not found in the CSV.")
    exit()

In [5]:
df

Unnamed: 0,loc_rowid,pl_name,hostname,default_flag,koi_disposition,disp_refname,sy_snum,sy_pnum,discoverymethod,disc_year,...,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2,rowupdate,pl_pubdate,releasedate
0,1,BD+20 594 b,BD+20 594,0,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2018-03,2018-02-15
1,2,BD+20 594 b,BD+20 594,0,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2016-10,2016-07-28
2,3,BD+20 594 b,BD+20 594,1,CONFIRMED,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2017-03,2018-04-26
3,4,EPIC 201111557.01,EPIC 201111557,0,CANDIDATE,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-02-15,2018-03,2018-02-15
4,5,EPIC 201111557.01,EPIC 201111557,1,CANDIDATE,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-08-02,2018-08,2018-08-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3987,3988,WASP-85 A b,WASP-85 A,0,CONFIRMED,Mo&#x10D;nik et al. 2016,2,1,Transit,2016,...,-0.010,8.733,0.026,-0.026,10.62410,0.000959,-0.000959,2019-09-05,2019-09,2019-09-05
3988,3989,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2018-09-04,2018-11,2018-09-06
3989,3990,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2022-05-23,2021-12,2022-05-23
3990,3991,Wolf 503 b,Wolf 503,0,CONFIRMED,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2025-09-17,2017-07,2025-08-28


In [6]:
# Filter, encode, and clean the target variable
df = df[df['koi_disposition'] != 'FALSE POSITIVE']
target_map = {'CONFIRMED': 1, 'CANDIDATE': 0}
df['koi_disposition'] = df['koi_disposition'].map(target_map)
df.dropna(subset=['koi_disposition'], inplace=True)
df['koi_disposition'] = df['koi_disposition'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['koi_disposition'] = df['koi_disposition'].map(target_map)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=['koi_disposition'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['koi_disposition'] = df['koi_disposition'].astype(int)


In [7]:
df

Unnamed: 0,loc_rowid,pl_name,hostname,default_flag,koi_disposition,disp_refname,sy_snum,sy_pnum,discoverymethod,disc_year,...,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2,rowupdate,pl_pubdate,releasedate
0,1,BD+20 594 b,BD+20 594,0,1,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2018-03,2018-02-15
1,2,BD+20 594 b,BD+20 594,0,1,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2016-10,2016-07-28
2,3,BD+20 594 b,BD+20 594,1,1,Espinoza et al. 2016,1,1,Transit,2016,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2017-03,2018-04-26
3,4,EPIC 201111557.01,EPIC 201111557,0,0,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-02-15,2018-03,2018-02-15
4,5,EPIC 201111557.01,EPIC 201111557,1,0,Livingston et al. 2018,1,0,Transit,2018,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-08-02,2018-08,2018-08-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3987,3988,WASP-85 A b,WASP-85 A,0,1,Mo&#x10D;nik et al. 2016,2,1,Transit,2016,...,-0.010,8.733,0.026,-0.026,10.62410,0.000959,-0.000959,2019-09-05,2019-09,2019-09-05
3988,3989,Wolf 503 b,Wolf 503,0,1,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2018-09-04,2018-11,2018-09-06
3989,3990,Wolf 503 b,Wolf 503,0,1,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2022-05-23,2021-12,2022-05-23
3990,3991,Wolf 503 b,Wolf 503,0,1,Peterson et al. 2018,1,1,Transit,2018,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2025-09-17,2017-07,2025-08-28


In [8]:
# Separate features (X) and target (y)
y = df['koi_disposition'].copy()
X_raw = df.drop('koi_disposition', axis=1)


In [9]:
y

0       1
1       1
2       1
3       0
4       0
       ..
3987    1
3988    1
3989    1
3990    1
3991    1
Name: koi_disposition, Length: 3677, dtype: int64

In [10]:
X_raw

Unnamed: 0,loc_rowid,pl_name,hostname,default_flag,disp_refname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,...,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2,rowupdate,pl_pubdate,releasedate
0,1,BD+20 594 b,BD+20 594,0,Espinoza et al. 2016,1,1,Transit,2016,K2,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2018-03,2018-02-15
1,2,BD+20 594 b,BD+20 594,0,Espinoza et al. 2016,1,1,Transit,2016,K2,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2016-10,2016-07-28
2,3,BD+20 594 b,BD+20 594,1,Espinoza et al. 2016,1,1,Transit,2016,K2,...,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249,2018-04-25,2017-03,2018-04-26
3,4,EPIC 201111557.01,EPIC 201111557,0,Livingston et al. 2018,1,0,Transit,2018,K2,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-02-15,2018-03,2018-02-15
4,5,EPIC 201111557.01,EPIC 201111557,1,Livingston et al. 2018,1,0,Transit,2018,K2,...,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307,2018-08-02,2018-08,2018-08-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3987,3988,WASP-85 A b,WASP-85 A,0,Mo&#x10D;nik et al. 2016,2,1,Transit,2016,K2,...,-0.010,8.733,0.026,-0.026,10.62410,0.000959,-0.000959,2019-09-05,2019-09,2019-09-05
3988,3989,Wolf 503 b,Wolf 503,0,Peterson et al. 2018,1,1,Transit,2018,K2,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2018-09-04,2018-11,2018-09-06
3989,3990,Wolf 503 b,Wolf 503,0,Peterson et al. 2018,1,1,Transit,2018,K2,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2022-05-23,2021-12,2022-05-23
3990,3991,Wolf 503 b,Wolf 503,0,Peterson et al. 2018,1,1,Transit,2018,K2,...,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337,2025-09-17,2017-07,2025-08-28


In [11]:
cols_to_drop = [
    'loc_rowid', 'pl_name', 'hostname', 'disp_refname', 'discoverymethod',
    'soltype', 'pl_refname', 'pl_bmassprov', 'st_refname', 'st_spectype',
    'st_metratio', 'sy_refname', 'rastr', 'decstr', 'rowupdate', 'pl_pubdate',
    'releasedate', 'disc_facility'
]
existing_cols_to_drop = [col for col in cols_to_drop if col in X_raw.columns]
X_numeric = X_raw.drop(columns=existing_cols_to_drop).select_dtypes(include=np.number)

all_nan_cols = [col for col in X_numeric.columns if X_numeric[col].isnull().all()]
if all_nan_cols:
    X_numeric.drop(columns=all_nan_cols, inplace=True)

# Impute remaining missing values with the median
for col in X_numeric.columns:
    if X_numeric[col].isnull().any():
        median_val = X_numeric[col].median()
        X_numeric[col].fillna(median_val, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_numeric[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_numeric[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting valu

In [12]:
X_numeric

Unnamed: 0,default_flag,sy_snum,sy_pnum,disc_year,pl_controv_flag,pl_orbper,pl_orbpererr1,pl_orbpererr2,pl_orbperlim,pl_orbsmax,...,sy_disterr2,sy_vmag,sy_vmagerr1,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2
0,0,1,1,2016,0,41.688644,0.003353,-0.003419,0.0,0.06698,...,-1.2400,10.849,0.012,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249
1,0,1,1,2016,0,41.685500,0.003000,-0.003100,0.0,0.24100,...,-1.2400,10.849,0.012,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249
2,1,1,1,2016,0,41.685500,0.003000,-0.003000,0.0,0.06698,...,-1.2400,10.849,0.012,-0.012,9.368,0.018,-0.018,10.86440,0.000249,-0.000249
3,0,1,0,2018,0,2.302368,0.000105,-0.000103,0.0,0.06698,...,-0.4598,11.727,0.046,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307
4,1,1,0,2018,0,2.301830,0.000280,-0.000300,0.0,0.06698,...,-0.4598,11.727,0.046,-0.046,9.220,0.019,-0.019,11.39950,0.001307,-0.001307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3987,0,2,1,2016,0,2.655676,0.000002,-0.000002,0.0,0.06698,...,-1.2080,10.720,0.010,-0.010,8.733,0.026,-0.026,10.62410,0.000959,-0.000959
3988,0,1,1,2018,0,6.001180,0.000080,-0.000110,0.0,0.05710,...,-0.0961,10.270,0.030,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337
3989,0,1,1,2018,0,6.001270,0.000021,-0.000021,0.0,0.05706,...,-0.0961,10.270,0.030,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337
3990,0,1,1,2018,0,7.125000,0.000319,-0.000320,0.0,0.06698,...,-0.0961,10.270,0.030,-0.030,7.617,0.023,-0.023,9.89816,0.000337,-0.000337


In [13]:

X = X_numeric

In [14]:

# Train-Test Split and Scaling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [15]:
X_train

Unnamed: 0,default_flag,sy_snum,sy_pnum,disc_year,pl_controv_flag,pl_orbper,pl_orbpererr1,pl_orbpererr2,pl_orbperlim,pl_orbsmax,...,sy_disterr2,sy_vmag,sy_vmagerr1,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2
2751,1,1,2,2018,0,6.034000,1.000000e-03,-1.000000e-03,0.0,0.05130,...,-0.52255,13.372,0.0220,-0.0220,9.560,0.023,-0.023,12.6212,0.001870,-0.001870
1247,1,1,0,2016,0,7.786080,7.000000e-04,-6.700000e-04,0.0,0.06698,...,-19.02600,12.038,0.0690,-0.0690,9.468,0.019,-0.019,11.7297,0.000363,-0.000363
2147,1,1,1,2017,0,5.675819,4.110000e-04,-4.140000e-04,0.0,0.05600,...,-0.91500,12.580,0.0800,-0.0800,9.815,0.021,-0.021,12.2204,0.000672,-0.000672
2772,1,1,1,2018,0,4.013920,2.900000e-04,-2.900000e-04,0.0,0.04606,...,-6.16600,14.314,0.1260,-0.1260,12.016,0.024,-0.024,13.9725,0.000345,-0.000345
1829,0,1,1,2015,0,2.790825,4.900000e-07,-4.900000e-07,0.0,0.06698,...,-5.46300,10.920,0.0300,-0.0300,9.830,0.016,-0.016,10.8245,0.000420,-0.000420
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2538,0,1,2,2018,0,15.853543,7.910000e-04,-7.540000e-04,0.0,0.06698,...,-4.09600,12.524,0.0460,-0.0460,10.876,0.023,-0.023,12.3238,0.000254,-0.000254
3763,0,1,1,2016,0,10.134231,3.470000e-04,-3.470000e-04,0.0,0.06698,...,-3.87300,17.236,0.0467,-0.0467,12.474,0.021,-0.021,16.0836,0.000880,-0.000880
268,1,1,0,2019,0,2.032664,8.800000e-05,-8.300000e-05,0.0,0.06698,...,-0.74400,15.459,0.1030,-0.1030,10.992,0.021,-0.021,14.3506,0.000615,-0.000615
657,1,1,0,2019,0,18.865200,2.500000e-03,-2.500000e-03,0.0,0.06698,...,-0.80800,13.425,0.1030,-0.1030,10.589,0.020,-0.020,13.0781,0.000450,-0.000450


In [16]:
X_test

Unnamed: 0,default_flag,sy_snum,sy_pnum,disc_year,pl_controv_flag,pl_orbper,pl_orbpererr1,pl_orbpererr2,pl_orbperlim,pl_orbsmax,...,sy_disterr2,sy_vmag,sy_vmagerr1,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2
3078,0,2,4,2016,0,8.992130,0.000160,-0.000160,0.0,0.08036,...,-1.2880,12.304,0.092,-0.092,9.821,0.019,-0.019,12.00360,0.000259,-0.000259
3718,1,1,2,2016,0,27.860000,0.005200,-0.005200,0.0,0.17680,...,-2.2290,13.386,0.103,-0.103,11.059,0.020,-0.020,13.14740,0.000290,-0.000290
2665,1,1,1,2018,0,15.388570,0.000880,-0.000880,0.0,0.12060,...,-0.6250,9.543,0.004,-0.004,8.105,0.017,-0.017,9.36072,0.000292,-0.000292
1324,1,1,0,2016,0,8.953660,0.000220,-0.000210,0.0,0.06698,...,-3.3010,14.060,0.160,-0.160,11.701,0.022,-0.022,13.71470,0.000253,-0.000253
2989,0,1,1,2019,0,2.225164,0.000024,-0.000024,0.0,0.06698,...,-0.4900,10.014,0.005,-0.005,8.353,0.020,-0.020,9.86639,0.000710,-0.000710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1399,0,1,0,2018,0,1.151471,0.000041,-0.000042,0.0,0.06698,...,-11.5960,12.208,0.057,-0.057,10.707,0.024,-0.024,12.07770,0.000215,-0.000215
2906,0,1,2,2018,0,3.281000,0.000319,-0.000320,0.0,0.06698,...,-0.7060,11.974,0.069,-0.069,9.749,0.024,-0.024,11.81390,0.000886,-0.000886
3073,0,2,4,2016,0,8.990021,0.000319,-0.000320,0.0,0.06698,...,-1.2880,12.304,0.092,-0.092,9.821,0.019,-0.019,12.00360,0.000259,-0.000259
1851,1,1,1,2016,0,3.142702,0.000053,-0.000053,0.0,0.04410,...,-2.8815,12.831,0.057,-0.057,11.391,0.023,-0.023,12.67660,0.000609,-0.000609


In [17]:
param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [10, 20, 30, 50, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

In [18]:

# --- 1. Load Dataset ---
# IMPORTANT: Replace 'your_dataset.csv' with the actual path to your file.


# --- 2. Preprocessing (Same as before) ---






# Drop unwanted, non-numeric, and all-NaN columns


# --- 3. Hyperparameter Tuning ---

# A. Define the Hyperparameter Grid
# These are the settings RandomizedSearchCV will test


# B. Set up Randomized Search
rf = RandomForestClassifier(random_state=42)
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=50,  # Number of different combinations to try
    cv=5,       # Number of cross-validation folds
    verbose=2,  # Prints progress
    random_state=42,
    n_jobs=-1   # Use all available CPU cores
)

# C. Run the Search
print("Starting hyperparameter search for Random Forest...")
random_search.fit(X_train_scaled, y_train)

# --- 4. Display Results ---

# Print the best parameters found
print("\nBest parameters found:")
print(random_search.best_params_)

# --- 5. Evaluate the Best Model ---

# Use the best model found by the search
best_rf = random_search.best_estimator_

# Make predictions on the test set
y_pred = best_rf.predict(X_test_scaled)

# Print the final performance report
print("\n--- Final Model Performance on Test Set ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Starting hyperparameter search for Random Forest...
Fitting 5 folds for each of 50 candidates, totalling 250 fits

Best parameters found:
{'n_estimators': 500, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 50, 'bootstrap': False}

--- Final Model Performance on Test Set ---
Accuracy: 0.9928

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       411
           1       0.99      1.00      0.99       693

    accuracy                           0.99      1104
   macro avg       0.99      0.99      0.99      1104
weighted avg       0.99      0.99      0.99      1104

