In [1]:
!pip -q install jcopml

In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from jcopml.pipeline import num_pipe, cat_pipe
from jcopml.utils import save_model, load_model
from jcopml.plot import plot_missing_value
from jcopml.feature_importance import mean_score_decrease

# Import Data

In [5]:
df = pd.read_csv('/kaggle/input/admissionpredict/Admission_Predict.csv', index_col='Serial No.')
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


# Dataset Splitting

In [9]:
X = df.drop(columns='Chance of Admit ')
y = df['Chance of Admit ']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
X_train.shape, X_test.shape, X_train.shape, y_train.shape

((320, 7), (80, 7), (320, 7), (320,))

# Training

In [10]:
from sklearn.ensemble import RandomForestRegressor 

from sklearn.model_selection import RandomizedSearchCV
from jcopml.tuning import random_search_params as rsp

In [16]:
# Preprocessor
preprocessor = ColumnTransformer([
    ('numeric', num_pipe(), ['GRE Score', 'TOEFL Score', 'SOP', 'LOR ', 'CGPA']),
    ('categoric', cat_pipe(encoder='onehot'), ['University Rating', 'Research'])
])

# Pipeline
rf_pipeline = Pipeline([
    ('prep', preprocessor),
    ('algo', RandomForestRegressor(n_jobs=-1, random_state=42))
])
    
# Parameter Tunning
model = RandomizedSearchCV(rf_pipeline, rsp.rf_params, n_iter=50, cv=3, n_jobs=-1, verbose=1, random_state=42)
model.fit(X_train, y_train)

# Evaluation
print(model.best_params_)
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

Fitting 3 folds for each of 50 candidates, totalling 150 fits
{'algo__max_depth': 42, 'algo__max_features': 0.5687508340232413, 'algo__min_samples_leaf': 8, 'algo__n_estimators': 184}
0.8350934581064868 0.7581783679714146 0.8098228727140044


# Score Baseline kita yaitu:
**Train** = `0.8350934581064868`

**Validation** = `0.7581783679714146` 

**Test** = `0.8098228727140044`