In [None]:
%%capture
!pip uninstall -y scikit-learn 
!pip install -q '../input/sklearn24/scikit_learn-0.24.0-cp37-cp37m-manylinux2010_x86_64.whl'
import sklearn

In [None]:
sklearn.__version__

In [None]:
#!pip install xgboost --no-index --find-links=file:///kaggle/input/xgboost13123py3nonemanylinux/xgboost-1.3.2
!pip uninstall -y xgboost
!pip install -q '../input/xgboost13123py3nonemanylinux/xgboost-1.3.2/xgboost-1.3.3-py3-none-manylinux2010_x86_64.whl'
import xgboost

In [None]:
xgboost.__version__

In [None]:
!pip install flaml --no-index --find-links=file:///kaggle/input/flamlmain/FLAML-main/
import flaml 
from flaml import AutoML

In [None]:
flaml.__version__

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
from tqdm import tqdm
import seaborn as sns
import random
import cv2

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
# Tabular data
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
sample_submission = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

# Every column except the first one. The explicit .copy() makes this an
# independent frame: later cells add and drop columns on it, which on a bare
# iloc slice triggers SettingWithCopyWarning and depends on view/copy semantics.
train_features = train.iloc[:, 1:].copy()

# Photo data: directory prefixes that image file names are appended to.
train_image = '../input/petfinder-pawpularity-score/train/'
test_image = '../input/petfinder-pawpularity-score/test/'

In [None]:
# Bucket Pawpularity into coarse bands: divide by 33 and round up, then cap
# every band that is not below 4 at 4.0.
pawpularity_band = np.ceil(train_features['Pawpularity'] / 33)

# Assign the capped result back explicitly. An in-place `.where` on a column
# selected with `df[col]` is chained assignment: under pandas Copy-on-Write it
# modifies a temporary object and the frame itself is left unchanged.
train_features['income_pawpularity'] = pawpularity_band.where(pawpularity_band < 4, 4.0)

# Columns shown as parallel axes, in display order.
parallel_columns = ['Subject Focus', 'Eyes',
                    'Face', 'Near', 'Action',
                    'Accessory', 'Group',
                    'Collage', 'Human',
                    'Occlusion', 'Info',
                    'Blur', 'income_pawpularity']

fig = px.parallel_categories(train_features,
                             dimensions=parallel_columns,
                             color='income_pawpularity')
fig.show()

In [None]:
# Distribution of Pawpularity, split by income_pawpularity via `hue` and drawn
# with step outlines.
sns.displot(train_features, x="Pawpularity", hue="income_pawpularity", element="step")

In [None]:
# Record the pixel dimensions of every training image.
# Rows are collected in a list and converted to a DataFrame once, instead of
# enlarging the frame cell by cell with .loc on every iteration.
image_sizes = []
for dicomimage in tqdm(train['Id'] + '.jpg', total=len(train)):

    image = cv2.imread(train_image + dicomimage)
    if image is None:
        # cv2.imread returns None (rather than raising) when a file cannot be read.
        raise OSError('Could not read image: ' + train_image + dicomimage)

    # First two entries of image.shape; stored as plain integers rather than
    # the 1-tuples produced by shape[:1] / shape[1:2].
    scale1, scale2 = image.shape[:2]

    image_sizes.append({'image': dicomimage, 'scale1': scale1, 'scale2': scale2})

# Default RangeIndex (0..n-1) matches the enumerate() positions used before.
plate_scale = pd.DataFrame(image_sizes, columns=['image', 'scale1', 'scale2'])

In [None]:
# Attach the popularity band to each image row, converted to strings
# (presumably so the scatter plot colours it as a discrete category -- confirm).
# The assignment aligns on the row index of the two frames.
plate_scale['income_pawpularity'] = train_features['income_pawpularity'].astype(str)

In [None]:
# Scatter of the two recorded image dimensions, coloured by popularity band.
fig = px.scatter(
    data_frame=plate_scale,
    x="scale1",
    y="scale2",
    color="income_pawpularity",
)
fig.show()

In [None]:
def show_img(full_path, pawpularity_file, n_cols=3):
    """Plot images in a grid, titling each one with its Pawpularity value.

    Args:
        full_path: sequence of image file paths passed to ``cv2.imread``.
        pawpularity_file: sequence of values shown in the titles; entry ``i``
            belongs to ``full_path[i]``.
        n_cols: number of grid columns (default 3).

    The grid always has at least 5 rows, so up to 15 images with the default
    ``n_cols`` are laid out exactly as a fixed 5x3 grid on a 25x30 figure.
    When more images are passed, rows (and figure height) are added instead of
    ``plt.subplot`` failing on an out-of-range position.
    """
    n_images = len(full_path)
    # Ceiling division without importing math.
    n_rows = max(5, -(-n_images // n_cols))
    plt.figure(figsize=(25, 6 * n_rows))

    for position, image_path in enumerate(full_path):
        image = cv2.imread(image_path)
        # Convert OpenCV's BGR channel order to RGB before handing to matplotlib.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.subplot(n_rows, n_cols, position + 1)
        plt.title('pawpularity: ' + str(pawpularity_file[position]))
        plt.imshow(image)
        plt.axis("off")

In [None]:
# For each popularity band 1..4, display a random sample of at most 15 images.
for i_income in range(1, 5):
    # Index labels of the rows that fall into this band.
    index = train_features.index[train_features['income_pawpularity'] == i_income]
    if len(index) == 0:
        continue  # nothing to draw for an empty band

    # random.sample raises ValueError if asked for more items than exist,
    # so cap the sample size at the band size.
    random_ind = random.sample(list(index), min(15, len(index)))

    # The sampled values are index labels, so select with .loc; .iloc would
    # only be equivalent while the index happens to be 0..n-1.
    sampled_rows = train.loc[random_ind]
    full_path = ['{}'.format(train_image) + f'{x}.jpg' for x in sampled_rows['Id']]
    pawpularity_file = list(sampled_rows['Pawpularity'])

    show_img(full_path, pawpularity_file)

In [None]:
# Regression target, read from the untouched `train` frame so this cell can be
# re-run after 'Pawpularity' has already been dropped from train_features.
Y = train['Pawpularity'].astype(int)

# Non-mutating drop; errors='ignore' keeps the cell idempotent on re-execution
# (an in-place drop raises KeyError the second time the cell runs).
train_features = train_features.drop(columns=['Pawpularity', 'income_pawpularity'],
                                     errors='ignore')

test_features = test[['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 
                      'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 
                      'Info', 'Blur']]

In [None]:
skfolds = StratifiedKFold(n_splits=5, 
                          random_state=42, 
                          shuffle = True)

# split() yields *positional* row indices. Writing them into a NumPy array and
# assigning the whole column at once avoids treating positions as .loc labels
# and keeps 'fold' an integer column instead of a NaN-initialised float one.
fold_ids = np.full(len(train_features), -1, dtype=int)
for num_fold, (train_index, val_index) in enumerate(skfolds.split(train_features, Y)):
    fold_ids[val_index] = num_fold

train_features['fold'] = fold_ids

In [None]:
# Preview the first three rows of the feature frame.
train_features.head(3)

In [None]:
def my_loss_obj(y_true, y_pred):
    """Return the root-mean-squared error between predictions and targets.

    Args:
        y_true: target values; must support element-wise subtraction from
            ``y_pred`` (e.g. a NumPy array).
        y_pred: predicted values.

    Returns:
        The square root of the mean of the squared differences.
    """
    residual = y_pred - y_true
    mean_squared_error = (residual ** 2).mean()
    return np.sqrt(mean_squared_error)

In [None]:
from flaml.model import LGBMEstimator

''' create a customized LightGBM learner class with your objective function '''
def _squared_error_objective(y_true, y_pred):
    """Squared-error objective in the form LightGBM's scikit-learn API expects.

    A custom ``objective`` callable must return the per-sample gradient and
    hessian of the loss with respect to the prediction, not a scalar score.
    For the loss 0.5 * (y_pred - y_true) ** 2 these are the residual and 1.
    Minimising it also minimises RMSE.
    """
    grad = y_pred - y_true
    hess = np.ones_like(grad, dtype=float)
    return grad, hess


class MyLGBM(LGBMEstimator):
    '''LGBMEstimator trained with a custom squared-error objective.

    The scalar RMSE helper ``my_loss_obj`` is a metric, not an objective: it
    returns one number instead of (grad, hess), so it cannot be passed as
    ``objective``. The gradient/hessian function above is used instead.
    '''

    def __init__(self, **params):
        # All other keyword arguments are forwarded unchanged to LGBMEstimator.
        super().__init__(objective=_squared_error_objective, **params)

In [None]:
# Keyword arguments that the training loop unpacks into automl.fit(**settings).
settings = dict(
    time_budget=240,
    metric='rmse',
    estimator_list=['lgbm'],
    task='regression',
    seed=7654321,
)

In [None]:
# Fit one AutoML model per fold and score it on that fold's held-out rows.
# `automl` is rebound on every iteration, so after the loop it refers to the
# model fitted in the last fold.
fold_scores = []
for fold_n in range(5): 
    print('Fold #{}'.format(fold_n+1))

    is_val = train_features['fold'] == fold_n

    # Drop the bookkeeping column by name rather than by position, so the
    # split does not depend on 'fold' being the last column.
    train_data = train_features[~is_val].drop(columns='fold').astype(int)
    val_data = train_features[is_val].drop(columns='fold').astype(int)

    train_data_ind, val_data_ind = train_data.index, val_data.index

    train_Y = Y[train_data_ind]
    val_Y = Y[val_data_ind]

    automl = AutoML()
    #automl.add_learner(learner_name='my_lgbm', learner_class=MyLGBM)

    automl.fit(X_train = train_data, y_train = train_Y, **settings)

    # Out-of-fold RMSE: predict on the validation features (not the targets)
    # and compare with the validation targets.
    fold_rmse = my_loss_obj(val_Y.to_numpy(), automl.predict(val_data))
    fold_scores.append(fold_rmse)
    print('Validation RMSE: {:.4f}'.format(fold_rmse))

print('Mean validation RMSE: {:.4f}'.format(np.mean(fold_scores)))

In [None]:
# Inspect `best_config` on the AutoML object left over from the training loop.
# The loop creates a new AutoML on every iteration, so this is the last fold's.
automl.best_config

In [None]:
# Predict on the test features with the AutoML object from the last fold only;
# models from earlier folds were overwritten inside the loop.
predictions = automl.predict(test_features)

In [None]:
# Display the predictions returned by automl.predict.
predictions

In [None]:
# Bracket assignment always writes a DataFrame column. Attribute-style
# assignment only does so when the column already exists; otherwise it sets a
# plain Python attribute and the CSV would be written without the predictions.
sample_submission['Pawpularity'] = predictions
sample_submission.to_csv("submission.csv", index=False)

##########################################################################

In [None]:
from tensorflow.keras.utils import Sequence

In [None]:
class SETIDataset(Sequence):

    def __init__(self, x_set, y_set=None, batch_size=32, metod_wave=True):
                 
        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size
        self.metod_wave = metod_wave 
        
    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)
        
    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size] 
        
        if self.y is not None:
            batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size] 
             
        list_train = [np.load(path).astype('float') for path in batch_x['file_path']]
        
        if not self.metod_wave:
            if self.y is not None:
                return np.array(list_train), np.array(batch_y)
            else:
                return np.array(list_train)
        else:    
            hi_inv =  [self.hilbert_invert(image) for image in list_train]

            if self.y is not None:
                return hi_inv, np.array(batch_y)
            else:
                return np.array(list_train)