In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the raw training table from the project's data directory.
train = pd.read_csv("data/train.csv")

In [3]:
# Outlier detection
def outlier(data, column, factor=3):
    """Return the index labels of rows whose ``column`` value is an IQR outlier.

    A value is flagged when it lies more than ``factor`` times the
    inter-quartile range above the 75th percentile or below the 25th
    percentile. Missing values are ignored when computing the quartiles and
    are never flagged.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the numeric column to inspect.
    factor : float, default 3
        Multiplier applied to the IQR to build the fences.

    Returns
    -------
    pandas.Index
        Index labels of the flagged rows (empty when the column has no
        non-missing values).
    """
    values = data[column]
    observed = values.dropna()
    if observed.empty:
        # np.quantile raises on an empty array; nothing can be an outlier here.
        return data.index[:0]

    q25, q75 = np.quantile(observed, [0.25, 0.75])
    fence = (q75 - q25) * factor
    flagged = (values > q75 + fence) | (values < q25 - fence)
    return data.index[flagged.to_numpy()]

# Interpolation
def time_interpolate(data, column):
    """Fill missing values of ``column`` by time-weighted interpolation.

    The rows are indexed by the timestamps parsed from the ``일시`` column so
    that pandas can weight each gap by its real duration. ``data`` itself is
    not modified; the filled values are returned as a NumPy array in the
    original row order.
    """
    stamps = pd.to_datetime(data["일시"])
    frame = data[[column]].copy()
    frame.index = stamps
    filled = frame.interpolate(method="time")
    return filled[column].values

# Precipitation is re-coded into ordinal classes following the rainfall wording
# defined by the Korea Meteorological Administration:
#   0: no rain, 1: very light (0-1), 2: light (1-3), 3: moderate (3-15),
#   4: heavy (15-30), 5: very heavy (30 and above)
# The last edge is np.inf instead of max(train.강수량): the builtin max() gives an
# order-dependent result on a Series containing NaN, and a maximum below 29.9
# would make the bin edges non-monotonic and raise.
# Values outside (0, inf) and missing values come out of pd.cut as NaN and are
# mapped to class 0 by fillna.
train["강수량"] = (
    pd.cut(train["강수량"], bins=[0, 0.9, 2.9, 14.9, 29.9, np.inf], labels=[1, 2, 3, 4, 5])
    .astype("float")
    .fillna(0)
)

# Fill gaps in the daily maximum / minimum temperature by time interpolation.
train["최고기온"] = time_interpolate(train, "최고기온")
train["최저기온"] = time_interpolate(train, "최저기온")

# Recompute the daily temperature range from the filled extremes.
train["일교차"] = train["최고기온"] - train["최저기온"]

# Mean wind speed and total sunshine hours are filled the same way.
train["평균풍속"] = time_interpolate(train, "평균풍속")
train["일조합"] = time_interpolate(train, "일조합")

# NOTE(review): rows labelled 0-4749 and 4780-4854 are forced to 0 before
# interpolating 일사합 — presumably a period without radiation records; confirm
# the row ranges against the raw data.
train.loc[0:4749, "일사합"] = 0
train.loc[4780:4854, "일사합"] = 0
train["일사합"] = time_interpolate(train, "일사합")


# Drop rows that have no 일조율 value.
# dropna(subset=...) selects by label. The previous form passed index labels to
# .iloc, which is only correct while the index is an untouched 0..n-1 range and
# picks wrong rows (or raises) if this cell is re-run.
train = train.dropna(subset=["일조율"]).copy()

# 가조합 = 일조합 / (일조율 / 100)
train["가조합"] = train["일조합"] / (train["일조율"] / 100)
# Division by zero yields inf; blank those entries and fill them by time interpolation.
train["가조합"] = train["가조합"].replace([np.inf, -np.inf], np.nan)
train["가조합"] = time_interpolate(train, "가조합")

# 일사합 / 일조합
train["일사_일조"] = train["일사합"] / train["일조합"]
# When the denominator is 0 no value can be chosen arbitrarily, so interpolate instead.
train["일사_일조"] = train["일사_일조"].replace([np.inf, -np.inf], np.nan)
train["일사_일조"] = time_interpolate(train, "일사_일조")



# Four seasons: 0 winter (Dec-Feb), 1 spring (Mar-May), 2 summer (Jun-Aug), 3 autumn (Sep-Nov).
# The month is the second "-"-separated field of 일시; (month % 12) // 3 maps
# 12, 1, 2 -> 0, 3-5 -> 1, 6-8 -> 2 and 9-11 -> 3.
train["계절"] = (
    train["일시"].str.split("-", expand=True)[1].astype("int")
    .mod(12)
    .floordiv(3)
    .astype("int")
)

# The date string is no longer needed once the season has been derived.
train = train.drop(columns=["일시"])

In [4]:
# No missing values remain (every column reports the full non-null count in the output below).
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22645 entries, 366 to 23010
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   최고기온    22645 non-null  float64
 1   최저기온    22645 non-null  float64
 2   일교차     22645 non-null  float64
 3   강수량     22645 non-null  float64
 4   평균습도    22645 non-null  float64
 5   평균풍속    22645 non-null  float64
 6   일조합     22645 non-null  float64
 7   일사합     22645 non-null  float64
 8   일조율     22645 non-null  float64
 9   평균기온    22645 non-null  float64
 10  가조합     22645 non-null  float64
 11  일사_일조   22645 non-null  float64
 12  계절      22645 non-null  int32  
dtypes: float64(12), int32(1)
memory usage: 2.3 MB


In [5]:
# Preview the submission template; the displayed rows carry 일시 and a 평균기온 placeholder of 0.
pd.read_csv("data/sample_submission.csv")

Unnamed: 0,일시,평균기온
0,2023-01-01,0
1,2023-01-02,0
2,2023-01-03,0
3,2023-01-04,0
4,2023-01-05,0
...,...,...
353,2023-12-20,0
354,2023-12-21,0
355,2023-12-22,0
356,2023-12-23,0


In [6]:
# Work on a copy so the preprocessed `train` frame is left as it is.
df = train.copy()
# Column name -> positional index lookup.
column_indices = dict((name, i) for i, name in enumerate(df.columns))

# Positional 80/20 split without shuffling: first 80% of rows for training, the rest for validation.
n = len(df)
train_df = df.iloc[:int(n*0.8)]
val_df = df.iloc[int(n*0.8):]

num_features = len(df.columns)

In [7]:
# Per-column statistics come from the training rows only and are reused for validation.
train_mean = train_df.mean()
train_std = train_df.std()

# z-score both splits with the training statistics.
train_df = train_df.sub(train_mean).div(train_std)
val_df = val_df.sub(train_mean).div(train_std)

In [8]:
# Standardise the whole frame with the training statistics and reshape it to
# long form: one row per (Column, Normalized) pair.
df_std = (
    df.sub(train_mean)
    .div(train_std)
    .melt(var_name='Column', value_name='Normalized')
)

In [9]:
class WindowGenerator():
    """Describes how a window of consecutive rows is divided into inputs and labels.

    A window spans ``input_width + shift`` rows. The first ``input_width`` rows
    are the model inputs; the last ``label_width`` rows are the labels. When
    ``label_columns`` is given, ``label_columns_indices`` maps each of those
    names to its position in that list; ``column_indices`` always maps every
    column of ``train_df`` to its position.

    Note: the ``train_df`` / ``val_df`` defaults are bound to the module-level
    frames that exist when this class statement runs.
    """

    def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, label_columns=None):
        # Keep references to the data frames the datasets are built from.
        self.train_df, self.val_df = train_df, val_df

        # Name -> position lookups for the label columns and for all columns.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = dict(
                (name, pos) for pos, name in enumerate(label_columns))
        self.column_indices = dict(
            (name, pos) for pos, name in enumerate(train_df.columns))

        # Window geometry.
        self.input_width, self.label_width, self.shift = input_width, label_width, shift
        self.total_window_size = input_width + shift

        # Inputs occupy the leading rows of the window ...
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # ... and labels occupy the trailing `label_width` rows.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Summarise the window size, input/label positions and label columns."""
        summary = [
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        ]
        return '\n'.join(summary)

In [10]:
def split_window(self, features):
    """Split a batch of windows into ``(inputs, labels)``.

    ``features`` is indexed as ``[batch, time, feature]``. Inputs are the rows
    selected by ``self.input_slice``; labels are the rows selected by
    ``self.labels_slice``, restricted to ``self.label_columns`` (in that order)
    when it is not ``None``.
    """
    window_inputs = features[:, self.input_slice, :]
    window_labels = features[:, self.labels_slice, :]

    wanted = self.label_columns
    if wanted is not None:
        # Pick each requested column and stack them back as the last axis.
        picked = []
        for col_name in wanted:
            picked.append(window_labels[:, :, self.column_indices[col_name]])
        window_labels = tf.stack(picked, axis=-1)

    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect.
    window_inputs.set_shape([None, self.input_width, None])
    window_labels.set_shape([None, self.label_width, None])

    return window_inputs, window_labels

# Attach the function to the class so it can be called as a method on instances.
WindowGenerator.split_window = split_window

In [11]:
def make_dataset(self, data, shuffle=True, batch_size=32):
    """Build a batched dataset of ``(inputs, labels)`` windows from ``data``.

    Parameters
    ----------
    data : array-like
        Rows in sequence order; converted to a float32 array.
    shuffle : bool, default True
        Passed to ``timeseries_dataset_from_array``; set to False to keep the
        windows in order (e.g. for evaluation or inspection).
    batch_size : int, default 32
        Number of windows per batch.

    Returns
    -------
    The dataset returned by ``timeseries_dataset_from_array`` with
    ``self.split_window`` mapped over every batch.
    """
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
          data=data,
          targets=None,
          # Each sequence is one full window; split_window cuts it afterwards.
          sequence_length=self.total_window_size,
          sequence_stride=1,
          shuffle=shuffle,
          batch_size=batch_size,)

    return ds.map(self.split_window)

# Attach the function to the class so it can be called as a method on instances.
WindowGenerator.make_dataset = make_dataset

In [12]:
# The helpers below deliberately do NOT use the names `train` / `val` /
# `example` at module level: a top-level `def train` would overwrite the
# preprocessed `train` DataFrame that later cells still call `.drop()` on.
# They are wrapped with property() only when attached to the class.

def _window_train(self):
    """Windowed dataset built from ``self.train_df``."""
    return self.make_dataset(self.train_df)


def _window_val(self):
    """Windowed dataset built from ``self.val_df``."""
    return self.make_dataset(self.val_df)


def _window_example(self):
    """Get and cache an example batch of `inputs, labels` for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so get one from the `.train` dataset
        result = next(iter(self.train))
        # And cache it for next time
        self._example = result
    return result


WindowGenerator.train = property(_window_train)
WindowGenerator.val = property(_window_val)
WindowGenerator.example = property(_window_example)

In [13]:
MAX_EPOCHS = 100

def compile_and_fit(model, window, patience=5):
    """Compile ``model`` and train it on ``window.train`` with early stopping.

    The model is compiled with mean-squared-error loss, the Adam optimizer and
    mean-absolute-error as a metric. Training runs for at most ``MAX_EPOCHS``
    epochs, validating on ``window.val``; it stops once ``val_loss`` has not
    improved for ``patience`` epochs and the best weights are restored.

    Returns the object returned by ``model.fit``.
    """
    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])

    stopper = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        mode='min',
        restore_best_weights=True,
    )

    return model.fit(window.train, epochs=MAX_EPOCHS,
                     validation_data=window.val,
                     callbacks=[stopper])

In [17]:
# Forecast OUT_STEPS rows from a history three times as long.
OUT_STEPS = 358
INPUT_WIDTH = OUT_STEPS * 3
# shift == label_width, so the labels are the OUT_STEPS rows that directly
# follow the input rows; only 평균기온 is kept as a label column.
multi_window = WindowGenerator(input_width=INPUT_WIDTH,
                               label_width=OUT_STEPS,
                               shift=OUT_STEPS,
                               label_columns=['평균기온'])

# Inspect the (inputs, labels) tensor specs of the training dataset.
multi_window.train.element_spec

(TensorSpec(shape=(None, 1074, 13), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 358, 1), dtype=tf.float32, name=None))

In [None]:
# Metric stores keyed by model name.
multi_val_performance = {}
multi_performance = {}

# Three stacked LSTM layers followed by a dense head that emits all OUT_STEPS
# predictions in one shot.
multi_lstm_model = tf.keras.Sequential([
    # return_sequences=True keeps the time axis so the next LSTM receives a sequence.
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    # Shape => [batch, lstm_units]: only the last time step is returned.
    tf.keras.layers.LSTM(32, return_sequences=False),
    # Shape => [batch, out_steps*features].
    tf.keras.layers.Dense(OUT_STEPS,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features].
    tf.keras.layers.Reshape([OUT_STEPS, 1])
])

history = compile_and_fit(multi_lstm_model, multi_window)

# IPython.display.clear_output()

# NOTE(review): both entries are evaluated on multi_window.val; no separate test
# split is defined in the visible cells, so multi_performance repeats the
# validation metrics.
multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val, verbose=0)

Epoch 1/100

In [None]:
# Use the last INPUT_WIDTH rows of the normalised validation frame as a single
# input window of shape (1, rows, features). The feature axis is taken from the
# frame itself: it has 13 columns, so the previous hard-coded reshape to 11
# features would raise.
input_for_submit = val_df.iloc[-INPUT_WIDTH:].to_numpy()[np.newaxis, ...]
pred1 = multi_lstm_model.predict(input_for_submit)
# Undo the z-score normalisation of the target column to get values in original units.
pred1 = pred1[-1].reshape(-1,) * train_std['평균기온'] + train_mean['평균기온']

In [None]:
# Metric stores for the wider model, keyed by model name.
multi_val_performance2 = {}
multi_performance2 = {}

# Same architecture as the previous model cell but with 128 units in every LSTM
# layer. NOTE(review): this rebinds `multi_lstm_model`, so the earlier model
# object is no longer reachable through that name.
multi_lstm_model = tf.keras.Sequential([
    # return_sequences=True keeps the time axis so the next LSTM receives a sequence.
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(128, return_sequences=True),
    tf.keras.layers.LSTM(128, return_sequences=True),
    # Shape => [batch, lstm_units]: only the last time step is returned.
    tf.keras.layers.LSTM(128, return_sequences=False),
    # Shape => [batch, out_steps*features].
    tf.keras.layers.Dense(OUT_STEPS,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features].
    tf.keras.layers.Reshape([OUT_STEPS, 1])
])

history = compile_and_fit(multi_lstm_model, multi_window)

# IPython.display.clear_output()

# NOTE(review): both entries are evaluated on multi_window.val; no separate test
# split is defined in the visible cells.
multi_val_performance2['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
multi_performance2['LSTM'] = multi_lstm_model.evaluate(multi_window.val, verbose=0)

In [None]:
# Use the last INPUT_WIDTH rows of the normalised validation frame as a single
# input window of shape (1, rows, features). The feature axis is taken from the
# frame itself: it has 13 columns, so the previous hard-coded reshape to 11
# features would raise.
input_for_submit = val_df.iloc[-INPUT_WIDTH:].to_numpy()[np.newaxis, ...]
pred2 = multi_lstm_model.predict(input_for_submit)
# Undo the z-score normalisation of the target column to get values in original units.
pred2 = pred2[-1].reshape(-1,) * train_std['평균기온'] + train_mean['평균기온']

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf



In [7]:
class LSTM():
    """Container for a data set split into predictor columns and a target.

    Attributes set by the constructor:
      data         -- the frame passed in, stored as-is
      target       -- the target column label(s)
      inputs       -- ``data`` without the target column(s)
      outputs      -- ``data[target]``
      window_size  -- stored unchanged
      shift        -- stored unchanged
    """

    def __init__(self, data, target, window_size, shift):
        self.data, self.target = data, target
        self.window_size, self.shift = window_size, shift

        # Predictors are every column except the target; the target is kept separately.
        self.outputs = data[target]
        self.inputs = data.drop(columns=target)
        

In [None]:
def Scale_Split(self, test_size, scaler = None):
    """Min-max scale ``self.inputs`` / ``self.outputs`` and split them.

    Both attributes are replaced by scaled DataFrames (with a fresh default
    index). The fitted scalers are kept on ``self.scaler_in`` and
    ``self.scaler_out`` so predictions can be mapped back to original units.

    Parameters
    ----------
    test_size : float or int
        Forwarded to ``train_test_split``.
    scaler : optional
        Currently unused; kept so existing callers that pass it keep working.

    Returns
    -------
    tuple
        ``(train_x, train_y, test_x, test_y)``.
    """
    scaler_in = MinMaxScaler()
    scaler_out = MinMaxScaler()

    self.inputs = pd.DataFrame(scaler_in.fit_transform(self.inputs))

    # The scaler expects 2-D input; a single target column selected as a Series
    # is 1-D, so promote it to a one-column frame first.
    self.outputs = pd.DataFrame(scaler_out.fit_transform(pd.DataFrame(self.outputs)))

    self.scaler_in = scaler_in
    self.scaler_out = scaler_out

    # train_test_split returns the splits grouped per array:
    # (X_train, X_test, y_train, y_test).
    train_x, test_x, train_y, test_y = train_test_split(
        self.inputs, self.outputs, test_size=test_size)
    return train_x, train_y, test_x, test_y

In [98]:
# Separate the predictors from the 평균기온 target.
x = train.drop(columns=["평균기온"])
y = train[["평균기온"]]

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit each scaler on its own data and replace the frames with the scaled values.
x = pd.DataFrame(scaler_x.fit_transform(x))
y = pd.DataFrame(scaler_y.fit_transform(y))
x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.163534,0.102970,0.502762,0.0,0.682540,0.090909,0.197080,0.000000,0.289990,0.031365,0.000000,0.0
1,0.259398,0.156436,0.635359,0.0,0.664225,0.168831,0.357664,0.000000,0.526316,0.031240,0.000000,0.0
2,0.261278,0.306931,0.220994,0.6,0.721612,0.324675,0.000000,0.000000,0.000000,0.039497,0.000000,0.0
3,0.167293,0.128713,0.441989,0.0,0.343101,0.363636,0.613139,0.000000,0.893705,0.047755,0.000000,0.0
4,0.139098,0.073267,0.513812,0.0,0.444444,0.103896,0.452555,0.000000,0.659443,0.048275,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
22640,0.317669,0.255446,0.530387,0.0,0.633700,0.220779,0.642336,0.306153,0.946336,0.029204,0.009348,0.0
22641,0.257519,0.281188,0.281768,0.2,0.490842,0.311688,0.635036,0.324373,0.934985,0.030306,0.010018,0.0
22642,0.295113,0.245545,0.491713,0.0,0.468864,0.207792,0.656934,0.324970,0.968008,0.028911,0.009702,0.0
22643,0.298872,0.312871,0.314917,0.0,0.582418,0.233766,0.576642,0.323775,0.849329,0.029656,0.011012,0.0


In [105]:
# NOTE(review): the recorded output shows this raising AttributeError because `x`
# was an EagerTensor when the cell ran. In this notebook's outputs `element_spec`
# only appears on dataset objects, so presumably `dataset_x.element_spec` was
# intended — confirm or remove this cell.
x.element_spec

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'element_spec'

In [106]:
# Separate the predictors from the 평균기온 target.
x = train.drop(columns=["평균기온"])
y = train[["평균기온"]]

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit each scaler on its own data and replace the frames with the scaled values.
x = pd.DataFrame(scaler_x.fit_transform(x))
y = pd.DataFrame(scaler_y.fit_transform(y))

# Build sliding windows of `window_size` consecutive rows over the scaled predictors.
window_size = 385
x = np.array(x, dtype = np.float32)
dataset_x = tf.keras.utils.timeseries_dataset_from_array(x, None, sequence_length = window_size)

# Keep only the first batch for inspection (note that this rebinds `x` to a tensor).
for batch in dataset_x.take(1):
    x = batch
x

<tf.Tensor: shape=(128, 385, 12), dtype=float32, numpy=
array([[[0.16353384, 0.10297029, 0.50276244, ..., 0.03136498,
         0.        , 0.        ],
        [0.2593985 , 0.15643564, 0.6353591 , ..., 0.03123961,
         0.        , 0.        ],
        [0.26127818, 0.3069307 , 0.22099447, ..., 0.03949737,
         0.        , 0.        ],
        ...,
        [0.2631579 , 0.2752475 , 0.31491712, ..., 0.06646726,
         0.        , 0.        ],
        [0.2669173 , 0.21782178, 0.48618785, ..., 0.0839067 ,
         0.        , 0.        ],
        [0.26127818, 0.21386139, 0.48066297, ..., 0.08439606,
         0.        , 0.        ]],

       [[0.2593985 , 0.15643564, 0.6353591 , ..., 0.03123961,
         0.        , 0.        ],
        [0.26127818, 0.3069307 , 0.22099447, ..., 0.03949737,
         0.        , 0.        ],
        [0.16729324, 0.12871288, 0.44198895, ..., 0.04775512,
         0.        , 0.        ],
        ...,
        [0.2669173 , 0.21782178, 0.48618785, ..., 0.

In [70]:
# Take only the first batch; printing the resulting dataset shows its
# element_spec (see the output below), not the data itself.
x = dataset_x.take(1)
print(x)

<TakeDataset element_spec=TensorSpec(shape=(None, None, 12), dtype=tf.float64, name=None)>


In [38]:
# Build 10-step windows from `x` and print the dataset's repr (its element_spec), not its contents.
print(tf.keras.utils.timeseries_dataset_from_array(x, None, sequence_length = 10))

<BatchDataset element_spec=TensorSpec(shape=(None, None, 12), dtype=tf.float64, name=None)>


In [97]:
# Separate the predictors from the 평균기온 target.
x = train.drop(columns=["평균기온"])
y = train[["평균기온"]]

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit each scaler on its own data and replace the frames with the scaled values.
x = pd.DataFrame(scaler_x.fit_transform(x))
y = pd.DataFrame(scaler_y.fit_transform(y))

# Iterate over the first batch of 10-step windows only; the loop variable
# rebinds `x`, so after the loop `x` is that batch tensor.
for x in tf.keras.utils.timeseries_dataset_from_array(x, None, sequence_length = 10, sequence_stride=1).take(1):
    x
x

<tf.Tensor: shape=(128, 10, 12), dtype=float64, numpy=
array([[[0.16353383, 0.1029703 , 0.50276243, ..., 0.03136498,
         0.        , 0.        ],
        [0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
         0.        , 0.        ],
        [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
         0.        , 0.        ],
        ...,
        [0.29887218, 0.30693069, 0.33149171, ..., 0.05424557,
         0.        , 0.        ],
        [0.31015038, 0.31089109, 0.35359116, ..., 0.05912468,
         0.        , 0.        ],
        [0.28947368, 0.19009901, 0.62983425, ..., 0.06400379,
         0.        , 0.        ]],

       [[0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
         0.        , 0.        ],
        [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
         0.        , 0.        ],
        [0.16729323, 0.12871287, 0.44198895, ..., 0.04775512,
         0.        , 0.        ],
        ...,
        [0.31015038, 0.31089109, 0.35359116, ..., 0.0

In [96]:
# Displays whatever `x` currently holds. NOTE(review): the recorded 4-D output
# (119, 10, 10, 12) presumably came from windowing an already-windowed batch, so
# the result depends on which cells ran before this one.
x

<tf.Tensor: shape=(119, 10, 10, 12), dtype=float64, numpy=
array([[[[0.16353383, 0.1029703 , 0.50276243, ..., 0.03136498,
          0.        , 0.        ],
         [0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
          0.        , 0.        ],
         [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
          0.        , 0.        ],
         ...,
         [0.29887218, 0.30693069, 0.33149171, ..., 0.05424557,
          0.        , 0.        ],
         [0.31015038, 0.31089109, 0.35359116, ..., 0.05912468,
          0.        , 0.        ],
         [0.28947368, 0.19009901, 0.62983425, ..., 0.06400379,
          0.        , 0.        ]],

        [[0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
          0.        , 0.        ],
         [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
          0.        , 0.        ],
         [0.16729323, 0.12871287, 0.44198895, ..., 0.04775512,
          0.        , 0.        ],
         ...,
         [0.31015038, 0.31089

In [55]:
# NOTE(review): `inputs` is not assigned at top level in any visible cell, so
# this display relies on leftover kernel state and would fail on a fresh run.
inputs

<tf.Tensor: shape=(128, 10, 12), dtype=float64, numpy=
array([[[0.16353383, 0.1029703 , 0.50276243, ..., 0.03136498,
         0.        , 0.        ],
        [0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
         0.        , 0.        ],
        [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
         0.        , 0.        ],
        ...,
        [0.29887218, 0.30693069, 0.33149171, ..., 0.05424557,
         0.        , 0.        ],
        [0.31015038, 0.31089109, 0.35359116, ..., 0.05912468,
         0.        , 0.        ],
        [0.28947368, 0.19009901, 0.62983425, ..., 0.06400379,
         0.        , 0.        ]],

       [[0.2593985 , 0.15643564, 0.63535912, ..., 0.03123961,
         0.        , 0.        ],
        [0.2612782 , 0.30693069, 0.22099448, ..., 0.03949737,
         0.        , 0.        ],
        [0.16729323, 0.12871287, 0.44198895, ..., 0.04775512,
         0.        , 0.        ],
        ...,
        [0.31015038, 0.31089109, 0.35359116, ..., 0.0

In [48]:
# Prints every batch of 10-step windows built from `x`.
# NOTE(review): the loop variable rebinds `x`, and printing all batches floods
# the output; the other cells limit this with `.take(1)`.
for x in tf.keras.utils.timeseries_dataset_from_array(x, None, sequence_length = 10):
    print(x)

tf.Tensor(
[[[0.16353383 0.1029703  0.50276243 ... 0.03136498 0.         0.        ]
  [0.2593985  0.15643564 0.63535912 ... 0.03123961 0.         0.        ]
  [0.2612782  0.30693069 0.22099448 ... 0.03949737 0.         0.        ]
  ...
  [0.29887218 0.30693069 0.33149171 ... 0.05424557 0.         0.        ]
  [0.31015038 0.31089109 0.35359116 ... 0.05912468 0.         0.        ]
  [0.28947368 0.19009901 0.62983425 ... 0.06400379 0.         0.        ]]

 [[0.2593985  0.15643564 0.63535912 ... 0.03123961 0.         0.        ]
  [0.2612782  0.30693069 0.22099448 ... 0.03949737 0.         0.        ]
  [0.16729323 0.12871287 0.44198895 ... 0.04775512 0.         0.        ]
  ...
  [0.31015038 0.31089109 0.35359116 ... 0.05912468 0.         0.        ]
  [0.28947368 0.19009901 0.62983425 ... 0.06400379 0.         0.        ]
  [0.07706767 0.07524752 0.32596685 ... 0.06605621 0.         0.        ]]

 [[0.2612782  0.30693069 0.22099448 ... 0.03949737 0.         0.        ]
  [0.167293

tf.Tensor(
[[[0.78571429 0.65346535 0.79558011 ... 0.47920046 0.00591712 1.        ]
  [0.77819549 0.66336634 0.74585635 ... 0.47791675 0.00591523 1.        ]
  [0.73120301 0.64554455 0.65745856 ... 0.47721589 0.00612128 1.        ]
  ...
  [0.75       0.64356436 0.71823204 ... 0.44338151 0.00556006 1.        ]
  [0.71240602 0.73465347 0.35359116 ... 0.42530793 0.01330574 1.        ]
  [0.7424812  0.65544554 0.66298343 ... 0.42626034 0.00603377 1.        ]]

 [[0.77819549 0.66336634 0.74585635 ... 0.47791675 0.00591523 1.        ]
  [0.73120301 0.64554455 0.65745856 ... 0.47721589 0.00612128 1.        ]
  [0.63533835 0.5960396  0.51381215 ... 0.46101695 0.00532752 1.        ]
  ...
  [0.71240602 0.73465347 0.35359116 ... 0.42530793 0.01330574 1.        ]
  [0.7424812  0.65544554 0.66298343 ... 0.42626034 0.00603377 1.        ]
  [0.71804511 0.69108911 0.49171271 ... 0.4080533  0.00529101 1.        ]]

 [[0.73120301 0.64554455 0.65745856 ... 0.47721589 0.00612128 1.        ]
  [0.635338

tf.Tensor(
[[[0.52443609 0.53861386 0.3480663  ... 0.44390638 0.01462012 0.33333333]
  [0.57330827 0.46534653 0.6961326  ... 0.4433212  0.01367133 0.33333333]
  [0.56203008 0.52277228 0.50276243 ... 0.45985221 0.01400156 0.33333333]
  ...
  [0.33270677 0.27722772 0.51381215 ... 0.49813002 0.01568867 0.33333333]
  [0.43796992 0.33663366 0.65745856 ... 0.49770986 0.01445442 0.33333333]
  [0.46992481 0.3980198  0.5801105  ... 0.51001541 0.02675227 0.33333333]]

 [[0.57330827 0.46534653 0.6961326  ... 0.4433212  0.01367133 0.33333333]
  [0.56203008 0.52277228 0.50276243 ... 0.45985221 0.01400156 0.33333333]
  [0.5093985  0.59207921 0.15469613 ... 0.41874377 0.06701445 0.33333333]
  ...
  [0.43796992 0.33663366 0.65745856 ... 0.49770986 0.01445442 0.33333333]
  [0.46992481 0.3980198  0.5801105  ... 0.51001541 0.02675227 0.33333333]
  [0.56390977 0.43168317 0.76243094 ... 0.51403168 0.0129588  0.33333333]]

 [[0.56203008 0.52277228 0.50276243 ... 0.45985221 0.01400156 0.33333333]
  [0.509398

tf.Tensor(
[[[0.79887218 0.81386139 0.38674033 ... 0.96557019 0.02095175 0.66666667]
  [0.85902256 0.7980198  0.60773481 ... 0.9640678  0.01858198 0.66666667]
  [0.87406015 0.82178218 0.58563536 ... 0.96420416 0.0210314  0.66666667]
  ...
  [0.78007519 0.82772277 0.29281768 ... 0.94715852 0.14060995 0.66666667]
  [0.68421053 0.81584158 0.0441989  ... 0.90941461 0.26011236 0.66666667]
  [0.73120301 0.80792079 0.20441989 ... 0.8716707  0.37961477 0.66666667]]

 [[0.85902256 0.7980198  0.60773481 ... 0.9640678  0.01858198 0.66666667]
  [0.87406015 0.82178218 0.58563536 ... 0.96420416 0.0210314  0.66666667]
  [0.7612782  0.8        0.31491713 ... 0.96376932 0.02927919 0.66666667]
  ...
  [0.68421053 0.81584158 0.0441989  ... 0.90941461 0.26011236 0.66666667]
  [0.73120301 0.80792079 0.20441989 ... 0.8716707  0.37961477 0.66666667]
  [0.86842105 0.84554455 0.50276243 ... 0.94638936 0.01818095 0.66666667]]

 [[0.87406015 0.82178218 0.58563536 ... 0.96420416 0.0210314  0.66666667]
  [0.761278

In [None]:
# NOTE(review): unfinished stub — `CatBoostRegressor` is not imported in any
# visible cell and `fit()` is called without training data.
cat = CatBoostRegressor().fit()