In [1]:
import pandas as pd
import random
import numpy as np

In [7]:
import boto3
import pickle
import io
from urllib.parse import urlparse

def load_pickle_from_s3(s3_uri):
    """Download an object from S3 and unpickle it.

    Args:
        s3_uri: Location of the pickled object, written as
            ``s3://<bucket>/<key>``.

    Returns:
        The Python object obtained by unpickling the downloaded bytes.

    Raises:
        ValueError: If ``s3_uri`` does not use the ``s3`` scheme or lacks a
            bucket or a key.

    Errors raised by boto3 while fetching the object, or by ``pickle`` while
    decoding it, propagate to the caller unchanged.
    """
    # Split the S3 URI into bucket and key.
    parsed = urlparse(s3_uri)
    bucket = parsed.netloc
    key = parsed.path.lstrip("/")

    # Fail early with a clear message instead of sending an empty or
    # unrelated Bucket/Key pair to S3.
    if parsed.scheme != "s3" or not bucket or not key:
        raise ValueError(
            f"Expected an S3 URI of the form 's3://bucket/key', got {s3_uri!r}"
        )

    # Fetch the raw bytes from S3 and release the streaming body afterwards.
    s3 = boto3.client("s3")
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response["Body"]
    try:
        binary_data = body.read()
    finally:
        body.close()

    # SECURITY: unpickling can execute arbitrary code. Only use this helper
    # on objects from a bucket whose contents you trust.
    return pickle.loads(binary_data)

# Example usage: load the pickled cross-validation splits from S3.
X_train_pkl = "s3://datascience-keigo/kaggle-SpaceshipTitanic/processed/CV_1/X_train.pkl"
Y_train_pkl = "s3://datascience-keigo/kaggle-SpaceshipTitanic/processed/CV_1/Y_train.pkl"
X_valid_pkl = "s3://datascience-keigo/kaggle-SpaceshipTitanic/processed/CV_1/X_valid.pkl"
Y_valid_pkl = "s3://datascience-keigo/kaggle-SpaceshipTitanic/processed/CV_1/Y_valid.pkl"
train_X = load_pickle_from_s3(X_train_pkl)
train_Y = load_pickle_from_s3(Y_train_pkl)
valid_X = load_pickle_from_s3(X_valid_pkl)
valid_Y = load_pickle_from_s3(Y_valid_pkl)
# Report the type of the object loaded above. `train_X` is the name assigned
# in this cell; `X_train` is only created later inside the training loop, so
# referring to it here fails on a fresh kernel.
print("✅ 読み込み完了:", type(train_X))

✅ 読み込み完了: <class 'list'>


In [8]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# Parameter settings. LightGBM's documented key is lowercase "objective".
lgbm_params = {
    "objective": "binary",
    "random_seed": 1234,
}
# Number of cross-validation folds trained below.
N_FOLDS = 3
# Collects the trained booster of each fold.
models = []

for i in range(N_FOLDS):
    # Pick the i-th fold out of the lists loaded from S3.
    X_train = train_X[i]
    X_valid = valid_X[i]
    Y_train = train_Y[i]
    Y_valid = valid_Y[i]

    lgb_train = lgb.Dataset(X_train, Y_train)
    lgb_eval = lgb.Dataset(X_valid, Y_valid, reference=lgb_train)

    # Early stopping and periodic logging are configured through callbacks:
    # `lgb.train` has no `early_stopping_round` keyword, and recent LightGBM
    # releases no longer accept `verbose_eval` either.
    model_lgb = lgb.train(
        lgbm_params,
        lgb_train,
        valid_sets=[lgb_eval],
        num_boost_round=100,
        callbacks=[
            lgb.early_stopping(stopping_rounds=20),
            lgb.log_evaluation(period=10),
        ],
    )
    # Score the validation fold at the best iteration found by early stopping.
    y_pred = model_lgb.predict(X_valid, num_iteration=model_lgb.best_iteration)
    print(accuracy_score(Y_valid, np.round(y_pred)))
    models.append(model_lgb)

In [None]:
# Run every trained fold model on the test features, keeping one prediction
# array per model in training order.
# NOTE(review): `test_X` is not defined in the cells visible here — confirm
# it is loaded before this cell runs.
preds = [model.predict(test_X) for model in models]

In [None]:
# Turn the list of per-model predictions into a single array (axis 0 follows
# the list order), then average across that axis to ensemble the models.
preds_array = np.array(preds)
preds_mean = preds_array.mean(axis=0)

In [None]:
# Binarise the averaged predictions: values strictly above 0.5 become 1,
# everything else becomes 0.
is_above_threshold = np.greater(preds_mean, 0.5)
preds_int = is_above_threshold.astype(int)