<a href="https://colab.research.google.com/github/loveseaslug/causal_inferences/blob/main/optuna_tuning_for_sparceSC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install SparseSC directly from its GitHub repository (no tag or commit is pinned,
# so the installed version depends on when this cell is run) and Optuna via pip.
!pip install git+https://github.com/microsoft/SparseSC.git
!pip install optuna

In [None]:
import numpy as np
import pandas as pd
import SparseSC
import optuna
# Disable the default handler of Optuna's root logger so its default log output is silenced.
optuna.logging.disable_default_handler()

In [None]:
def shuffle_features(random_state, extract_nums, features, postspective, treatment_units_index):
  """Draw a seeded random subset of control units and append the treated unit(s).

  Args:
    random_state: Seed for the draw; anything ``int()`` accepts (e.g. a digit string).
    extract_nums: Maximum number of control rows to keep. When it exceeds the
      number of available control rows, every control row is kept.
    features: Row-indexable array with one row per unit
      (assumed to be a NumPy array - TODO confirm against the caller).
    postspective: Row-indexable array with the same row order as ``features``.
    treatment_units_index: Row index, or sequence of row indices, of the treated
      unit(s) in ``features``. Negative indices count from the end.

  Returns:
    A tuple ``(f, p, t, shuffled_index_with_treatment)`` where ``f`` and ``p``
    are the selected rows of ``features`` and ``postspective`` (controls first,
    treated rows last), ``t`` is the list of positions of the treated rows
    inside ``f``/``p``, and ``shuffled_index_with_treatment`` holds the original
    row indices that were selected.

  Note:
    Seeds NumPy's global random generator as a side effect.
  """
  # Fix the seed used by np.random.permutation.
  np.random.seed(int(random_state))

  treated_index = np.atleast_1d(treatment_units_index).astype(int)

  # The control pool is every row that is not a treated row. Negative treated
  # indices are resolved first so that e.g. -1 still excludes the last row.
  n_units = len(features)
  resolved_treated = np.where(treated_index < 0, treated_index + n_units, treated_index)
  control_pool = np.setdiff1d(np.arange(n_units), resolved_treated)

  # Shuffle the control rows and keep the first `extract_nums` of them.
  shuffled_index = np.random.permutation(control_pool)[:extract_nums]

  # Append the treated rows so they are not lost.
  shuffled_index_with_treatment = np.concatenate((shuffled_index, treated_index))

  # Features/targets of the randomly chosen controls plus the treated rows.
  f = features[shuffled_index_with_treatment]
  p = postspective[shuffled_index_with_treatment]

  # Treated rows sit directly after the controls: one position per treated unit.
  n_controls = len(shuffled_index)
  t = list(range(n_controls, n_controls + len(treated_index)))

  return f, p, t, shuffled_index_with_treatment

In [None]:
def score_function(preds, target):
  """Return the mean squared error between ``preds`` and ``target``."""
  residuals = preds - target
  return np.mean(np.square(residuals))

def objective_variable_degree(features, postspective, treatment_units_index,
                              seed_choices=tuple(str(i) for i in range(10)),
                              sample_range=(10, 36),
                              stop_threshold=1,
                              w_pens=None):
  """Build an Optuna objective that searches over random control-group subsets.

  Each trial picks a seed, a SparseSC model type and a number of control units,
  draws that control subset with ``shuffle_features``, fits ``SparseSC.fit_fast``
  and scores how closely the prediction for the treated rows matches the
  treated rows of the feature matrix (mean squared error, lower is better).

  Args:
    features: Feature matrix passed through to ``shuffle_features``.
    postspective: Target matrix passed through to ``shuffle_features``.
    treatment_units_index: Row index/indices of the treated unit(s).
    seed_choices: Categorical choices for the ``'state'`` parameter; each must
      be convertible with ``int()``. Defaults to the strings ``'0'``..``'9'``.
    sample_range: Inclusive ``(low, high)`` bounds for the ``'samples'``
      parameter, i.e. how many control units to draw. Defaults to ``(10, 36)``.
    stop_threshold: The study is stopped as soon as a trial scores strictly
      below this value. Pass ``None`` to never stop early. Defaults to ``1``.
    w_pens: Penalty grid handed to ``SparseSC.fit_fast``. Defaults to
      ``np.logspace(-4, 3, 1000)``.

  Returns:
    A function ``objective(trial)`` suitable for ``study.optimize``.
  """
  if w_pens is None:
    w_pens = np.logspace(start=-4, stop=3, num=1000)
  min_samples, max_samples = sample_range

  def objective(trial):
    # Seed used when drawing the control group.
    seed = trial.suggest_categorical('state', seed_choices)
    model_type = trial.suggest_categorical('model_type', ['retrospective', 'prospective'])
    # Number of control units to sample.
    n_controls = trial.suggest_int('samples', min_samples, max_samples)

    # Select the control subset (plus the treated rows) from the original data.
    m_f, m_p, m_t, _ = shuffle_features(random_state=seed, extract_nums=n_controls,
                                        features=features, postspective=postspective,
                                        treatment_units_index=treatment_units_index)

    # Fit the synthetic control model.
    model = SparseSC.fit_fast(features=m_f, targets=m_p, treated_units=m_t,
                              w_pens=w_pens, model_type=model_type)

    # MSE between the prediction for the treated rows and their features.
    preds = model.predict(m_f)[m_t]
    score = score_function(preds, m_f[m_t])

    # Good enough: ask Optuna to stop scheduling further trials.
    if stop_threshold is not None and score < stop_threshold:
      trial.study.stop()

    return score

  return objective

In [None]:
# NOTE(review): `control`, `treatment`, `features`, `postspective` and
# `treatment_units_index` are not defined in the cells visible here; they must
# already exist in the kernel before this cell runs.
db_name = f"{control}_{treatment}"
study_name = f'{control}_{treatment}_example-study'
storage_url = f'sqlite:///../optuna_study_{db_name}.db'

# Randomly pick subsets of the control group and optimize (deliberately overfit)
# the SCM fit. The study is stored in SQLite and reused when it already exists.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_url,
    direction="minimize",
    load_if_exists=True,
)

study_objective = objective_variable_degree(
    features=features,
    postspective=postspective,
    treatment_units_index=treatment_units_index,
)
study.optimize(study_objective, n_trials=10, show_progress_bar=False)