In [69]:
# import libraries
import os
import pandas as pd

from src.constants import Defaults

%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Make interim data files (preprocess the raw data files)

In [None]:
from src.scripts import make_interim_datasets

make_interim_datasets.run(raw_dir=Dirs.raw_dir, interim_dir=Dirs.interim_dir)

## Make spec files

In [70]:
from src.scripts import make_specs

# where specs will be saved
SPEC_DIR = ''

make_specs.run(out_dir=SPEC_DIR)

created pydraml specs, saved to /Users/maedbhking/Documents/hbn_data
created participant specs, saved to /Users/maedbhking/Documents/hbn_data
created feature specs, saved to /Users/maedbhking/Documents/hbn_data
created target specs, saved to /Users/maedbhking/Documents/hbn_data


## Make train/test splits

In [59]:
import os
from src.scripts import make_train_test_split

DATA_DIR = '/Users/maedbhking/Documents/hbn_data/'

# outpath for train/test data
outpath = os.path.join(DATA_DIR, 'participants_train_test.csv')

# path to dataframe you want to split
fpath = os.path.join(DATA_DIR, 'Clinical_Diagnosis_Demographics.csv')

# which columns of dataframe do you want to stratify
columns_to_stratify = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']

# which column indicates participant identifiers
columns_to_keep = ['Identifiers']

make_train_test_split.run(fpath=fpath, 
                         outpath=outpath,
                         columns_to_stratify=columns_to_stratify,
                         columns_to_keep=columns_to_keep,
                         test_size=.2,
                         random_state=42,
                         )

train/test splits saved to /Users/maedbhking/Documents/hbn_data/participants_train_test.csv


## Make firstlevel model

In [68]:
import glob

# list spec files
list(glob.glob(os.path.join(SPEC_DIR, '*spec.json*')))

['/Users/maedbhking/Documents/hbn_data/target1-spec.json',
 '/Users/maedbhking/Documents/hbn_data/pydraml1-spec.json',
 '/Users/maedbhking/Documents/hbn_data/target_DX_01_Cat_binarize-spec.json',
 '/Users/maedbhking/Documents/hbn_data/features-Teacher_Measures-domains-Peer_Affiliation_and_Social_Acceptance_-Dishion_Teacher-spec.json',
 '/Users/maedbhking/Documents/hbn_data/features1-spec.json',
 '/Users/maedbhking/Documents/hbn_data/participant1-spec.json',
 '/Users/maedbhking/Documents/hbn_data/pydraml2-spec.json']

In [67]:
from src.scripts import make_model

# where spec files are saved
SPEC_DIR = '' # directory where spec files are stored
DATA_DIR = '' # directory where data are stored 
OUT_DIR = '' # directory where model features + spec should be saved

# fullpath to feature/target/participant specs
feature_spec = os.path.join(SPEC_DIR, 'features1-spec.json')
target_spec = os.path.join(SPEC_DIR, 'target1-spec.json')
participant_spec = os.path.join(SPEC_DIR, 'participant1-spec.json')
pydraml_spec = os.path.join(SPEC_DIR, 'pydraml1-spec.json')

# make model spec and features
make_model.run(
    feature_spec,
    target_spec,
    participant_spec,
    pydraml_spec,
    data_dir=DATA_DIR,
    out_dir=OUT_DIR
    )

creating /Users/maedbhking/Documents/hbn_data/test123
created new file: features-766723.csv and model spec file: model_spec-766723.json in /Users/maedbhking/Documents/hbn_data/test123


## Run firstlevel model(s): train and get model summary

In [56]:
from src.scripts import run_model

models = [] # list of model directories to run (e.g., `../processed/models/<model_name>`)

# loop over models
for model in models:
    # get fullpath to model directory
    # model spec file and model features should also be in `model_path`

    model_dir = os.path.join(Dirs.model_dir, model) # directory where model results will be saved
    
    # train model and get model summary
    run_model.run(
        model_dir=model_dir,
        cache_dir=None
    )

## Make secondlevel models

In [None]:
from src.scripts import make_secondlevel_model
import os


# firstlevel models
models = [] # list of model directories to run (e.g., `../processed/models/<model_name>`)

# loop over models
for model in models:

    # define directories
    firstlevel_model_dir = os.path.join(Dirs.model_dir, model)
    secondlevel_model_dir = os.path.join(Dirs.model_dir, f'{model}_secondlevel')

    # make secondlevel models
    make_secondlevel_model.run(firstlevel_model_dir, secondlevel_model_dir)

## Run secondlevel models

In [None]:
from src.scripts import run_model
import os

models = [] # list of model directories to run (e.g., `../processed/models/<model_name>`)

# run second level models
# loop over models
for model in models:
    # get fullpath to model directory
    # model spec file and model features should also be in `model_path`
    model_dir = os.path.join(Dirs.model_dir, model) # directory where model results will be saved

    # train model and get model summary
    run_model.run(
        model_dir=model_dir,
        cache_dir=None
    )