## Load Libraries and Define Paths

In [1]:
import os
import sys

In [2]:
# Put the local civetqc checkout at the front of the module search path so the
# `civetqc` import below resolves to it.
# NOTE(review): this is a machine-specific absolute path — confirm before
# running on another machine.
sys.path.insert(0, '/Users/joshua/Developer/civetqc')

In [3]:
from civetqc.data import CIVETData, QCRatingsData, Dataset

In [4]:
# Root directory holding one sub-directory per study, each containing that
# study's `<study>_civet_data.csv` and `<study>_qc_data.csv` input files.
RAW_DATA_DIR = '/Users/joshua/Developer/civetqc/data/raw'
# Directory the merged training/testing CSV files are written to.
PROCESSED_DATA_DIR = '/Users/joshua/Developer/civetqc/data/processed'

## Create Training Data

In [5]:
def create_training_dataset(
    study_names=("FEP", "INSIGHT", "LAM", "TOPSY", "NUSDAST"),
    cutoff=1,
):
    """Build one dataset from the CIVET and QC ratings files of several studies.

    For each study, ``<study>_civet_data.csv`` and ``<study>_qc_data.csv`` are
    read from ``RAW_DATA_DIR/<study>``, the cutoff is applied to the QC
    ratings, and the merge of the two is appended to a single ``Dataset``.

    Args:
        study_names: Names of the study sub-directories under ``RAW_DATA_DIR``
            to include. Defaults to the five training studies.
        cutoff: Value passed to ``QCRatingsData.apply_cutoff`` for every
            study. Defaults to 1.

    Returns:
        The ``Dataset`` that each per-study merge was appended to.
    """
    dataset = Dataset()
    for study_name in study_names:
        # Both input files of a study live in the same sub-directory.
        study_dir = os.path.join(RAW_DATA_DIR, study_name)
        civet_data = CIVETData.from_csv(
            os.path.join(study_dir, f"{study_name}_civet_data.csv")
        )
        qc_ratings_data = QCRatingsData.from_csv(
            os.path.join(study_dir, f"{study_name}_qc_data.csv")
        )
        # The cutoff is applied before merging, so the merged dataset is
        # built from the already-processed ratings.
        qc_ratings_data.apply_cutoff(cutoff)
        dataset.append(Dataset.from_merge(civet_data, qc_ratings_data))
    return dataset

In [6]:
# Build the combined training dataset from the raw per-study files.
training_dataset = create_training_dataset()

In [7]:
# Save the training dataset as `training_dataset.csv` under PROCESSED_DATA_DIR.
training_dataset.to_csv(os.path.join(PROCESSED_DATA_DIR, 'training_dataset.csv'), index=False)

## Create Testing Data

In [8]:
def create_testing_dataset(study_name='UKBB', cutoff=1):
    """Build the testing dataset from one study's CIVET and QC ratings files.

    Reads ``<study_name>_civet_data.csv`` and ``<study_name>_qc_data.csv``
    from ``RAW_DATA_DIR/<study_name>``, applies the cutoff to the QC ratings,
    and merges the two.

    Args:
        study_name: Name of the study sub-directory under ``RAW_DATA_DIR``.
            Defaults to ``'UKBB'``.
        cutoff: Value passed to ``QCRatingsData.apply_cutoff``. Defaults to 1.

    Returns:
        The result of ``Dataset.from_merge`` on the CIVET data and the
        QC ratings data.
    """
    # Both input files of the study live in the same sub-directory.
    study_dir = os.path.join(RAW_DATA_DIR, study_name)
    civet_data = CIVETData.from_csv(
        os.path.join(study_dir, f'{study_name}_civet_data.csv')
    )
    qc_ratings_data = QCRatingsData.from_csv(
        os.path.join(study_dir, f'{study_name}_qc_data.csv')
    )
    # The cutoff is applied before merging, so the merged dataset is built
    # from the already-processed ratings.
    qc_ratings_data.apply_cutoff(cutoff)
    return Dataset.from_merge(civet_data, qc_ratings_data)

In [9]:
# Build the testing dataset from the raw UKBB files.
testing_dataset = create_testing_dataset()

In [10]:
# Save the testing dataset as `testing_dataset.csv` under PROCESSED_DATA_DIR.
testing_dataset.to_csv(os.path.join(PROCESSED_DATA_DIR, 'testing_dataset.csv'), index=False)