# load python modules

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, balanced_accuracy_score

# read in input files

## pheno

In [None]:
# Load the ADSP integrated phenotype CSV, report how many rows it has, and preview it.
adsp_pheno_path = 'adsp_filt_phenos/ADSPIntegratedPhenotypes_DS_2023.08.08.csv'
adsp_pheno = pd.read_csv(adsp_pheno_path)
print(adsp_pheno.shape[0])
adsp_pheno.head()

In [None]:
# Read the space-separated phenotype table of the "80%_train" sample set and preview it.
adsp_pheno_sample_path = 'input/ADSP_phenotype.80%_train.txt'
adsp_pheno_sample = pd.read_csv(adsp_pheno_sample_path, sep = ' ')
adsp_pheno_sample.head()

## id map

In [None]:
# Read the tab-separated ID map (joined to the phenotype table on SampleID below) and preview it.
id_map_path = 'id_map/ADSP.ROSMAP.MSBB.id_map.txt'
id_map = pd.read_csv(id_map_path, sep = '\t')
id_map.head()

# clean pheno file

In [None]:
# Join the ID map onto the phenotype table by SampleID (inner join), keep only
# the CommonID and DX_harmonized columns, and expose them as ID / AD.
adsp_pheno_id = (
    id_map.merge(adsp_pheno, on = 'SampleID', how = 'inner')
    .loc[:, ['CommonID', 'DX_harmonized']]
    .rename(columns = {'CommonID' : 'ID', 'DX_harmonized' : 'AD'})
)
print(adsp_pheno_id.shape[0])  # row count before removing missing values

# Discard every row that has a missing value, then renumber the index from 0.
adsp_pheno_id = adsp_pheno_id.dropna().reset_index(drop = True)
print(adsp_pheno_id.shape[0])  # row count after removing missing values
adsp_pheno_id.head()

# define number of samples and features

In [None]:
# Number of rows in every simulated feature matrix.
# Derived from the phenotype table instead of being hard-coded (it used to be
# the literal 22365): each simulated matrix is concatenated column-wise onto
# adsp_pheno_sample, and a row-count mismatch would not raise there but would
# leave NaN-padded rows in the combined frame.
n_samples = len(adsp_pheno_sample.index)
n_samples

In [None]:
# Column count of the smaller simulated feature matrices (the "100 features" datasets).
n_features_100 = 100
n_features_100

In [None]:
# Column count of the larger simulated feature matrices (the "2722 features" datasets).
n_features_2722 = 2722
n_features_2722

# create negative control

## 100 features

In [None]:
neg_control_100_x = np.random.randn(n_samples, n_features_100)

In [None]:
neg_control_100_x_df = pd.DataFrame(neg_control_100_x, columns=[f"feature_{i}" for i in range(n_features_100)])
print(neg_control_100_x_df.shape)

In [None]:
neg_control_100 = pd.concat([adsp_pheno_sample, neg_control_100_x_df], axis = 1)
print(neg_control_100.shape)
neg_control_100.head()

## 2722 features

In [None]:
neg_control_2722_x = np.random.randn(n_samples, n_features_2722)

In [None]:
neg_control_2722_x_df = pd.DataFrame(neg_control_2722_x, columns=[f"feature_{i}" for i in range(n_features_2722)])
print(neg_control_2722_x_df.shape)

In [None]:
neg_control_2722 = pd.concat([adsp_pheno_sample, neg_control_2722_x_df], axis = 1)
print(neg_control_2722.shape)
neg_control_2722.head()

# test negative control

## 100 features

In [None]:
print(neg_control_100.drop(columns = ['ID', 'AD']).min().min())
print(neg_control_100.drop(columns = ['ID', 'AD']).max().max())

In [None]:
x_100 = neg_control_100.drop(columns = ['ID', 'AD'])

In [None]:
y_100 = neg_control_100['AD']

In [None]:
x_train_100, x_test_100, y_train_100, y_test_100 = train_test_split(x_100, y_100, stratify = y_100, test_size = 0.3, random_state = 7)

In [None]:
clf_100 = LogisticRegression()
clf_100.fit(x_train_100, y_train_100)
y_pred_100 = clf_100.predict(x_test_100)

In [None]:
# Score the 100-feature negative-control classifier on the held-out rows.
#
# AUROC and AUPRC are ranking metrics, so they are computed from the predicted
# probability of the positive class. Feeding them the thresholded labels in
# y_pred_100 collapses the curve to one operating point, and AUROC then just
# reproduces balanced accuracy. F1 and balanced accuracy are threshold metrics
# and keep using the hard predictions.
# Column 1 of predict_proba belongs to clf_100.classes_[1]; this assumes AD is
# coded 0/1 so that class is AD == 1 -- confirm against the phenotype file.
y_score_100 = clf_100.predict_proba(x_test_100)[:, 1]

auroc_neg_100 = roc_auc_score(y_test_100, y_score_100)
print(auroc_neg_100)
auprc_neg_100 = average_precision_score(y_test_100, y_score_100)
print(auprc_neg_100)
f1_neg_100 = f1_score(y_test_100, y_pred_100)
print(f1_neg_100)
bal_acc_neg_100 = balanced_accuracy_score(y_test_100, y_pred_100)
print(bal_acc_neg_100)

## 2722 features

In [None]:
print(neg_control_2722.drop(columns = ['ID', 'AD']).min().min())
print(neg_control_2722.drop(columns = ['ID', 'AD']).max().max())

In [None]:
x_2722 = neg_control_2722.drop(columns = ['ID', 'AD'])

In [None]:
y_2722 = neg_control_2722['AD']

In [None]:
x_train_2722, x_test_2722, y_train_2722, y_test_2722 = train_test_split(x_2722, y_2722, stratify = y_2722, test_size = 0.3, random_state = 7)

In [None]:
clf_2722 = LogisticRegression()
clf_2722.fit(x_train_2722, y_train_2722)
y_pred_2722 = clf_2722.predict(x_test_2722)

In [None]:
# Score the 2722-feature negative-control classifier on the held-out rows.
#
# AUROC and AUPRC are ranking metrics, so they are computed from the predicted
# probability of the positive class. Feeding them the thresholded labels in
# y_pred_2722 collapses the curve to one operating point, and AUROC then just
# reproduces balanced accuracy. F1 and balanced accuracy are threshold metrics
# and keep using the hard predictions.
# Column 1 of predict_proba belongs to clf_2722.classes_[1]; this assumes AD is
# coded 0/1 so that class is AD == 1 -- confirm against the phenotype file.
y_score_2722 = clf_2722.predict_proba(x_test_2722)[:, 1]

auroc_neg_2722 = roc_auc_score(y_test_2722, y_score_2722)
print(auroc_neg_2722)
auprc_neg_2722 = average_precision_score(y_test_2722, y_score_2722)
print(auprc_neg_2722)
f1_neg_2722 = f1_score(y_test_2722, y_pred_2722)
print(f1_neg_2722)
bal_acc_neg_2722 = balanced_accuracy_score(y_test_2722, y_pred_2722)
print(bal_acc_neg_2722)

# create positive control

In [None]:
# Column indices (0 and 1) that receive class signal in the 2-signal-feature positive controls.
signal_features_2 = list(range(2))

In [None]:
# Column indices (0 through 2) that receive class signal in the 3-signal-feature positive controls.
signal_features_3 = list(range(3))

In [None]:
# Column indices (0 through 3) that receive class signal in the 4-signal-feature positive controls.
signal_features_4 = list(range(4))

In [None]:
# Column indices (0 through 4) that receive class signal in the 5-signal-feature positive controls.
signal_features_5 = list(range(5))

## 100 features

In [None]:
# AD labels taken from the 100-feature negative-control frame; the positive-control
# cells below use them to pick which rows receive signal.
y_100 = neg_control_100.loc[:, 'AD']

### 2 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_100_2f = 1.5

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_100) matrix.
pos_control_100_2f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_100))

# Row mask and case count do not change between iterations, so compute them once.
is_case_100_2f = y_100 == 1
n_cases_100_2f = is_case_100_2f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_2:
    pos_control_100_2f_x[is_case_100_2f, feature_idx] = np.random.normal(
        loc = signal_strength_100_2f,
        scale = 1.0,
        size = n_cases_100_2f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_100_2f = [f"feature_{i}" for i in range(n_features_100)]
pos_control_100_2f_x_df = pd.DataFrame(pos_control_100_2f_x, columns = feature_names_100_2f)
pos_control_100_2f = pd.concat([adsp_pheno_sample, pos_control_100_2f_x_df], axis = 1)
print(pos_control_100_2f.shape)

### 3 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_100_3f = 1.2

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_100) matrix.
pos_control_100_3f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_100))

# Row mask and case count do not change between iterations, so compute them once.
is_case_100_3f = y_100 == 1
n_cases_100_3f = is_case_100_3f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_3:
    pos_control_100_3f_x[is_case_100_3f, feature_idx] = np.random.normal(
        loc = signal_strength_100_3f,
        scale = 1.0,
        size = n_cases_100_3f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_100_3f = [f"feature_{i}" for i in range(n_features_100)]
pos_control_100_3f_x_df = pd.DataFrame(pos_control_100_3f_x, columns = feature_names_100_3f)
pos_control_100_3f = pd.concat([adsp_pheno_sample, pos_control_100_3f_x_df], axis = 1)
print(pos_control_100_3f.shape)

### 4 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_100_4f = 1

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_100) matrix.
pos_control_100_4f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_100))

# Row mask and case count do not change between iterations, so compute them once.
is_case_100_4f = y_100 == 1
n_cases_100_4f = is_case_100_4f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_4:
    pos_control_100_4f_x[is_case_100_4f, feature_idx] = np.random.normal(
        loc = signal_strength_100_4f,
        scale = 1.0,
        size = n_cases_100_4f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_100_4f = [f"feature_{i}" for i in range(n_features_100)]
pos_control_100_4f_x_df = pd.DataFrame(pos_control_100_4f_x, columns = feature_names_100_4f)
pos_control_100_4f = pd.concat([adsp_pheno_sample, pos_control_100_4f_x_df], axis = 1)
print(pos_control_100_4f.shape)

### 5 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_100_5f = 0.9

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_100) matrix.
pos_control_100_5f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_100))

# Row mask and case count do not change between iterations, so compute them once.
is_case_100_5f = y_100 == 1
n_cases_100_5f = is_case_100_5f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_5:
    pos_control_100_5f_x[is_case_100_5f, feature_idx] = np.random.normal(
        loc = signal_strength_100_5f,
        scale = 1.0,
        size = n_cases_100_5f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_100_5f = [f"feature_{i}" for i in range(n_features_100)]
pos_control_100_5f_x_df = pd.DataFrame(pos_control_100_5f_x, columns = feature_names_100_5f)
pos_control_100_5f = pd.concat([adsp_pheno_sample, pos_control_100_5f_x_df], axis = 1)
print(pos_control_100_5f.shape)

## 2722 features

### 2 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_2722_2f = 1.7

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_2722) matrix.
pos_control_2722_2f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_2722))

# y_2722 is the AD column assigned in the 2722-feature negative-control test
# section. Row mask and case count are loop-invariant, so compute them once.
is_case_2722_2f = y_2722 == 1
n_cases_2722_2f = is_case_2722_2f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_2:
    pos_control_2722_2f_x[is_case_2722_2f, feature_idx] = np.random.normal(
        loc = signal_strength_2722_2f,
        scale = 1.0,
        size = n_cases_2722_2f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_2722_2f = [f"feature_{i}" for i in range(n_features_2722)]
pos_control_2722_2f_x_df = pd.DataFrame(pos_control_2722_2f_x, columns = feature_names_2722_2f)
pos_control_2722_2f = pd.concat([adsp_pheno_sample, pos_control_2722_2f_x_df], axis = 1)
print(pos_control_2722_2f.shape)

### 3 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_2722_3f = 1.4

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_2722) matrix.
pos_control_2722_3f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_2722))

# y_2722 is the AD column assigned in the 2722-feature negative-control test
# section. Row mask and case count are loop-invariant, so compute them once.
is_case_2722_3f = y_2722 == 1
n_cases_2722_3f = is_case_2722_3f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_3:
    pos_control_2722_3f_x[is_case_2722_3f, feature_idx] = np.random.normal(
        loc = signal_strength_2722_3f,
        scale = 1.0,
        size = n_cases_2722_3f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_2722_3f = [f"feature_{i}" for i in range(n_features_2722)]
pos_control_2722_3f_x_df = pd.DataFrame(pos_control_2722_3f_x, columns = feature_names_2722_3f)
pos_control_2722_3f = pd.concat([adsp_pheno_sample, pos_control_2722_3f_x_df], axis = 1)
print(pos_control_2722_3f.shape)

### 4 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_2722_4f = 1.2

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_2722) matrix.
pos_control_2722_4f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_2722))

# y_2722 is the AD column assigned in the 2722-feature negative-control test
# section. Row mask and case count are loop-invariant, so compute them once.
is_case_2722_4f = y_2722 == 1
n_cases_2722_4f = is_case_2722_4f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_4:
    pos_control_2722_4f_x[is_case_2722_4f, feature_idx] = np.random.normal(
        loc = signal_strength_2722_4f,
        scale = 1.0,
        size = n_cases_2722_4f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_2722_4f = [f"feature_{i}" for i in range(n_features_2722)]
pos_control_2722_4f_x_df = pd.DataFrame(pos_control_2722_4f_x, columns = feature_names_2722_4f)
pos_control_2722_4f = pd.concat([adsp_pheno_sample, pos_control_2722_4f_x_df], axis = 1)
print(pos_control_2722_4f.shape)

### 5 features with signal

In [None]:
# Mean given to the signal columns on rows where AD == 1.
signal_strength_2722_5f = 1.1

# Baseline: standard-normal noise in every cell of the (n_samples x n_features_2722) matrix.
pos_control_2722_5f_x = np.random.normal(loc = 0.0, scale = 1.0, size = (n_samples, n_features_2722))

# y_2722 is the AD column assigned in the 2722-feature negative-control test
# section. Row mask and case count are loop-invariant, so compute them once.
is_case_2722_5f = y_2722 == 1
n_cases_2722_5f = is_case_2722_5f.sum()
# Redraw each signal column for the AD == 1 rows around the shifted mean.
for feature_idx in signal_features_5:
    pos_control_2722_5f_x[is_case_2722_5f, feature_idx] = np.random.normal(
        loc = signal_strength_2722_5f,
        scale = 1.0,
        size = n_cases_2722_5f,
    )

# Name the columns feature_0, feature_1, ... and attach them to the phenotype table.
feature_names_2722_5f = [f"feature_{i}" for i in range(n_features_2722)]
pos_control_2722_5f_x_df = pd.DataFrame(pos_control_2722_5f_x, columns = feature_names_2722_5f)
pos_control_2722_5f = pd.concat([adsp_pheno_sample, pos_control_2722_5f_x_df], axis = 1)
print(pos_control_2722_5f.shape)

# test positive control

## 100 features

### 2 features with signal

In [None]:
# Evaluate the 100-feature positive control with 2 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_100_2f = pos_control_100_2f.drop(columns = ['ID', 'AD'])
print(x_100_2f.min().min())
print(x_100_2f.max().max())
print(' ')
y_100_2f = pos_control_100_2f['AD']
x_train_100_2f, x_test_100_2f, y_train_100_2f, y_test_100_2f = train_test_split(x_100_2f, y_100_2f, stratify = y_100_2f, test_size = 0.3, random_state = 7)
clf_100_2f = LogisticRegression()
clf_100_2f.fit(x_train_100_2f, y_train_100_2f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_100_2f = clf_100_2f.predict(x_test_100_2f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_100_2f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_100_2f = clf_100_2f.predict_proba(x_test_100_2f)[:, 1]

auroc_pos_100_2f = roc_auc_score(y_test_100_2f, y_score_100_2f)
print(auroc_pos_100_2f)
auprc_pos_100_2f = average_precision_score(y_test_100_2f, y_score_100_2f)
print(auprc_pos_100_2f)
f1_pos_100_2f = f1_score(y_test_100_2f, y_pred_100_2f)
print(f1_pos_100_2f)
bal_acc_pos_100_2f = balanced_accuracy_score(y_test_100_2f, y_pred_100_2f)
print(bal_acc_pos_100_2f)

### 3 features with signal

In [None]:
# Evaluate the 100-feature positive control with 3 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_100_3f = pos_control_100_3f.drop(columns = ['ID', 'AD'])
print(x_100_3f.min().min())
print(x_100_3f.max().max())
print(' ')

y_100_3f = pos_control_100_3f['AD']
x_train_100_3f, x_test_100_3f, y_train_100_3f, y_test_100_3f = train_test_split(x_100_3f, y_100_3f, stratify = y_100_3f, test_size = 0.3, random_state = 7)
clf_100_3f = LogisticRegression()
clf_100_3f.fit(x_train_100_3f, y_train_100_3f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_100_3f = clf_100_3f.predict(x_test_100_3f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_100_3f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_100_3f = clf_100_3f.predict_proba(x_test_100_3f)[:, 1]

auroc_pos_100_3f = roc_auc_score(y_test_100_3f, y_score_100_3f)
print(auroc_pos_100_3f)
auprc_pos_100_3f = average_precision_score(y_test_100_3f, y_score_100_3f)
print(auprc_pos_100_3f)
f1_pos_100_3f = f1_score(y_test_100_3f, y_pred_100_3f)
print(f1_pos_100_3f)
bal_acc_pos_100_3f = balanced_accuracy_score(y_test_100_3f, y_pred_100_3f)
print(bal_acc_pos_100_3f)

### 4 features with signal

In [None]:
# Evaluate the 100-feature positive control with 4 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_100_4f = pos_control_100_4f.drop(columns = ['ID', 'AD'])
print(x_100_4f.min().min())
print(x_100_4f.max().max())
print(' ')

y_100_4f = pos_control_100_4f['AD']
x_train_100_4f, x_test_100_4f, y_train_100_4f, y_test_100_4f = train_test_split(x_100_4f, y_100_4f, stratify = y_100_4f, test_size = 0.3, random_state = 7)
clf_100_4f = LogisticRegression()
clf_100_4f.fit(x_train_100_4f, y_train_100_4f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_100_4f = clf_100_4f.predict(x_test_100_4f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_100_4f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_100_4f = clf_100_4f.predict_proba(x_test_100_4f)[:, 1]

auroc_pos_100_4f = roc_auc_score(y_test_100_4f, y_score_100_4f)
print(auroc_pos_100_4f)
auprc_pos_100_4f = average_precision_score(y_test_100_4f, y_score_100_4f)
print(auprc_pos_100_4f)
f1_pos_100_4f = f1_score(y_test_100_4f, y_pred_100_4f)
print(f1_pos_100_4f)
bal_acc_pos_100_4f = balanced_accuracy_score(y_test_100_4f, y_pred_100_4f)
print(bal_acc_pos_100_4f)
# Older name for the same value; kept as an alias instead of recomputing the metric.
bal_acc_100_4f = bal_acc_pos_100_4f

### 5 features with signal

In [None]:
# Evaluate the 100-feature positive control with 5 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_100_5f = pos_control_100_5f.drop(columns = ['ID', 'AD'])
print(x_100_5f.min().min())
print(x_100_5f.max().max())
print(' ')
y_100_5f = pos_control_100_5f['AD']
x_train_100_5f, x_test_100_5f, y_train_100_5f, y_test_100_5f = train_test_split(x_100_5f, y_100_5f, stratify = y_100_5f, test_size = 0.3, random_state = 7)
clf_100_5f = LogisticRegression()
clf_100_5f.fit(x_train_100_5f, y_train_100_5f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_100_5f = clf_100_5f.predict(x_test_100_5f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_100_5f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_100_5f = clf_100_5f.predict_proba(x_test_100_5f)[:, 1]

auroc_pos_100_5f = roc_auc_score(y_test_100_5f, y_score_100_5f)
print(auroc_pos_100_5f)
auprc_pos_100_5f = average_precision_score(y_test_100_5f, y_score_100_5f)
print(auprc_pos_100_5f)
f1_pos_100_5f = f1_score(y_test_100_5f, y_pred_100_5f)
print(f1_pos_100_5f)
bal_acc_pos_100_5f = balanced_accuracy_score(y_test_100_5f, y_pred_100_5f)
print(bal_acc_pos_100_5f)
# Older name for the same value; kept as an alias instead of recomputing the metric.
bal_acc_100_5f = bal_acc_pos_100_5f

## 2722 features

### 2 features with signal

In [None]:
# Evaluate the 2722-feature positive control with 2 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_2722_2f = pos_control_2722_2f.drop(columns = ['ID', 'AD'])
print(x_2722_2f.min().min())
print(x_2722_2f.max().max())
print(' ')

y_2722_2f = pos_control_2722_2f['AD']
x_train_2722_2f, x_test_2722_2f, y_train_2722_2f, y_test_2722_2f = train_test_split(x_2722_2f, y_2722_2f, stratify = y_2722_2f, test_size = 0.3, random_state = 7)
clf_2722_2f = LogisticRegression()
clf_2722_2f.fit(x_train_2722_2f, y_train_2722_2f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_2722_2f = clf_2722_2f.predict(x_test_2722_2f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_2722_2f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_2722_2f = clf_2722_2f.predict_proba(x_test_2722_2f)[:, 1]

auroc_pos_2722_2f = roc_auc_score(y_test_2722_2f, y_score_2722_2f)
print(auroc_pos_2722_2f)
auprc_pos_2722_2f = average_precision_score(y_test_2722_2f, y_score_2722_2f)
print(auprc_pos_2722_2f)
f1_pos_2722_2f = f1_score(y_test_2722_2f, y_pred_2722_2f)
print(f1_pos_2722_2f)
bal_acc_pos_2722_2f = balanced_accuracy_score(y_test_2722_2f, y_pred_2722_2f)
print(bal_acc_pos_2722_2f)

### 3 features with signal

In [None]:
# Evaluate the 2722-feature positive control with 3 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_2722_3f = pos_control_2722_3f.drop(columns = ['ID', 'AD'])
print(x_2722_3f.min().min())
print(x_2722_3f.max().max())
print(' ')

y_2722_3f = pos_control_2722_3f['AD']
x_train_2722_3f, x_test_2722_3f, y_train_2722_3f, y_test_2722_3f = train_test_split(x_2722_3f, y_2722_3f, stratify = y_2722_3f, test_size = 0.3, random_state = 7)
clf_2722_3f = LogisticRegression()
clf_2722_3f.fit(x_train_2722_3f, y_train_2722_3f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_2722_3f = clf_2722_3f.predict(x_test_2722_3f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_2722_3f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_2722_3f = clf_2722_3f.predict_proba(x_test_2722_3f)[:, 1]

auroc_pos_2722_3f = roc_auc_score(y_test_2722_3f, y_score_2722_3f)
print(auroc_pos_2722_3f)
auprc_pos_2722_3f = average_precision_score(y_test_2722_3f, y_score_2722_3f)
print(auprc_pos_2722_3f)
f1_pos_2722_3f = f1_score(y_test_2722_3f, y_pred_2722_3f)
print(f1_pos_2722_3f)
bal_acc_pos_2722_3f = balanced_accuracy_score(y_test_2722_3f, y_pred_2722_3f)
print(bal_acc_pos_2722_3f)

### 4 features with signal

In [None]:
# Evaluate the 2722-feature positive control with 4 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_2722_4f = pos_control_2722_4f.drop(columns = ['ID', 'AD'])
print(x_2722_4f.min().min())
print(x_2722_4f.max().max())
print(' ')

y_2722_4f = pos_control_2722_4f['AD']
x_train_2722_4f, x_test_2722_4f, y_train_2722_4f, y_test_2722_4f = train_test_split(x_2722_4f, y_2722_4f, stratify = y_2722_4f, test_size = 0.3, random_state = 7)
clf_2722_4f = LogisticRegression()
clf_2722_4f.fit(x_train_2722_4f, y_train_2722_4f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_2722_4f = clf_2722_4f.predict(x_test_2722_4f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_2722_4f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_2722_4f = clf_2722_4f.predict_proba(x_test_2722_4f)[:, 1]

auroc_pos_2722_4f = roc_auc_score(y_test_2722_4f, y_score_2722_4f)
print(auroc_pos_2722_4f)
auprc_pos_2722_4f = average_precision_score(y_test_2722_4f, y_score_2722_4f)
print(auprc_pos_2722_4f)
f1_pos_2722_4f = f1_score(y_test_2722_4f, y_pred_2722_4f)
print(f1_pos_2722_4f)
bal_acc_pos_2722_4f = balanced_accuracy_score(y_test_2722_4f, y_pred_2722_4f)
print(bal_acc_pos_2722_4f)

### 5 features with signal

In [None]:
# Evaluate the 2722-feature positive control with 5 signal features.

# Build the feature frame once; it serves both the range check and the model input.
x_2722_5f = pos_control_2722_5f.drop(columns = ['ID', 'AD'])
print(x_2722_5f.min().min())
print(x_2722_5f.max().max())
print(' ')

y_2722_5f = pos_control_2722_5f['AD']
x_train_2722_5f, x_test_2722_5f, y_train_2722_5f, y_test_2722_5f = train_test_split(x_2722_5f, y_2722_5f, stratify = y_2722_5f, test_size = 0.3, random_state = 7)
clf_2722_5f = LogisticRegression()
clf_2722_5f.fit(x_train_2722_5f, y_train_2722_5f)
# Hard class labels for the threshold metrics (F1, balanced accuracy).
y_pred_2722_5f = clf_2722_5f.predict(x_test_2722_5f)
# Continuous scores for the ranking metrics. AUROC/AUPRC computed from hard
# labels see a single operating point, and AUROC then equals balanced accuracy.
# Column 1 belongs to clf_2722_5f.classes_[1]; assumes AD is coded 0/1 -- confirm.
y_score_2722_5f = clf_2722_5f.predict_proba(x_test_2722_5f)[:, 1]

auroc_pos_2722_5f = roc_auc_score(y_test_2722_5f, y_score_2722_5f)
print(auroc_pos_2722_5f)
auprc_pos_2722_5f = average_precision_score(y_test_2722_5f, y_score_2722_5f)
print(auprc_pos_2722_5f)
f1_pos_2722_5f = f1_score(y_test_2722_5f, y_pred_2722_5f)
print(f1_pos_2722_5f)
bal_acc_pos_2722_5f = balanced_accuracy_score(y_test_2722_5f, y_pred_2722_5f)
print(bal_acc_pos_2722_5f)

# remove AD column

## negative control

### 100 features

In [None]:
neg_control_100_export = neg_control_100.drop(columns = ['AD'])

### 2722 features

In [None]:
neg_control_2722_export = neg_control_2722.drop(columns = ['AD'])

## positive control

### 100 features

In [None]:
pos_control_100_2f_export = pos_control_100_2f.drop(columns = ['AD'])

In [None]:
pos_control_100_3f_export = pos_control_100_3f.drop(columns = ['AD'])

In [None]:
pos_control_100_4f_export = pos_control_100_4f.drop(columns = ['AD'])

In [None]:
pos_control_100_5f_export = pos_control_100_5f.drop(columns = ['AD'])

### 2722 features

In [None]:
pos_control_2722_2f_export = pos_control_2722_2f.drop(columns = ['AD'])

In [None]:
pos_control_2722_3f_export = pos_control_2722_3f.drop(columns = ['AD'])

In [None]:
pos_control_2722_4f_export = pos_control_2722_4f.drop(columns = ['AD'])

In [None]:
pos_control_2722_5f_export = pos_control_2722_5f.drop(columns = ['AD'])

# export

## negative controls

In [None]:
neg_control_100_export.to_csv('simulated_datasets/ADSP.simulated.100_features.80%_train.negative_control.txt',
                              sep = ' ',
                              index = None,
                              na_rep = 'NaN')

In [None]:
neg_control_2722_export.to_csv('simulated_datasets/ADSP.simulated.2722_features.80%_train.negative_control.txt',
                               sep = ' ',
                               index = None,
                               na_rep = 'NaN')

## positive controls

### 100 features

In [None]:
pos_control_100_2f_export.to_csv('simulated_datasets/ADSP.simulated.100_features.features_0-1_signal.80%_train.positive_control.txt',
                                 sep = ' ',
                                 index = None,
                                 na_rep = 'NaN')

In [None]:
pos_control_100_3f_export.to_csv('simulated_datasets/ADSP.simulated.100_features.features_0-2_signal.80%_train.positive_control.txt',
                                 sep = ' ',
                                 index = None,
                                 na_rep = 'NaN')

In [None]:
pos_control_100_4f_export.to_csv('simulated_datasets/ADSP.simulated.100_features.features_0-3_signal.80%_train.positive_control.txt',
                                 sep = ' ',
                                 index = None,
                                 na_rep = 'NaN')

In [None]:
pos_control_100_5f_export.to_csv('simulated_datasets/ADSP.simulated.100_features.features_0-4_signal.80%_train.positive_control.txt',
                                 sep = ' ',
                                 index = None,
                                 na_rep = 'NaN')

### 2722 features

In [None]:
pos_control_2722_2f_export.to_csv('simulated_datasets/ADSP.simulated.2722_features.features_0-1_signal.80%_train.positive_control.txt',
                                  sep = ' ',
                                  index = None,
                                  na_rep = 'NaN')

In [None]:
pos_control_2722_3f_export.to_csv('simulated_datasets/ADSP.simulated.2722_features.features_0-2_signal.80%_train.positive_control.txt',
                                  sep = ' ',
                                  index = None,
                                  na_rep = 'NaN')

In [None]:
pos_control_2722_4f_export.to_csv('simulated_datasets/ADSP.simulated.2722_features.features_0-3_signal.80%_train.positive_control.txt',
                                  sep = ' ',
                                  index = None,
                                  na_rep = 'NaN')

In [None]:
pos_control_2722_5f_export.to_csv('simulated_datasets/ADSP.simulated.2722_features.features_0-4_signal.80%_train.positive_control.txt',
                                  sep = ' ',
                                  index = None,
                                  na_rep = 'NaN')