# How to use `indigopy`
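This notebook shows how to use the `indigopy` package: load a bundled sample dataset, featurize the interactions, and train regression- and classification-based random forest models on the result.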

## Set up environment

In [7]:
# Import dependencies
# NOTE(review): `pd` is not referenced in the cells visible here — confirm it is still needed.
import pandas as pd
from scipy.stats import spearmanr
from sklearn.metrics import r2_score, classification_report
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Import package functions
# NOTE(review): the path appended below is an absolute, machine-specific
# location, so this cell only resolves `indigopy` on that one machine.
# Presumably an installed package (or a path relative to the notebook) would
# make this step unnecessary — confirm how `indigopy` is meant to be installed.
import sys
sys.path.append('c:/Users/carol/github/INDIGOpy/')
from indigopy.core import load_sample, featurize, classify

## Example: *E. coli*

In [8]:
# End-to-end example on the bundled 'ecoli' sample: featurize the train/test
# interactions, then fit and evaluate a random forest regressor (on the raw
# scores) and a random forest classifier (on thresholded class labels).

# Fixed seed so the reported metrics are reproducible across kernel restarts
# (random forests are stochastic when `random_state` is left unset).
SEED = 0

# Load sample data
sample = load_sample('ecoli')

# Define input arguments
key             = sample['key']
profiles        = sample['profiles']
feature_names   = sample['feature_names']
train_ixns      = sample['train']['interactions']
train_scores    = sample['train']['scores']
test_ixns       = sample['test']['interactions']
test_scores     = sample['test']['scores']

# Determine ML features
train_data      = featurize(train_ixns, profiles, feature_names=feature_names, key=key, silent=True)
test_data       = featurize(test_ixns, profiles, feature_names=feature_names, key=key, silent=True)
# The feature table is transposed before being handed to scikit-learn, which
# expects one row per sample (presumably `feature_df` stores one column per
# interaction — confirm against `featurize`).
X_train         = train_data['feature_df'].to_numpy().transpose()
X_test          = test_data['feature_df'].to_numpy().transpose()

# Determine class labels
# Two thresholds split the scores into three classes; presumably
# S = synergy, N = neutral, A = antagonism — confirm against `classify`.
thresh, classes = (-0.5, 2), ('S', 'N', 'A')
train_labels    = classify(train_scores, thresholds=thresh, classes=classes)
test_labels     = classify(test_scores, thresholds=thresh, classes=classes)

# Train and apply a regression-based model
reg_model = RandomForestRegressor(random_state=SEED)
reg_model.fit(X_train, train_scores)
reg_y = reg_model.predict(X_test)
r, p = spearmanr(test_scores, reg_y)
r2 = r2_score(test_scores, reg_y)
print('Regression results:')
print('\tSpearman R = {}'.format(round(r, 4)))
print('\tSpearman p = {:.3g}'.format(p))
print('\tR2 = {}'.format(round(r2, 4)))

# Train and apply a classification-based model
class_model = RandomForestClassifier(random_state=SEED)
class_model.fit(X_train, train_labels)
class_y = class_model.predict(X_test)
print('Classification results:')
# zero_division=0 reports 0.0 for a class that is never predicted (the same
# value as the default) without emitting an UndefinedMetricWarning.
print(classification_report(test_labels, class_y, zero_division=0))

Regression results:
	Spearman R = 0.6421
	Spearman p = 6.22e-09
	R2 = 0.3976
Classification results:
              precision    recall  f1-score   support

           A       0.44      0.31      0.36        13
           N       0.67      0.90      0.77        42
           S       0.00      0.00      0.00        11

    accuracy                           0.64        66
   macro avg       0.37      0.40      0.38        66
weighted avg       0.51      0.64      0.56        66



  _warn_prf(average, modifier, msg_start, len(result))


## Example: *M. tuberculosis*

In [9]:
# End-to-end example on the bundled 'mtb' sample: featurize the train/test/
# clinical interactions, fit and evaluate a random forest regressor and
# classifier on the train/test split, then apply the regressor to the
# clinical set.

# Fixed seed so the reported metrics are reproducible across kernel restarts
# (random forests are stochastic when `random_state` is left unset).
SEED = 0

# Load sample data
sample = load_sample('mtb')

# Define input arguments
key             = sample['key']
profiles        = sample['profiles']
feature_names   = sample['feature_names']
train_ixns      = sample['train']['interactions']
train_scores    = sample['train']['scores']
test_ixns       = sample['test']['interactions']
test_scores     = sample['test']['scores']
clinical_ixns   = sample['clinical']['interactions']
clinical_scores = sample['clinical']['scores']

# Determine ML features
train_data      = featurize(train_ixns, profiles, feature_names=feature_names, key=key, silent=True)
test_data       = featurize(test_ixns, profiles, feature_names=feature_names, key=key, silent=True)
clinical_data   = featurize(clinical_ixns, profiles, feature_names=feature_names, key=key, silent=True)
# The feature tables are transposed before being handed to scikit-learn, which
# expects one row per sample (presumably `feature_df` stores one column per
# interaction — confirm against `featurize`).
X_train         = train_data['feature_df'].to_numpy().transpose()
X_test          = test_data['feature_df'].to_numpy().transpose()
X_clinical      = clinical_data['feature_df'].to_numpy().transpose()

# Determine class labels
# Two thresholds split the scores into three classes; presumably
# S = synergy, N = neutral, A = antagonism — confirm against `classify`.
thresh, classes = (0.9, 1.1), ('S', 'N', 'A')
train_labels    = classify(train_scores, thresholds=thresh, classes=classes)
test_labels     = classify(test_scores, thresholds=thresh, classes=classes)

# Train and apply a regression-based model
reg_model = RandomForestRegressor(random_state=SEED)
reg_model.fit(X_train, train_scores)
reg_y = reg_model.predict(X_test)
r, p = spearmanr(test_scores, reg_y)
r2 = r2_score(test_scores, reg_y)
print('Regression results:')
print('\tSpearman R = {}'.format(round(r, 4)))
print('\tSpearman p = {:.3g}'.format(p))
print('\tR2 = {}'.format(round(r2, 4)))

# Train and apply a classification-based model
class_model = RandomForestClassifier(random_state=SEED)
class_model.fit(X_train, train_labels)
class_y = class_model.predict(X_test)
print('Classification results:')
# zero_division=0 reports 0.0 for a class that is never predicted (the same
# value as the default) without emitting an UndefinedMetricWarning.
print(classification_report(test_labels, class_y, zero_division=0))

# Apply model to clinical data
clinical_y = reg_model.predict(X_clinical)
r, p = spearmanr(clinical_scores, clinical_y)
print('Clinical results:')
# NOTE(review): the sign of r is flipped before printing. Presumably the
# clinical score is oriented opposite to the interaction score, so a negative
# correlation is the expected "good" direction — confirm and document.
print('\tSpearman R = {}'.format(round(-r, 4)))
print('\tSpearman p = {:.3g}'.format(p))

Regression results:
	Spearman R = 0.5306
	Spearman p = 0.000871
	R2 = 0.1236
Classification results:
              precision    recall  f1-score   support

           A       0.50      0.25      0.33        16
           N       0.00      0.00      0.00         1
           S       0.64      0.74      0.68        19

    accuracy                           0.50        36
   macro avg       0.38      0.33      0.34        36
weighted avg       0.56      0.50      0.51        36

Clinical results:
	Spearman R = 0.4481
	Spearman p = 0.000473
