# Integrated Fundus Set (IRFundusSet) User Guide
How to use the IRFundusSet package and dataset
- How to set up and generate your IRFundusSet
- How to use it in a ML/AI data pipeline 


In [None]:
from pathlib import Path 
import configparser
import pandas as pd
import matplotlib.pyplot as plt 

### Helpers  

In [None]:
def get_ini_file(fp):
    """Parse the INI file at ``fp`` and return the populated parser.

    Args:
        fp: Path (str or path-like) of the INI file to read.

    Returns:
        A ``configparser.ConfigParser`` loaded with the file's contents.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        configparser.Error: If the file content is not valid INI syntax.
    """
    c = configparser.ConfigParser()
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(fp, 'r') as f:
        c.read_file(f)
    return c



# 1. Set up your local directory of source datasets
**Steps:**
1. Obtain the source datasets (cohorts) from their download links, and unzip them to your local directory 
2. Make a copy of the template `templet_set_cohorts.ini` file and update it with the local directories of the cohorts
3. Decide on the desired output image width (the `out_img_w_size` argument used below)
4. Decide on the desired output location/directory for the unified `IRFundusSet`

# 2. Get dataset object 
Expected activity
- Parse `local directories` and index the images 
- Harmonize and generate consolidated `IRFundusSet` 
- Retrieve `Dataset` object in line with standard ML/AI data pipelines 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from irfundusset import IRFundusSet 

## 2.1. Generate Only
Returns a status message and a list of cohorts included 

In [None]:
# Generate-only mode: per this guide, the call returns a status message and
# the list of cohorts that were included, rather than a dataset object.
hstatus, hcollection = IRFundusSet(
    out_dir="../output_irfundus_set",      # output location of the unified IRFundusSet
    out_img_w_size=256,                     # desired output image width
    in_cohorts_config="../cohorts.ini",    # ini file listing the local cohort directories
    harmonize_method=None,
    generate_only=True,
)

## 2.2. Dataset Object
Returns a Dataset Object 

In [None]:
# Dataset mode: with generate_only=False the call returns, per this guide,
# a Dataset object that is sampled by index further below.
irf_dataset = IRFundusSet(
    out_dir="../output_irfundus_set",      # output location of the unified IRFundusSet
    out_img_w_size=256,                     # desired output image width
    in_cohorts_config="../cohorts.ini",    # ini file listing the local cohort directories
    harmonize_method=None,
    generate_only=False,
    target_col=None,                        # can opt for source condition labels
    xtransform=None,
    ytransform=None,
)

# 3. Sample records in the dataset object 
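Indexing the dataset returns a data-observation dict; its keys (including `image`, which is plotted below) are printed by the cell that follows.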

In [None]:
def pretty_format(v):
    """Return a compact, display-friendly form of a record value.

    - NumPy arrays are summarised by their ``shape``.
    - Values whose string form contains ``:\\`` (a drive-letter style path)
      are returned as that string with its first 35 characters dropped;
      presumably this trims a long local path prefix -- the 35 is hard-coded.
    - Anything else is returned unchanged.
    """
    if isinstance(v, np.ndarray):
        return v.shape

    text = str(v)
    if ':\\' in text:
        return text[35:]

    return v

In [None]:
# Pick a random index into the dataset and fetch that record.
idx = np.random.randint(len(irf_dataset))
sample_record = irf_dataset[idx]
print("Record at index ", idx )

# Print every field of the record; a plain loop is used because the prints
# are side effects and no list of results is needed.
for k, v in sample_record.items():
    print(f">> {k:30s}:\t", pretty_format(v) )

# Show the record's image.
plt.imshow(sample_record['image'])