In [1]:
import FACSPy as fp

In [2]:
### a metadata table has to be supplied in order to build the metadata
import pandas as pd

### the tutorial table is read with ";" as the column separator
user_metadata = pd.read_csv(
    "../Tutorials/spectral_dataset/metadata.csv",
    sep=";",
)
user_metadata.head()

Unnamed: 0,sample_ID,file_name,group_fd,internal_id,organ,staining,diag_main,diag_fine,donor_id,material,batch
0,1,3742.fcs,healthy,3742,PB,stained,healthy,healthy,3742,PBMC,1
1,2,4337.fcs,healthy,4337,PB,stained,healthy,healthy,4337,PBMC,1
2,3,4449.fcs,healthy,4449,PB,stained,healthy,healthy,4449,PBMC,2
3,4,5143.fcs,healthy,5143,PB,stained,healthy,healthy,5143,PBMC,2
4,5,6042.fcs,healthy,6042,PB,stained,healthy,healthy,6042,PBMC,1


In [3]:
### two columns are obligatory: "sample_ID" and "file_name"
### a column named "staining" is highly recommended as well, so that the cofactors can be calculated automatically

metadata = fp.dt.Metadata(metadata=user_metadata)

In [4]:
### we created a Metadata object; leaving it as the last expression of the cell displays its repr
### (the repr reports the number of entries and the names of the metadata factors)
metadata

Metadata(36 entries with factors ['group_fd', 'internal_id', 'organ', 'diag_main', 'diag_fine', 'donor_id', 'material', 'batch'])

In [5]:
### omitting either of the obligatory columns ("sample_ID" or "file_name") will result in an error
### uncomment the next line to provoke the error

# metadata = fp.dt.Metadata(metadata = user_metadata.drop("sample_ID", axis = 1))

In [6]:
### Metadata can also be read directly from the hard drive by passing the path of the table:
### NOTE: this rebinds `metadata`, replacing the object that was built from `user_metadata` earlier
metadata = fp.dt.Metadata("../Tutorials/spectral_dataset/metadata.csv")

In [7]:
### we created a Metadata object again, this time from the file on disk
### its repr shows the same number of entries and the same factors as the object built from the dataframe
metadata

Metadata(36 entries with factors ['group_fd', 'internal_id', 'organ', 'diag_main', 'diag_fine', 'donor_id', 'material', 'batch'])

In [8]:
### the underlying data are stored as a pandas dataframe and are accessed by the .dataframe attribute or the .to_df() method
### first option: the .dataframe attribute
df = metadata.dataframe
df.head()

Unnamed: 0,sample_ID,file_name,group_fd,internal_id,organ,staining,diag_main,diag_fine,donor_id,material,batch
0,1,3742.fcs,healthy,3742,PB,stained,healthy,healthy,3742,PBMC,1
1,2,4337.fcs,healthy,4337,PB,stained,healthy,healthy,4337,PBMC,1
2,3,4449.fcs,healthy,4449,PB,stained,healthy,healthy,4449,PBMC,2
3,4,5143.fcs,healthy,5143,PB,stained,healthy,healthy,5143,PBMC,2
4,5,6042.fcs,healthy,6042,PB,stained,healthy,healthy,6042,PBMC,1


In [9]:
### second option: the .to_df() method; the first rows shown are the same as for the .dataframe attribute
### NOTE: `df` is rebound here -- whether .to_df() hands back a copy or the stored table itself is not visible from this notebook
df = metadata.to_df()
df.head()

Unnamed: 0,sample_ID,file_name,group_fd,internal_id,organ,staining,diag_main,diag_fine,donor_id,material,batch
0,1,3742.fcs,healthy,3742,PB,stained,healthy,healthy,3742,PBMC,1
1,2,4337.fcs,healthy,4337,PB,stained,healthy,healthy,4337,PBMC,1
2,3,4449.fcs,healthy,4449,PB,stained,healthy,healthy,4449,PBMC,2
3,4,5143.fcs,healthy,5143,PB,stained,healthy,healthy,5143,PBMC,2
4,5,6042.fcs,healthy,6042,PB,stained,healthy,healthy,6042,PBMC,1


In [10]:
### metadata factors are entries that have been submitted by the user and are not "sample_ID", "file_name" or "staining"
### these factors can be extracted by the .get_factors() method, which gives back a list of the column names
metadata.get_factors()

['group_fd',
 'internal_id',
 'organ',
 'diag_main',
 'diag_fine',
 'donor_id',
 'material',
 'batch']

In [11]:
### columns can be renamed: the first argument is the current column name, the second one the new name
### the rename is applied to the Metadata object itself, which is why the column index is inspected right afterwards
### NOTE: presumably this cell cannot simply be re-run, because "batch" no longer exists after the first run -- verify
metadata.rename("batch", "newly_named_batch")
metadata.dataframe.columns

Index(['sample_ID', 'file_name', 'group_fd', 'internal_id', 'organ',
       'staining', 'diag_main', 'diag_fine', 'donor_id', 'material',
       'newly_named_batch'],
      dtype='object')

In [12]:
### metadata can be subset: only the entries whose "file_name" is contained in the given list are kept
### the subset is applied to the Metadata object itself, no reassignment is needed
metadata.subset(
    "file_name",
    ["3742.fcs", "4337.fcs"],
)
metadata.to_df()

Unnamed: 0,sample_ID,file_name,group_fd,internal_id,organ,staining,diag_main,diag_fine,donor_id,material,newly_named_batch
0,1,3742.fcs,healthy,3742,PB,stained,healthy,healthy,3742,PBMC,1
1,2,4337.fcs,healthy,4337,PB,stained,healthy,healthy,4337,PBMC,1


In [13]:
### metadata can be annotated: the listed files receive the value "new_val" in the column "new_col"
### the column did not exist before and is part of the table afterwards
metadata.annotate(
    ["3742.fcs", "4337.fcs"],
    "new_col",
    "new_val",
)
metadata.dataframe.head()

  self.dataframe.loc[self.dataframe["file_name"].isin(file_names), column] = value


Unnamed: 0,sample_ID,file_name,group_fd,internal_id,organ,staining,diag_main,diag_fine,donor_id,material,newly_named_batch,new_col
0,1,3742.fcs,healthy,3742,PB,stained,healthy,healthy,3742,PBMC,1,new_val
1,2,4337.fcs,healthy,4337,PB,stained,healthy,healthy,4337,PBMC,1,new_val


In [14]:
### metadata can be written to the hard drive
### at this point the object holds the renamed, subset and annotated table produced by the preceding cells
metadata.write("../Tutorials/spectral_dataset/vignette_metadata.csv")