# Create PyWGCNA object

In [8]:
import PyWGCNA
import pandas as pd
import numpy as np
import random

In [None]:
# Read in the expression matrix made from all 48 samples; the first CSV column
# becomes the index.
# NOTE: the file name says CPM; the variable name TPM is kept because the
# following cells refer to it.
TPM = pd.read_csv("/data/class/cosmos2023/PUBLIC/shai_hulud/data/mirna/actual_data/mirna_cpm_matrix.csv", index_col=0)

# WGCNA requires a gene id: keep only the part of each index label before the
# first '.' and name the index 'gene_id'. Relabelling the index directly keeps
# the frame purely numeric (no temporary string column to add and drop again).
TPM.index = TPM.index.str.split('.').str[0].rename('gene_id')

# samples in rows, genes in columns
TPM = TPM.T

In [None]:
# Preview the first rows of the transposed matrix as a quick sanity check
TPM.head()

In [None]:
# Read in the metadata file, keeping only the columns needed for plotting
metadata = pd.read_csv("/data/class/cosmos2023/PUBLIC/shai_hulud/data/mirna/actual_data/mirna_metadata.csv",
                       usecols=['File.accession', 'Biosample.term.name', 'Age'])

# Replace underscores with spaces in the Age values
metadata['Age'] = metadata['Age'].str.replace('_', ' ')

# Index the rows by file accession. set_index also removes that column from
# the frame, leaving only 'Biosample.term.name' and 'Age' as annotations.
metadata = metadata.set_index('File.accession')

In [None]:
# Preview the first rows of the metadata table (indexed by file accession)
metadata.head()

# Make PyWGCNA object

In [None]:
# Build the PyWGCNA object from the samples-by-genes expression matrix.
# NOTE(review): save=True presumably tells PyWGCNA to write its results to
# disk as it goes — confirm against the PyWGCNA documentation.
mirna = PyWGCNA.WGCNA(name='mirna', 
                        species='Mouse', 
                        geneExp=TPM, 
                        save=True)

# Preprocess, including removing outlier genes and samples

In [None]:
# Run PyWGCNA's preprocessing step on the expression data held by the object
mirna.preprocess()

# Running PyWGCNA and finding modules

In [None]:
# Run the PyWGCNA module-detection step on the preprocessed object
mirna.findModules()

In [None]:
# Attach the sample annotations to the PyWGCNA object
mirna.updateSampleInfo(sampleInfo=metadata)

# Colour used for each biosample type
biosample_palette = {
    "C2C12": "blue",
    "gastrocnemius": "green",
    "limb": "red",
    "myotube": "purple",
    "skeletal muscle tissue": "yellow",
}
mirna.setMetadataColor('Biosample.term.name', biosample_palette)

# Colour used for each age / time point
# (labels use spaces, matching the underscore-free Age values in the metadata)
age_palette = {
    '0hr': '#E41A1C',
    '72hr': '#8D4C6A',
    'ED 10.5': '#377EB8',
    'ED 11.5': '#419681',
    'ED 12.5': '#4DAF4A',
    'ED 13.5': '#727E76',
    'ED 14.5': '#984EA3',
    'ED 15.5': '#CB6651',
    'PND 0': '#FF7F00',
    'PND 04': '#FFBF19',
    'PND 10': '#FFFF33',
    'PND 14': '#D2AA2D',
    'PND 25': '#A65628',
    'PND 36': '#CE6B73',
    'PNM 02': '#F781BF',
    'PNM 08': '#C88DAC',
    'PNM 18-20': '#999999',
}
mirna.setMetadataColor('Age', age_palette)

# Gene annotation (Ensembl id, gene name, biotype) for the mouse Ensembl dataset
annotation_fields = ['ensembl_gene_id', 'external_gene_name', 'gene_biotype']
geneList = PyWGCNA.getGeneList(dataset='mmusculus_gene_ensembl',
                               attributes=annotation_fields)


# Analyzing results including plotting module-trait relationship heatmap, eigengene heatmap and barplot, and GO term analysis for each module

In [None]:
# Run PyWGCNA's analysis step, passing in the gene annotation table
mirna.analyseWGCNA(geneList=geneList)

# Save PyWGCNA object

In [None]:
# Write the PyWGCNA object to disk so it can be reloaded later with readWGCNA
mirna.saveWGCNA()

# Read PyWGCNA object

In [None]:
## Read PyWGCNA object
# NOTE(review): sys is imported here but not used in this cell.
import sys
import PyWGCNA
# Reload a previously saved object from the relative path "mirna.p".
# NOTE(review): the '.p' extension suggests a pickle file — if so, only load
# files that come from a trusted source. Confirm against the PyWGCNA docs.
mirna = PyWGCNA.readWGCNA("mirna.p")

# How to work with the object

## Extract gene list for each module and save it as a csv file

In [None]:
import os

# module you're interested in
module = "silver"

# Rows of the gene table whose assigned module colour matches `module`
geneList = mirna.datExpr.var[mirna.datExpr.var.moduleColors == module]

# Save the gene list as MODULE.csv near the object file. os.path.join copes with
# an outputPath that is empty or already ends in a separator, where plain
# concatenation with '/' would give '/MODULE.csv' or a doubled separator.
geneList.to_csv(os.path.join(mirna.outputPath, module + '.csv'))

# Preview last: a notebook only renders the final expression of a cell, so a
# head() call in the middle of the cell is silently discarded.
geneList.head()