# Analysing PMRD Precursors for counts

Start by loading the required libraries.

In [None]:
%pylab inline
import matplotlib_venn
import pandas
import scipy

Loading input table:

In [None]:
# Read the tab-separated count table; its first line provides the column names.
mirnas_counts = pandas.read_csv(
    "pmrd_premirs_counts.tsv",
    header=0,
    sep="\t",
)
# Show the first ten rows as the cell output.
mirnas_counts.head(10)

Calculate the number of miRNAs present in each sample.

We consider a miRNA to be present in a sample if its normalised count is >= 10 (the threshold was changed from 1 to 10 on 2019.02.01).

In [None]:
# Sample identifiers, in the order used by every table and plot below.
samples_list = ["FB", "FEF", "FH", "MB", "MEF", "MH", "TNB", "TNEF", "TNH"]

# Column names are assigned explicitly so the rest of the notebook can rely on
# them: two identifier columns, one column per sample, and then one
# "<sample>_norm" column per sample in the same order.
mirnas_counts.columns = (["miRNA", "accession"]
                         + samples_list
                         + [name + "_norm" for name in samples_list])

# Number of miRNAs per sample whose "<sample>_norm" value is >= 10
# (threshold changed from 1 to 10 on 2019.02.01).
miRNAs_actives = {
    name: sum(mirnas_counts[name + "_norm"] >= 10)
    for name in samples_list
}

print(miRNAs_actives)

In [None]:
# One bar per entry of miRNAs_actives (in insertion order); every run of three
# consecutive bars shares a colour.
bar_labels = miRNAs_actives.keys()
bar_heights = miRNAs_actives.values()
bar_colors = ["#003300"] * 3 + ["#336600"] * 3 + ["#666633"] * 3

matplotlib.pyplot.bar(bar_labels, bar_heights, color=bar_colors)

## Let's check which miRNAs are differentially present/absent

### Approach by flower type

In [None]:
# For every sample, the set of miRNA names whose "<sample>_norm" value is >= 10
# (threshold changed from 1 to 10 on 2019.02.01).
mirna_list = {
    name: set(mirnas_counts.loc[mirnas_counts[name + "_norm"] >= 10, "miRNA"])
    for name in samples_list
}

# Uncomment to inspect the sets: print(mirna_list)

# Unweighted three-way Venn diagram of the FB, FEF and FH sets.
fb, fef, fh = mirna_list["FB"], mirna_list["FEF"], mirna_list["FH"]
venn_female = matplotlib_venn.venn3_unweighted(
    [fb, fef, fh],
    set_labels=("FB", "FEF", "FH"),
)

# To save the figure: savefig('pmrd_madure_counts_veen_females.png')

# miRNAs present in exactly one of the three sets.
miRNA_list_FB = fb - (fef | fh)
miRNA_list_FEF = fef - (fb | fh)
miRNA_list_FH = fh - (fb | fef)

# miRNAs present in exactly two of the three sets.
miRNA_list_FB_FEF = (fb & fef) - fh
miRNA_list_FB_FH = (fb & fh) - fef
miRNA_list_FEF_FH = (fef & fh) - fb

# Print each region as a header line followed by its sorted members.
female_regions = [
    ("Exclussivos de FB:", miRNA_list_FB),
    ("Exclussivos de FEF:", miRNA_list_FEF),
    ("Exclussivos de FH:", miRNA_list_FH),
    ("Presentes apenas em FB + FEF:", miRNA_list_FB_FEF),
    ("Presentes apenas em FB + FH:", miRNA_list_FB_FH),
    ("Presentes apenas em FEF + FH:", miRNA_list_FEF_FH),
]
for region_header, region_members in female_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_female = sorted(
    set().union(*(members for _, members in female_regions))
)
print(relevant_miRNAs_female)

In [None]:
# Unweighted three-way Venn diagram of the MB, MEF and MH sets.
mb, mef, mh = mirna_list["MB"], mirna_list["MEF"], mirna_list["MH"]
venn_male = matplotlib_venn.venn3_unweighted(
    [mb, mef, mh],
    set_labels=("MB", "MEF", "MH"),
)

# To save the figure: savefig('pmrd_madure_counts_veen_males.png')

# miRNAs present in exactly one of the three sets.
miRNA_list_MB = mb - (mef | mh)
miRNA_list_MEF = mef - (mb | mh)
miRNA_list_MH = mh - (mb | mef)

# miRNAs present in exactly two of the three sets.
miRNA_list_MB_MEF = (mb & mef) - mh
miRNA_list_MB_MH = (mb & mh) - mef
miRNA_list_MEF_MH = (mef & mh) - mb

# Print each region as a header line followed by its sorted members.
male_regions = [
    ("Exclussivos de MB:", miRNA_list_MB),
    ("Exclussivos de MEF:", miRNA_list_MEF),
    ("Exclussivos de MH:", miRNA_list_MH),
    ("Presentes apenas em MB + MEF:", miRNA_list_MB_MEF),
    ("Presentes apenas em MB + MH:", miRNA_list_MB_MH),
    ("Presentes apenas em MEF + MH:", miRNA_list_MEF_MH),
]
for region_header, region_members in male_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_male = sorted(
    set().union(*(members for _, members in male_regions))
)
print(relevant_miRNAs_male)

In [None]:
# Unweighted three-way Venn diagram of the TNB, TNEF and TNH sets.
tnb, tnef, tnh = mirna_list["TNB"], mirna_list["TNEF"], mirna_list["TNH"]
venn_hermaphrodite = matplotlib_venn.venn3_unweighted(
    [tnb, tnef, tnh],
    set_labels=("TNB", "TNEF", "TNH"),
)

# To save the figure: savefig('pmrd_madure_counts_veen_hermaphrodites.png')

# miRNAs present in exactly one of the three sets.
miRNA_list_TNB = tnb - (tnef | tnh)
miRNA_list_TNEF = tnef - (tnb | tnh)
miRNA_list_TNH = tnh - (tnb | tnef)

# miRNAs present in exactly two of the three sets.
miRNA_list_TNB_TNEF = (tnb & tnef) - tnh
miRNA_list_TNB_TNH = (tnb & tnh) - tnef
miRNA_list_TNEF_TNH = (tnef & tnh) - tnb

# Print each region as a header line followed by its sorted members.
hermaphrodite_regions = [
    ("Exclussivos de TNB:", miRNA_list_TNB),
    ("Exclussivos de TNEF:", miRNA_list_TNEF),
    ("Exclussivos de TNH:", miRNA_list_TNH),
    ("Presentes apenas em TNB + TNEF:", miRNA_list_TNB_TNEF),
    ("Presentes apenas em TNB + TNH:", miRNA_list_TNB_TNH),
    ("Presentes apenas em TNEF + TNH:", miRNA_list_TNEF_TNH),
]
for region_header, region_members in hermaphrodite_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_hermaphrodite = sorted(
    set().union(*(members for _, members in hermaphrodite_regions))
)
print(relevant_miRNAs_hermaphrodite)

In [None]:
# Sorted, duplicate-free union of the three per-flower-type lists.
relevant_miRNAs_by_flower_type = sorted(set(relevant_miRNAs_female
                                            + relevant_miRNAs_male
                                            + relevant_miRNAs_hermaphrodite))

print("Lista de miRNAs com presença diferencial em pelo menos um dos tipos de flor ({}):".format(len(relevant_miRNAs_by_flower_type)))
print(relevant_miRNAs_by_flower_type)

# Loop-invariant plot inputs, built once instead of on every iteration.
norm_columns = [name + "_norm" for name in samples_list]
bar_colors = ["#003300"] * 3 + ["#336600"] * 3 + ["#666633"] * 3
# Reference line drawn at a normalised count of 10 for every sample.
threshold = pandas.Series([10] * len(samples_list),
                          index = samples_list)

for miRNA in relevant_miRNAs_by_flower_type:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values); otherwise more than one value per sample is
    # collected and the Series below no longer matches samples_list -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    # Bar chart of the normalised counts with the threshold line on top.
    dataplot = pandas.Series(miRNA_norm_counts,
                             index = samples_list)
    dataplot.plot(kind = "bar",
                  title = "Frequence of " + miRNA,
                  color = bar_colors)
    threshold.plot(kind = "line",
                   color = ["#660000"])

    plt.xlabel("Sample")
    plt.ylabel("Normalized counts")
    plt.show()

### Approach by developmental stage

In [None]:
# Unweighted three-way Venn diagram of the FB, MB and TNB sets.
fb, mb, tnb = mirna_list["FB"], mirna_list["MB"], mirna_list["TNB"]
venn_stage_b = matplotlib_venn.venn3_unweighted(
    [fb, mb, tnb],
    set_labels=("FB", "MB", "TNB"),
)

# miRNAs present in exactly one of the three sets.
miRNA_list_FB = fb - (mb | tnb)
miRNA_list_MB = mb - (fb | tnb)
miRNA_list_TNB = tnb - (fb | mb)

# miRNAs present in exactly two of the three sets.
miRNA_list_FB_MB = (fb & mb) - tnb
miRNA_list_FB_TNB = (fb & tnb) - mb
miRNA_list_MB_TNB = (mb & tnb) - fb

# Print each region as a header line followed by its sorted members.
stage_b_regions = [
    ("Exclussivos de FB:", miRNA_list_FB),
    ("Exclussivos de MB:", miRNA_list_MB),
    ("Exclussivos de TNB:", miRNA_list_TNB),
    ("Presntes apenas em FB + MB:", miRNA_list_FB_MB),
    ("Presntes apenas em FB + TNB:", miRNA_list_FB_TNB),
    ("Presntes apenas em MB + TNB:", miRNA_list_MB_TNB),
]
for region_header, region_members in stage_b_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_b = sorted(
    set().union(*(members for _, members in stage_b_regions))
)
print(relevant_miRNAs_b)

In [None]:
# Unweighted three-way Venn diagram of the FEF, MEF and TNEF sets.
fef, mef, tnef = mirna_list["FEF"], mirna_list["MEF"], mirna_list["TNEF"]
venn_stage_ef = matplotlib_venn.venn3_unweighted(
    [fef, mef, tnef],
    set_labels=("FEF", "MEF", "TNEF"),
)

# miRNAs present in exactly one of the three sets.
miRNA_list_FEF = fef - (mef | tnef)
miRNA_list_MEF = mef - (fef | tnef)
miRNA_list_TNEF = tnef - (fef | mef)

# miRNAs present in exactly two of the three sets.
miRNA_list_FEF_MEF = (fef & mef) - tnef
miRNA_list_FEF_TNEF = (fef & tnef) - mef
miRNA_list_MEF_TNEF = (mef & tnef) - fef

# Print each region as a header line followed by its sorted members.
stage_ef_regions = [
    ("Exclussivos de FEF:", miRNA_list_FEF),
    ("Exclussivos de MEF:", miRNA_list_MEF),
    ("Exclussivos de TNEF:", miRNA_list_TNEF),
    ("Presentes apenas em FEF + MEF:", miRNA_list_FEF_MEF),
    ("Presentes apenas em FEF + TNEF:", miRNA_list_FEF_TNEF),
    ("Presentes apenas em MEF + TNEF:", miRNA_list_MEF_TNEF),
]
for region_header, region_members in stage_ef_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_ef = sorted(
    set().union(*(members for _, members in stage_ef_regions))
)
print(relevant_miRNAs_ef)

In [None]:
# Unweighted three-way Venn diagram of the FH, MH and TNH sets.
fh, mh, tnh = mirna_list["FH"], mirna_list["MH"], mirna_list["TNH"]
venn_stage_h = matplotlib_venn.venn3_unweighted(
    [fh, mh, tnh],
    set_labels=("FH", "MH", "TNH"),
)

# miRNAs present in exactly one of the three sets.
miRNA_list_FH = fh - (mh | tnh)
miRNA_list_MH = mh - (fh | tnh)
miRNA_list_TNH = tnh - (fh | mh)

# miRNAs present in exactly two of the three sets.
miRNA_list_FH_MH = (fh & mh) - tnh
miRNA_list_FH_TNH = (fh & tnh) - mh
miRNA_list_MH_TNH = (mh & tnh) - fh

# Print each region as a header line followed by its sorted members.
stage_h_regions = [
    ("Exclussivos de FH:", miRNA_list_FH),
    ("Exclussivos de MH:", miRNA_list_MH),
    ("Exclussivos de TNH:", miRNA_list_TNH),
    ("Presentes apenas em FH + MH:", miRNA_list_FH_MH),
    ("Presentes apenas em FH + TNH:", miRNA_list_FH_TNH),
    ("Presentes apenas em MH + TNH:", miRNA_list_MH_TNH),
]
for region_header, region_members in stage_h_regions:
    print(region_header)
    print(sorted(region_members))

# Sorted union of the six regions, i.e. everything except the miRNAs shared by
# all three sets.
print("Lista de miRNAs com presença diferencial:")
relevant_miRNAs_h = sorted(
    set().union(*(members for _, members in stage_h_regions))
)
print(relevant_miRNAs_h)

In [None]:
# Sorted, duplicate-free union of the three per-stage lists.
relevant_miRNAs_by_developmental_stage = sorted(set(relevant_miRNAs_b
                                                    + relevant_miRNAs_ef
                                                    + relevant_miRNAs_h))

print("Lista de miRNAs com presença diferencial em pelo menos um dos estágios de desenvolvimento ({}):".format(len(relevant_miRNAs_by_developmental_stage)))
print(relevant_miRNAs_by_developmental_stage)

# Loop-invariant plot inputs, built once instead of on every iteration.
norm_columns = [name + "_norm" for name in samples_list]
bar_colors = ["#003300"] * 3 + ["#336600"] * 3 + ["#666633"] * 3
# Reference line drawn at a normalised count of 10 for every sample.
threshold = pandas.Series([10] * len(samples_list),
                          index = samples_list)

for miRNA in relevant_miRNAs_by_developmental_stage:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values); otherwise more than one value per sample is
    # collected and the Series below no longer matches samples_list -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    # Bar chart of the normalised counts with the threshold line on top.
    dataplot = pandas.Series(miRNA_norm_counts,
                             index = samples_list)
    dataplot.plot(kind = "bar",
                  title = "Frequence of " + miRNA,
                  color = bar_colors)
    threshold.plot(kind = "line",
                   color = ["#660000"])

    plt.xlabel("Sample")
    plt.ylabel("Normalized counts")
    plt.show()

### List of relevant miRNAs, regardless of which approach they come from

In [None]:
# Sorted, duplicate-free union of the miRNAs found by either approach
# (developmental stage or flower type).
relevant_miRNAs_all = sorted(set(relevant_miRNAs_by_developmental_stage
                                 + relevant_miRNAs_by_flower_type))

print("Lista de miRNAs com presença diferencial geral ({}):".format(len(relevant_miRNAs_all)))
print(relevant_miRNAs_all)

# Loop-invariant plot inputs, built once instead of on every iteration.
norm_columns = [name + "_norm" for name in samples_list]
bar_colors = ["#003300"] * 3 + ["#336600"] * 3 + ["#666633"] * 3
# Reference line drawn at a normalised count of 10 for every sample.
threshold = pandas.Series([10] * len(samples_list),
                          index = samples_list)

for miRNA in relevant_miRNAs_all:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values); otherwise more than one value per sample is
    # collected and the Series below no longer matches samples_list -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    # Bar chart of the normalised counts with the threshold line on top.
    dataplot = pandas.Series(miRNA_norm_counts,
                             index = samples_list)
    dataplot.plot(kind = "bar",
                  title = "Frequence of " + miRNA,
                  color = bar_colors)
    threshold.plot(kind = "line",
                   color = ["#660000"])

    plt.xlabel("Sample")
    plt.ylabel("Normalized counts")
    plt.show()

In [None]:
# One list of normalised counts per relevant miRNA, in samples_list order.
norm_columns = [name + "_norm" for name in samples_list]
relevant_miRNAs_values = list()

for miRNA in relevant_miRNAs_all:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values); otherwise the rows of the matrix below end up
    # with different lengths -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    relevant_miRNAs_values.append(miRNA_norm_counts)

# Heat map: one row per miRNA, one column per sample. Tick positions are offset
# by 0.5 so that each label sits at the centre of its cell.
row_tick_positions = np.arange(0.5, len(relevant_miRNAs_all), 1)
column_tick_positions = np.arange(0.5, len(samples_list), 1)

plt.figure(figsize = (5, 15))
plt.pcolor(relevant_miRNAs_values)
plt.yticks(row_tick_positions, relevant_miRNAs_all)
plt.xticks(column_tick_positions, labels = samples_list)
plt.colorbar()
plt.show()



In [None]:
# Tabulate the collected normalised counts: one row per relevant miRNA and one
# column per sample.
relevant_miRNAs = pandas.DataFrame.from_records(
    relevant_miRNAs_values,
    columns=samples_list,
    index=relevant_miRNAs_all,
)
# Show the table as the cell output.
relevant_miRNAs

In [None]:
# miRNAs reported as differentially expressed; the list is pasted in from a
# separate differential-expression analysis.
differential_expressed = [
    'vvi-MIR156e', 'vvi-MIR156f', 'vvi-MIR156g', 'vvi-MIR156i',
    'vvi-MIR160c', 'vvi-MIR160d',
    'vvi-MIR167d',
    'vvi-MIR169a', 'vvi-MIR169c', 'vvi-MIR169e', 'vvi-MIR169g', 'vvi-MIR169j',
    'vvi-MIR169k', 'vvi-MIR169n', 'vvi-MIR169s', 'vvi-MIR169u',
    'vvi-MIR171e',
    'vvi-MIR172a', 'vvi-MIR172b',
    'vvi-MIR2111',
    'vvi-MIR3624', 'vvi-MIR3627', 'vvi-MIR3631c', 'vvi-MIR3634',
    'vvi-MIR393a',
    'vvi-MIR395a', 'vvi-MIR395b', 'vvi-MIR395c', 'vvi-MIR395d', 'vvi-MIR395e',
    'vvi-MIR395f', 'vvi-MIR395g', 'vvi-MIR395h', 'vvi-MIR395i', 'vvi-MIR395j',
    'vvi-MIR395k', 'vvi-MIR395l', 'vvi-MIR395m', 'vvi-MIR395n',
    'vvi-MIR396b', 'vvi-MIR396c', 'vvi-MIR396d',
    'vvi-MIR398a',
    'vvi-MIR399a', 'vvi-MIR399b', 'vvi-MIR399c', 'vvi-MIR399h',
    'vvi-MIR477',
    'vvi-MIR845c', 'vvi-MIR845d', 'vvi-MIR845e',
]

# miRNAs found by both strategies (presence/absence and differential expression).
mirnas_both = sorted(set(relevant_miRNAs.index).intersection(differential_expressed))

print("There are {} miRNAs indentified on both methods.".format(len(mirnas_both)))

norm_columns = [name + "_norm" for name in samples_list]
mirnas_both_values = list()

for miRNA in mirnas_both:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values) -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    mirnas_both_values.append(miRNA_norm_counts)

# One row per miRNA, one column per sample; shown as the cell output.
mirnas_both_expression = pandas.DataFrame.from_records(mirnas_both_values,
                                                       index = mirnas_both,
                                                       columns = samples_list)
mirnas_both_expression

In [None]:
# miRNAs found only by the presence/absence strategy, i.e. rows of
# relevant_miRNAs that are not in the differential-expression list.
mirnas_only_counts = sorted(set(relevant_miRNAs.index).difference(differential_expressed))

print("There are {} miRNAs indentified only on presence/absence.".format(len(mirnas_only_counts)))

norm_columns = [name + "_norm" for name in samples_list]
mirnas_only_counts_values = list()

for miRNA in mirnas_only_counts:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): this assumes every miRNA name matches a single row (or rows
    # with identical values) -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    mirnas_only_counts_values.append(miRNA_norm_counts)

# One row per miRNA, one column per sample; shown as the cell output.
mirnas_only_counts_expression = pandas.DataFrame.from_records(mirnas_only_counts_values,
                                                       index = mirnas_only_counts,
                                                       columns = samples_list)
mirnas_only_counts_expression

In [None]:
# miRNAs found only by differential expression, i.e. entries of the
# differential-expression list that are not rows of relevant_miRNAs.
mirnas_only_differential_expressed = sorted(set(differential_expressed).difference(relevant_miRNAs.index))

print("There are {} miRNAs indentified only on differential expression.".format(len(mirnas_only_differential_expressed)))

norm_columns = [name + "_norm" for name in samples_list]
mirnas_only_differential_expressed_values = list()

for miRNA in mirnas_only_differential_expressed:
    # Select the rows of this miRNA once and reuse them for all samples.
    miRNA_rows = mirnas_counts[mirnas_counts["miRNA"] == miRNA]

    # set() keeps one copy of each distinct value found in a column.
    # NOTE(review): names in differential_expressed are not checked against the
    # count table; a name with no matching row contributes an empty list here.
    # This also assumes at most one row (or identical rows) per name -- confirm.
    miRNA_norm_counts = [value
                         for column in norm_columns
                         for value in set(miRNA_rows[column])]

    mirnas_only_differential_expressed_values.append(miRNA_norm_counts)

# One row per miRNA, one column per sample; shown as the cell output.
mirnas_only_differential_expressed_expression = pandas.DataFrame.from_records(mirnas_only_differential_expressed_values,
                                                       index = mirnas_only_differential_expressed,
                                                       columns = samples_list)
mirnas_only_differential_expressed_expression