# miRNA-Seq

## Most relevant miRNAs based on families

### Load required modules

In [None]:
%matplotlib inline
import pandas
import matplotlib.pyplot as plt
import matplotlib_venn as plt_venn

### Load the input file

In [None]:
# Read the tab-separated hairpin count table; the first column becomes the index.
mirbase_hairpins = pandas.read_csv(
    '../../../2019_only_valid_mirnas/mirbase_hairpins_counts_pass.tsv',
    index_col = 0,
    sep = '\t',
)

# Preview the first rows to check the load.
mirbase_hairpins.head()

### Compute families

In [None]:
def set_family(mirna):
    """Return the family name for a miRNA hairpin identifier.

    A single trailing letter is treated as the family-member suffix and is
    dropped (e.g. ``'vvi-MIR172a'`` -> ``'vvi-MIR172'``). Identifiers that do
    not end in a letter are returned unchanged. Only one character is ever
    removed.

    Parameters
    ----------
    mirna : str
        Hairpin identifier.

    Returns
    -------
    str
        The identifier without its one-letter suffix, or the identifier
        itself when there is no such suffix (including the empty string).
    """
    # Slice instead of indexing so an empty identifier does not raise IndexError.
    if mirna[-1:].isalpha():
        return mirna[:-1]

    return mirna

In [None]:
# Tag every hairpin with its family by applying set_family to the 'miRNA' column.
# (The previous bare `.tolist()` statement built a list and discarded it, so it
# has been removed.)
mirbase_hairpins['family'] = mirbase_hairpins['miRNA'].map(set_family)
mirbase_hairpins.head()

In [None]:
# Add up the counts of all hairpins that belong to the same family.
# numeric_only=True keeps text columns such as 'miRNA' out of the aggregation:
# on pandas >= 2.0 a plain .sum() concatenates strings instead of dropping them,
# which adds a column and makes the positional rename below fail.
mirbase_hairpins_group = mirbase_hairpins.groupby(by = 'family').sum(numeric_only = True)

# Positional rename: raw counts first, then the normalized counts.
# NOTE(review): assumes the input holds exactly these 18 numeric columns in this
# order -- confirm against the TSV header.
mirbase_hairpins_group.columns = ['FB', 'FEF', 'FH', 'MB', 'MEF', 'MH', 'TNB', 'TNEF', 'TNH', 'FB_norm', 'FEF_norm', 'FH_norm', 'MB_norm', 'MEF_norm', 'MH_norm', 'TNB_norm', 'TNEF_norm', 'TNH_norm']
mirbase_hairpins_group.head()

### Save the results into a new CSV file

In [None]:
# Persist the per-family table. The file is tab-separated even though it is named .csv.
mirbase_hairpins_group.to_csv('mirnas_by_family.csv', sep = '\t')

### By flower type

In this section, I evaluate which miRNA families depend on flower type.

#### Always present miRNAs

In [None]:
# Presence masks: one boolean Series per sample, True where the family's
# normalized count is strictly greater than 10.

# F* samples
fb_active = mirbase_hairpins_group['FB_norm'].gt(10)
fef_active = mirbase_hairpins_group['FEF_norm'].gt(10)
fh_active = mirbase_hairpins_group['FH_norm'].gt(10)

# M* samples
mb_active = mirbase_hairpins_group['MB_norm'].gt(10)
mef_active = mirbase_hairpins_group['MEF_norm'].gt(10)
mh_active = mirbase_hairpins_group['MH_norm'].gt(10)

# TN* samples
tnb_active = mirbase_hairpins_group['TNB_norm'].gt(10)
tnef_active = mirbase_hairpins_group['TNEF_norm'].gt(10)
tnh_active = mirbase_hairpins_group['TNH_norm'].gt(10)

In [None]:
# A family is kept only when every one of the nine per-sample masks is True.
present_everywhere = (
    fb_active & fef_active & fh_active
    & mb_active & mef_active & mh_active
    & tnb_active & tnef_active & tnh_active
)
families_allways_present = mirbase_hairpins_group.loc[present_everywhere].index.tolist()
print(f'There are {len(families_allways_present)} families allways present')
families_allways_present

#### Present in *V. v. sylvestris* but not in *V. v. vinifera*

In [None]:
# Prepare absence filters: the exact complement of the "> 10" presence
# threshold. Using "<= 10" (instead of "< 10") ensures a family whose normalized
# count is exactly 10 is classified as absent rather than being neither present
# nor absent.
fb_inactive = mirbase_hairpins_group['FB_norm'] <= 10
fef_inactive = mirbase_hairpins_group['FEF_norm'] <= 10
fh_inactive = mirbase_hairpins_group['FH_norm'] <= 10

mb_inactive = mirbase_hairpins_group['MB_norm'] <= 10
mef_inactive = mirbase_hairpins_group['MEF_norm'] <= 10
mh_inactive = mirbase_hairpins_group['MH_norm'] <= 10

tnb_inactive = mirbase_hairpins_group['TNB_norm'] <= 10
tnef_inactive = mirbase_hairpins_group['TNEF_norm'] <= 10
tnh_inactive = mirbase_hairpins_group['TNH_norm'] <= 10

In [None]:
# Present in all six F*/M* samples and absent from all three TN* samples.
only_sylvestris = (
    fb_active & fef_active & fh_active
    & mb_active & mef_active & mh_active
    & tnb_inactive & tnef_inactive & tnh_inactive
)
families_present_only_sylvestris = mirbase_hairpins_group.loc[only_sylvestris].index.tolist()
print(f'There are {len(families_present_only_sylvestris)} families present in V. v. sylvestris but not in V. v. vinifera')
families_present_only_sylvestris

#### Not present in *V. v. sylvestris* but present in *V. v. vinifera*

In [None]:
# Absent from all six F*/M* samples and present in all three TN* samples.
only_vinifera = (
    fb_inactive & fef_inactive & fh_inactive
    & mb_inactive & mef_inactive & mh_inactive
    & tnb_active & tnef_active & tnh_active
)
families_present_only_vinifera = mirbase_hairpins_group.loc[only_vinifera].index.tolist()
print(f'There are {len(families_present_only_vinifera)} families present in V. v. vinifera but not in V. v. sylvestris')
families_present_only_vinifera

#### By flower type

In [None]:
# Strict presence per flower type: the family must pass the threshold in all
# three samples (B, EF and H) of that flower type.
female_all_stages = fb_active & fef_active & fh_active
present_in_females = mirbase_hairpins_group.loc[female_all_stages].index.tolist()
print(f'There are {len(present_in_females)} families present Female')

male_all_stages = mb_active & mef_active & mh_active
present_in_males = mirbase_hairpins_group.loc[male_all_stages].index.tolist()
print(f'There are {len(present_in_males)} families present Males')

hermaphrodite_all_stages = tnb_active & tnef_active & tnh_active
present_in_hermaphrodites = mirbase_hairpins_group.loc[hermaphrodite_all_stages].index.tolist()
print(f'There are {len(present_in_hermaphrodites)} families present Hermaphrodites')

# Total number of rows (families) in the grouped table, for reference.
print(f'{mirbase_hairpins_group.shape[0]} families tested')


In [None]:
# Three-way unweighted Venn diagram of the per-flower-type family lists.
plt_venn.venn3_unweighted(
    [set(families) for families in (present_in_females, present_in_males, present_in_hermaphrodites)],
    set_labels = ('Female', 'Male', 'Hermaphrodite'),
)

In [None]:
# Families listed for females that are missing from the male list.
[family for family in present_in_females if family not in present_in_males]

vvi-MIR172 is present in all female samples, but absent in at least one male and one hermaphrodite sample.

In [None]:
# Families listed for males that are missing from the female list.
[family for family in present_in_males if family not in present_in_females]

vvi-MIR828 is present in all male samples, but absent in at least one female and one hermaphrodite sample.

#### By developmental stage

In [None]:
# Strict presence per developmental stage: the family must pass the threshold
# in the F, M and TN sample of that stage.
stage_b_all_types = fb_active & mb_active & tnb_active
present_in_stage_b = mirbase_hairpins_group.loc[stage_b_all_types].index.tolist()
print(f'There are {len(present_in_stage_b)} families present in stage B')

stage_ef_all_types = fef_active & mef_active & tnef_active
present_in_stage_ef = mirbase_hairpins_group.loc[stage_ef_all_types].index.tolist()
print(f'There are {len(present_in_stage_ef)} families present in stage EF')

stage_h_all_types = fh_active & mh_active & tnh_active
present_in_stage_h = mirbase_hairpins_group.loc[stage_h_all_types].index.tolist()
print(f'There are {len(present_in_stage_h)} families present in stage H')

# Total number of rows (families) in the grouped table, for reference.
print(f'{mirbase_hairpins_group.shape[0]} families tested')

In [None]:
# Three-way unweighted Venn diagram of the per-stage family lists.
plt_venn.venn3_unweighted(
    [set(families) for families in (present_in_stage_b, present_in_stage_ef, present_in_stage_h)],
    set_labels = ('B', 'E/F', 'H'),
)

In [None]:
# Families listed for stage EF only (in neither the B nor the H list).
[family for family in present_in_stage_ef
 if not (family in present_in_stage_b or family in present_in_stage_h)]

In [None]:
# Families shared by the EF and B lists but missing from the H list.
[family for family in present_in_stage_ef
 if family in present_in_stage_b and family not in present_in_stage_h]

In [None]:
# Families shared by the EF and H lists but missing from the B list.
[family for family in present_in_stage_ef
 if family not in present_in_stage_b and family in present_in_stage_h]

In [None]:
# Families listed for stage H only (in neither the B nor the EF list).
[family for family in present_in_stage_h
 if not (family in present_in_stage_b or family in present_in_stage_ef)]

#### Combine both (flower type and developmental stage)

##### Distinguish by flower type approach

Order is B, E/F, H

In [None]:
# Per-sample lists of family names whose normalized count is above 10.
# NOTE: this rebinds the *_active names from boolean masks (defined in the
# flower-type section) to plain Python lists, so the earlier mask-based cells
# cannot be re-run after this one without re-running the mask cell first.

# F* samples
fb_active = mirbase_hairpins_group.query('FB_norm > 10').index.tolist()
fef_active = mirbase_hairpins_group.query('FEF_norm > 10').index.tolist()
fh_active = mirbase_hairpins_group.query('FH_norm > 10').index.tolist()

# M* samples
mb_active = mirbase_hairpins_group.query('MB_norm > 10').index.tolist()
mef_active = mirbase_hairpins_group.query('MEF_norm > 10').index.tolist()
mh_active = mirbase_hairpins_group.query('MH_norm > 10').index.tolist()

# TN* samples
tnb_active = mirbase_hairpins_group.query('TNB_norm > 10').index.tolist()
tnef_active = mirbase_hairpins_group.query('TNEF_norm > 10').index.tolist()
tnh_active = mirbase_hairpins_group.query('TNH_norm > 10').index.tolist()

In [None]:
# Stage B samples compared across the three flower types.
plt_venn.venn3_unweighted(
    [set(families) for families in (fb_active, mb_active, tnb_active)],
    set_labels = ('Female', 'Male', 'Hermaphrodite'),
)

In [None]:
# Stage B: families in the MB and TNB lists but not in the FB list.
[family for family in mb_active
 if family not in fb_active and family in tnb_active]

In [None]:
# Stage B: families in the TNB list only (in neither FB nor MB).
[family for family in tnb_active
 if not (family in fb_active or family in mb_active)]

In [None]:
# Stage EF samples compared across the three flower types.
plt_venn.venn3_unweighted(
    [set(families) for families in (fef_active, mef_active, tnef_active)],
    set_labels = ('Female', 'Male', 'Hermaphrodite'),
)

In [None]:
# Stage H samples compared across the three flower types.
plt_venn.venn3_unweighted(
    [set(families) for families in (fh_active, mh_active, tnh_active)],
    set_labels = ('Female', 'Male', 'Hermaphrodite'),
)

In [None]:
# Stage H: families in the FH list only (in neither MH nor TNH).
[family for family in fh_active
 if not (family in mh_active or family in tnh_active)]

In [None]:
# Stage H: families in the TNH list only (in neither MH nor FH).
[family for family in tnh_active
 if not (family in mh_active or family in fh_active)]

##### Distinguish by developmental stage approach

Order is Female, Male, Hermaphrodite

In [None]:
# F* samples compared across the three developmental stages.
plt_venn.venn3_unweighted(
    [set(families) for families in (fb_active, fef_active, fh_active)],
    set_labels = ('B', 'E/F', 'H'),
)

In [None]:
# F* samples: families in the FEF list only (in neither FB nor FH).
[family for family in fef_active
 if not (family in fb_active or family in fh_active)]

In [None]:
# F* samples: families in the FEF and FH lists but not in the FB list.
[family for family in fef_active
 if family not in fb_active and family in fh_active]

In [None]:
# F* samples: families in the FH list only (in neither FB nor FEF).
[family for family in fh_active
 if not (family in fb_active or family in fef_active)]

In [None]:
# M* samples compared across the three developmental stages.
plt_venn.venn3_unweighted(
    [set(families) for families in (mb_active, mef_active, mh_active)],
    set_labels = ('B', 'E/F', 'H'),
)

In [None]:
# M* samples: families in the MB and MEF lists but not in the MH list.
[family for family in mb_active
 if family in mef_active and family not in mh_active]

In [None]:
# M* samples: families in the MEF list only (in neither MB nor MH).
[family for family in mef_active
 if not (family in mb_active or family in mh_active)]

In [None]:
# M* samples: families in the MH list only (in neither MB nor MEF).
[family for family in mh_active
 if not (family in mb_active or family in mef_active)]

In [None]:
# TN* samples compared across the three developmental stages.
plt_venn.venn3_unweighted(
    [set(families) for families in (tnb_active, tnef_active, tnh_active)],
    set_labels = ('B', 'E/F', 'H'),
)

In [None]:
# TN* samples: families in the TNB and TNEF lists but not in the TNH list.
[family for family in tnb_active
 if family in tnef_active and family not in tnh_active]

In [None]:
# TN* samples: families in the TNH list only (in neither TNB nor TNEF).
[family for family in tnh_active
 if not (family in tnb_active or family in tnef_active)]

In [None]:
# TN* samples: families in the TNB and TNH lists but not in the TNEF list.
[family for family in tnb_active
 if family in tnh_active and family not in tnef_active]

### Venn by flower type, with all developmental stages added as "or"

In [None]:
# Boolean masks per sample (normalized count strictly above 10). They carry a
# *_condition suffix because the plain *_active names hold lists at this point.
fb_active_condition = mirbase_hairpins_group['FB_norm'].gt(10)
fef_active_condition = mirbase_hairpins_group['FEF_norm'].gt(10)
fh_active_condition = mirbase_hairpins_group['FH_norm'].gt(10)

mb_active_condition = mirbase_hairpins_group['MB_norm'].gt(10)
mef_active_condition = mirbase_hairpins_group['MEF_norm'].gt(10)
mh_active_condition = mirbase_hairpins_group['MH_norm'].gt(10)

tnb_active_condition = mirbase_hairpins_group['TNB_norm'].gt(10)
tnef_active_condition = mirbase_hairpins_group['TNEF_norm'].gt(10)
tnh_active_condition = mirbase_hairpins_group['TNH_norm'].gt(10)

# Lenient presence per flower type: above threshold in at least one stage.
# NOTE: this overwrites the present_in_* lists built earlier with "and" logic.
female_any_stage = fb_active_condition | fef_active_condition | fh_active_condition
present_in_females = mirbase_hairpins_group.loc[female_any_stage].index.tolist()
print(f'There are {len(present_in_females)} families present Female')

male_any_stage = mb_active_condition | mef_active_condition | mh_active_condition
present_in_males = mirbase_hairpins_group.loc[male_any_stage].index.tolist()
print(f'There are {len(present_in_males)} families present Males')

hermaphrodite_any_stage = tnb_active_condition | tnef_active_condition | tnh_active_condition
present_in_hermaphrodites = mirbase_hairpins_group.loc[hermaphrodite_any_stage].index.tolist()
print(f'There are {len(present_in_hermaphrodites)} families present Hermaphrodites')

# Total number of rows (families) in the grouped table, for reference.
print(f'{mirbase_hairpins_group.shape[0]} families in the file')

In [None]:
# Three-way unweighted Venn diagram of the per-flower-type family lists.
plt_venn.venn3_unweighted(
    [set(families) for families in (present_in_females, present_in_males, present_in_hermaphrodites)],
    set_labels = ('Female', 'Male', 'Hermaphrodite'),
)

In [None]:
# Families listed for hermaphrodites only (in neither the male nor the female list).
[family for family in present_in_hermaphrodites
 if not (family in present_in_males or family in present_in_females)]

### Venn by developmental stage, with all flower types added as "or"

In [None]:
# Lenient presence per developmental stage: above threshold in at least one
# of the F, M or TN samples of that stage.
stage_b_any_type = fb_active_condition | mb_active_condition | tnb_active_condition
present_in_b = mirbase_hairpins_group.loc[stage_b_any_type].index.tolist()
print(f'There are {len(present_in_b)} families present in stage B')

stage_ef_any_type = fef_active_condition | mef_active_condition | tnef_active_condition
present_in_ef = mirbase_hairpins_group.loc[stage_ef_any_type].index.tolist()
print(f'There are {len(present_in_ef)} families present in stage E/F')

stage_h_any_type = fh_active_condition | mh_active_condition | tnh_active_condition
present_in_h = mirbase_hairpins_group.loc[stage_h_any_type].index.tolist()
print(f'There are {len(present_in_h)} families present in stage H')

In [None]:
# Three-way unweighted Venn diagram of the per-stage family lists.
plt_venn.venn3_unweighted(
    [set(families) for families in (present_in_b, present_in_ef, present_in_h)],
    set_labels = ('B', 'E/F', 'H'),
)

In [None]:
# Families shared by the B and E/F lists but missing from the H list.
[family for family in present_in_b
 if family in present_in_ef and family not in present_in_h]

In [None]:
# Families shared by the B and H lists but missing from the E/F list.
[family for family in present_in_b
 if family in present_in_h and family not in present_in_ef]

In [None]:
# Families listed for stage H only (in neither the B nor the E/F list).
[family for family in present_in_h
 if not (family in present_in_b or family in present_in_ef)]

### Create a table with only the most relevant miRNAs

In [None]:
# Families kept for the final table, written as their numeric IDs and expanded
# to the full 'vvi-MIR<number>' family names.
relevant_mirnas = [f'vvi-MIR{number}' for number in (172, 477, 828, 845, 2111)]

In [None]:
# Show the grouped counts for the selected families, in the order listed.
mirbase_hairpins_group.loc[relevant_mirnas, :]

In [None]:
# Write the selected families to disk. The file is tab-separated even though it is named .csv.
(
    mirbase_hairpins_group
    .loc[relevant_mirnas]
    .to_csv('mirnas_by_family_relevant_only.csv', sep = '\t')
)