# Generate the lncRNA based on log2Foldchange

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import numpy as np
from ipywidgets import widgets
from IPython.display import Markdown, display

def printmd(string):
    display(Markdown(string))

    # Create text widget for output
output_text = widgets.Text()

# Create text widget for input
input_text = widgets.Text()

# Define function to bind value of the input to the output variable 
def bind_input_to_output(sender):
    output_text.value = input_text.value

# Tell the text input widget to call bind_input_to_output() on submit
input_text.on_submit(bind_input_to_output)

In [5]:
# Display output text box widget (will populate when value submitted in input)
printmd("**Type the log2fold that you want to use?**")
output_text


**Type the log2fold that you want to use?**

Text(value='0.58')

In [6]:
new_variable = float(output_text.value)
printmd("**Cut-off:**")
print(new_variable)

#read files for foldchange criteria
path = r'L:\basic\divg\CEMM-Lexor\Leandro\TEAD\TEAD_lnRNA/*/known_gene/' 
all_files = glob.glob(path + "gb_change.txt")
list_of_dfs = [pd.read_csv(all_files, sep="\t").drop(['Gene', 'baseMean','lfcSE','stat','pvalue', 'padj'], 1).set_index('Transcript') for all_files in all_files]

#loop for creating a list of datadrames - 1 per file
for dataframe, all_file in zip(list_of_dfs, all_files):
    all_file2 = all_file.split('\\')
    dataframe.rename(columns={'log2FoldChange':all_file2[7]}, inplace=True)

#Combine all the dfs
combined_df = pd.concat(list_of_dfs, axis=1, sort=True)
#combined_df_pvalue = pd.concat(list_of_dfs2, axis=1, sort=True)

#Create a coloumn named label containing all the cell line intersections based on gene name. Replace "-1" by the wanted foldchange
combined_df['label'] = (combined_df < new_variable).apply(lambda y: combined_df.columns[y.tolist()].tolist(), axis=1)

#Count the number of intersection using "," as delimiter
combined_df['Count Overlap'] = combined_df['label'].astype(str).str.count("\,") +1 

#Sort based on # of intersections
combined_df_sorted = combined_df.sort_values(by=['Count Overlap'], ascending=False)

#remove empty intersection
combined_df_sorted_clean = combined_df_sorted[~combined_df_sorted['label'].astype(str).str.contains('\[]')].fillna(0)
combined_df_sorted_clean

**Cut-off:**

0.58


Unnamed: 0,CL40_TEAD_lncRNA,COLO320_TEAD_lncRNA,GTG7_TEAD_lncRNA,HT55_TEAD_lncRNA,HUTU80_TEAD_lncRNA,LS180_TEAD_lncRNA,LS411_TEAD_lncRNA,MDST8_TEAD_lncRNA,SW1463_TEAD_lncRNA,label,Count Overlap
ENSG00000279088.1_6,-1.985314,0.256104,-0.054574,-2.575375,-0.263831,-1.954861,-1.171365,-1.056075,-2.483758,"[CL40_TEAD_lncRNA, COLO320_TEAD_lncRNA, GTG7_T...",9
ENSG00000279861.1_6,-1.426898,-0.787838,-0.029133,-2.485955,0.450377,-0.769824,-0.637907,-0.292813,0.207655,"[CL40_TEAD_lncRNA, COLO320_TEAD_lncRNA, GTG7_T...",9
ENSG00000265408.1_6,-0.034071,-0.043602,-0.585374,0.298556,-1.130773,-0.018665,0.457003,-0.527306,0.283889,"[CL40_TEAD_lncRNA, COLO320_TEAD_lncRNA, GTG7_T...",9
ENSG00000267322.2_5,0.139018,0.049950,-0.353444,0.163068,-0.814161,-0.518287,-0.172769,-0.638427,-0.534161,"[CL40_TEAD_lncRNA, COLO320_TEAD_lncRNA, GTG7_T...",9
ENSG00000236137.1_5,-0.002486,0.084431,-0.241415,-1.145611,-0.351489,-0.854069,-0.562251,-0.132422,-0.018930,"[CL40_TEAD_lncRNA, COLO320_TEAD_lncRNA, GTG7_T...",9
...,...,...,...,...,...,...,...,...,...,...,...
ENSG00000248901.1_4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.150003,[SW1463_TEAD_lncRNA],1
ENSG00000248858.7_6,0.000000,0.389693,0.000000,0.000000,0.000000,0.000000,0.000000,1.001135,0.000000,[COLO320_TEAD_lncRNA],1
ENSG00000248884.1_5,0.000000,-2.055226,0.000000,0.000000,0.000000,0.000000,0.000000,0.712415,0.000000,[COLO320_TEAD_lncRNA],1
ENSG00000248890.1_5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.404962,0.000000,[MDST8_TEAD_lncRNA],1


## Save the matrix as a excel file:

In [18]:
#save file
combined_df_sorted_clean.to_csv("/Users/leandromoreno/Desktop/laura_upregulation_shared.csv", sep='\t')
#combined_df_pvalue_sorted_clean.to_csv("lncRNA_foldchange05_pvaluenew2.csv", sep='\t')

## Expression of a specific lncRNA

In [7]:
combined_df_sorted_clean.loc["ENSG00000163597.14_2"]

CL40_TEAD_lncRNA                                                -0.40411
COLO320_TEAD_lncRNA                                             0.750437
GTG7_TEAD_lncRNA                                                0.322119
HT55_TEAD_lncRNA                                                -1.13087
HUTU80_TEAD_lncRNA                                              0.983587
LS180_TEAD_lncRNA                                               0.324583
LS411_TEAD_lncRNA                                               0.643906
MDST8_TEAD_lncRNA                                              -0.933503
SW1463_TEAD_lncRNA                                             -0.328891
label                  [CL40_TEAD_lncRNA, GTG7_TEAD_lncRNA, HT55_TEAD...
Count Overlap                                                          6
Name: ENSG00000163597.14_2, dtype: object

In [9]:
glob.__version__

AttributeError: module 'glob' has no attribute '__version__'

In [11]:
ipywidgets.__version__

NameError: name 'ipywidgets' is not defined