# Example of R in Python in Colab

#### ( Data files not included in Colab, see https://hanlab.uth.edu/HeRA/download )

In [6]:
# show files
!ls

'Fallopian_tube (1).csv'   sample_data	       Testis-Target.rds
'Fallopian_tube (1).rds'   Testis-Target.csv


In [1]:
# Mix Python and R to convert files to .csv

"""
Instructions to use:
    1. open a terminal in the folder where your .rds files are
    2. Run this line in terminal:
       $ python3 py_r_csv.py
"""

"""
(User) Problem
We Have: many big .rds files (which can only be opened in R)
We Need: .csv files
We Must: Use python

Solution (Product)
The strategy is to use python to make and then call R scripts.

Use R to convert .rds R database files to .csv files,
in a way that is wrapped entirely inside a python script
that can be called inside a directory and 
will then convert all applicable files inside that directory.

e.g. Let's convert the .rds files found here:
    https://hanlab.uth.edu/HeRA/download

There are only 2 lines of code to be run in R (converting .rds to .csv):
    df <- readRDS("{doc_name}.rds")
    write.csv(df, "{doc_name}.csv")

The challenge is to automate this.
The automation and file management is easier in Python.
(And in some cases the required code-stack is python, not R.)

What we need is the ability to call an R script inside Python:
We can do this with one line (not including the import line):

    import subprocess
    return_code = subprocess.call(['/usr/bin/Rscript','convert.r'])

    or
    return_code = subprocess.call([ where R is on your computer , the r script])

(Note: "return_code = " is optional; it just captures the exit status
of the R script: 0 means it succeeded, any non-zero value means it failed.)
"""


import os
import subprocess

# a function to write the temporary R script
def write_script(doc_name):
    """Write the temporary R script ``convert.r`` into the current directory.

    The generated script reads ``<doc_name>.rds`` with ``readRDS`` and writes
    the result to ``<doc_name>.csv`` with ``write.csv``. An existing
    ``convert.r`` is overwritten.

    Args:
        doc_name: name of the .rds file without its ".rds" suffix.

    Returns:
        None
    """
    # Backslashes and double quotes would end or corrupt the R string
    # literal, so escape them before embedding the name in the script.
    r_name = doc_name.replace("\\", "\\\\").replace('"', '\\"')

    # the two lines of R that do the actual conversion
    script_text = (
        f'df <- readRDS("{r_name}.rds")\n'
        f'write.csv(df, "{r_name}.csv")'
    )

    # "with" guarantees the file is closed even if write() raises
    with open("convert.r", "w") as script_file:
        script_file.write(script_text)

    return None


# in the case of a long wait, give the user some idea
# of the progress through the files (crude but works)
# Note: the counter advances once per directory entry examined,
# whether or not that entry is an .rds file.
progress_counter = 0
print("progress Counter:")


# iterate through all files in the current directory
# (the generated R script uses bare file names, so the_path
# has to be the working directory)
the_path = "."
try:
    for filename in os.listdir(the_path):

        # check that each file is really an .rds file
        if filename.endswith(".rds"):

            # save the file name minus the .suffix
            doc_name = filename[:-4]

            # write the R script
            write_script(doc_name)

            # Run the R script:
            # (which converts the .rds to .csv!)
            # Note: requires import subprocess
            return_code = subprocess.call(['/usr/bin/Rscript', 'convert.r'])

            # a non-zero exit status means R failed; report it instead of
            # silently moving on to the next file
            if return_code != 0:
                print(f"WARNING: could not convert {filename} (Rscript exit code {return_code})")

        # Show Progress:
        progress_counter += 1
        print(progress_counter)

finally:
    # Clean Up Trash
    # remove the last R script; it only exists if at least one .rds file
    # was found, and it must not be left behind if the loop aborted
    if os.path.exists("convert.r"):
        os.remove("convert.r")

# Yay!!
print("All Done!!")


progress Counter:
1
2
3
4
All Done!!


In [2]:
# Show files
!ls


'Fallopian_tube (1).csv'   sample_data	       Testis-Target.rds
'Fallopian_tube (1).rds'   Testis-Target.csv


# Check the resulting .csv

In [4]:
import pandas as pd

In [5]:
# load the converted CSV into a DataFrame
csv_path = "Testis-Target.csv"
df = pd.read_csv(csv_path)
# preview only the first five rows
df.head()

Unnamed: 0.1,Unnamed: 0,Symbol,eRNA,R,P,FDR,N,echr,estart,eend,gchr,gstart,gend,tss,dis
0,54370,TAS1R3,ENSR00000000145,0.328111,5.778029e-06,3.27373e-05,183,chr1,1086450,1092451,chr1,1266660,1270694,1266660,177210.0
1,108552,AL627309.1,ENSR00000000146,0.536818,4.884981e-15,1.233228e-13,183,chr1,1090087,1096088,chr1,92230,129217,129217,963870.0
2,108560,RP4-669L17.8,ENSR00000000146,0.543113,1.998401e-15,5.387148e-14,183,chr1,1090087,1096088,chr1,326096,328112,326096,766991.0
3,108592,AL669831.1,ENSR00000000146,0.330486,4.897052e-06,2.81012e-05,183,chr1,1090087,1096088,chr1,661265,714006,714006,379081.0
4,108594,FAM87B,ENSR00000000146,0.401077,1.838805e-08,1.591011e-07,183,chr1,1090087,1096088,chr1,752751,755217,752751,340336.0
