# Example of R in Python in Colab

#### ( Data files not included, see https://hanlab.uth.edu/HeRA/download )

In [2]:
!ls

Cells_leukemia-Target.rds  sample_data	Testis-Target.rds


In [None]:
# Mix Python and R to convert files to .csv

# (User) Problem
# We know / We have: big .rds files (which can only be opened in R)
# We need / We don't have: .csv files
# We must: Use python,
# and convert a large number of files,
# too many to do individually manually

# Solution (Product)
#
# Use R to convert .rds (R data) files to .csv files,
# in a way that is wrapped inside a Python script
# that can be called once and will convert all applicable files
# inside a directory


# e.g. files https://hanlab.uth.edu/HeRA/download
# core code to run an r script in python
# https://stackoverflow.com/questions/24544190/calling-r-script-from-python-using-rpy2#24544362

# There are only 2 lines of code to be run in R
# df <- readRDS("{doc_name}.rds")
# write.csv(df, "{doc_name}.csv")
#
# The challenge is to automate this.
# The automation and file management is easier in Python
# (and in some cases the required code-stack is python, not R)

# What we need is the ability to call an R script inside Python:
# We can do this with these ~two lines (including the import):

# import subprocess
# retcode = subprocess.call(['/usr/bin/Rscript','convert.r'])
# or, in general:
# retcode = subprocess.call([path to Rscript on your computer, path to the R script])

# The strategy is to use python to make and then call R scripts

# Instructions to use:
# 1. open a terminal in the folder where your .rds files are
# 2. Run this line in terminal:
#    $ python3 py_r_csv.py

import os
import subprocess

# a function to write the temporary R script
def write_script(doc_name):
    """Write the temporary R script ``convert.r`` for one .rds file.

    The generated script has two lines: it reads ``<doc_name>.rds`` with
    ``readRDS`` and writes the result to ``<doc_name>.csv`` with ``write.csv``.
    Any existing ``convert.r`` in the current working directory is overwritten.

    Parameters
    ----------
    doc_name : str
        File name of the .rds file without its ``.rds`` suffix.

    Returns
    -------
    None
    """
    # The name is embedded in double-quoted R string literals, so backslashes
    # and double quotes must be escaped or the generated script will not parse.
    # Backslashes are escaped first so the quote escapes are not doubled.
    r_name = doc_name.replace("\\", "\\\\").replace('"', '\\"')

    # the two lines of R that do the actual conversion
    script_text = f'df <- readRDS("{r_name}.rds")\nwrite.csv(df, "{r_name}.csv")'

    # `with` guarantees the file is closed even if the write fails
    with open("convert.r", "w") as script_file:
        script_file.write(script_text)

    return None

# in the case of a long wait, give the user some idea
# of the progress through the files (crude but works)
# Progress counter for long runs (crude but works).
# Note: it advances once per directory entry examined,
# not once per converted file.
progress_counter = 0
print("progress Counter:")

# iterate through all entries in the directory
# (the generated R script refers to files by bare name, so the_path
# is expected to be the current working directory)
the_path = "."
for filename in os.listdir(the_path):

    # only convert entries whose name ends in .rds
    if filename.endswith(".rds"):

        # save the file name minus the .suffix
        doc_name = filename[:-4]

        # write the R script
        write_script(doc_name)

        # run the R script:
        # which converts the .rds to .csv!
        # note: requires import subprocess
        retcode = subprocess.call(['/usr/bin/Rscript','convert.r'])

        # A non-zero exit status means Rscript reported a failure;
        # say so instead of silently moving on to the next file.
        if retcode != 0:
            print(f"WARNING: Rscript exited with code {retcode} for {filename}")

    # show progress:
    progress_counter += 1
    print(progress_counter)


# Clean up Trash
# remove the last R script; it only exists if at least one .rds file
# was found, so guard the removal to avoid a FileNotFoundError
if os.path.exists("convert.r"):
    os.remove("convert.r")

# Yay!!
print("All Done!!")


progress Counter:
1
2
3
4
All Done!!


In [None]:
# Show files
!ls


Cells_leukemia-Target.csv  sample_data	      Testis-Target.rds
Cells_leukemia-Target.rds  Testis-Target.csv


# Check the resulting .csv

In [None]:
import pandas as pd

In [None]:
# Load one of the converted files to confirm the .csv is readable from Python.
df = pd.read_csv("Cells_leukemia-Target.csv")
# Preview the first rows.
# NOTE(review): the "Unnamed: 0" column is presumably the R row names that
# write.csv emits by default — confirm against the source data.
df.head()

Unnamed: 0.1,Unnamed: 0,K-562-SM-4BRWK,K-562-SM-4DM4W,K-562-SM-4IHK7,K-562-SM-4JBIQ,K-562-SM-47JYY,K-562-SM-4KKZ9,K-562-SM-4AD4F,K-562-SM-4B66B,K-562-SM-48FEU,K-562-SM-4AT3W,K-562-SM-46MWI,K-562-SM-48TE3,K-562-SM-4BONS,K-562-SM-4LVKX,K-562-SM-4LMI2,K-562-SM-4GICD,K-562-SM-4EDPU,K-562-SM-5GZWN,K-562-SM-5Q5CL,K-562-SM-5LU2Y,K-562-SM-5K7WK,K-562-SM-4PQYN,K-562-SM-4U82S,K-562-SM-5HL7A,K-562-SM-48U2C,K-562-SM-5NQ9D,K-562-SM-5Q59U,K-562-SM-5J1MB,K-562-SM-5IJB7,K-562-SM-5DUV6,K-562-SM-4OOR8,K-562-SM-4YCCQ,K-562-SM-5P9HH,K-562-SM-5PNU4,K-562-SM-57WAW,K-562-SM-5IFHC,K-562-SM-5RQHM,K-562-SM-5K7ZB,K-562-SM-5MR32,...,K-562-SM-5LU8G,K-562-SM-4RGL3,K-562-SM-5O9BA,K-562-SM-5PNWU,K-562-SM-5J2LX,K-562-SM-4M1XD,K-562-SM-4WWAK,K-562-SM-5P9EQ,K-562-SM-2D7VW,K-562-SM-2D7VY,K-562-SM-2I5EP,K-562-SM-2HMJ2,K-562-SM-2I3E7,K-562-SM-2YUNN,K-562-SM-2S1OR,K-562-SM-2S1RI,K-562-SM-2S1Q5,K-562-SM-2TF5R,K-562-SM-2TF4E,K-562-SM-2TC5E,K-562-SM-26GMQ,K-562-SM-32PLC,K-562-SM-33HC7,K-562-SM-32PMP,K-562-SM-32QPQ,K-562-SM-3GAFC,K-562-SM-2XCE8,K-562-SM-3DB9M,K-562-SM-3GILO,K-562-SM-2XCFL,K-562-SM-3K2BF,K-562-SM-3GADY,K-562-SM-3GIKB,K-562-SM-2XCBH,K-562-SM-3MJHH,K-562-SM-3P61Y,K-562-SM-3NMDG,K-562-SM-3NMAP,K-562-SM-3NB3I,K-562-SM-3LK7S
0,ENSR00000000015,1.0953,0.9974,0.929,0.8863,1.1057,0.8779,0.803,1.1197,0.9212,1.2372,1.1463,1.3806,0.9896,1.0936,1.0659,1.247,1.1995,0.6381,0.8714,1.1283,1.1345,0.8327,1.3073,0.6017,1.2502,0.8253,1.6143,1.1875,1.3011,0.9459,0.8207,1.3543,1.3399,1.4122,1.2929,0.7967,0.8824,1.1914,1.3687,...,1.3255,0.6648,1.1139,1.0973,0.8651,0.745,1.4709,1.1067,1.4047,0.3014,1.5374,2.7042,0.6669,1.0263,0.8088,1.3704,0.5853,1.6739,1.4161,0.9422,6.2521,0.8044,1.7249,0.8414,1.0704,1.2687,1.0186,0.7077,1.5587,1.163,0.5192,1.2028,1.1134,0.9813,0.9712,0.8351,1.071,1.1736,1.1972,1.1923
1,ENSR00000000037,1.4191,1.3662,0.7911,0.8879,1.2577,1.1946,1.0766,1.081,1.3234,1.0665,1.5895,0.9618,1.4919,1.2204,1.3822,1.2262,1.1938,1.0563,1.0622,1.0763,1.159,1.3508,1.2547,0.9017,1.0887,1.0912,1.6639,1.428,1.1443,0.9061,0.8961,1.0487,1.3573,1.5688,1.1243,1.0708,1.0428,0.8652,1.1061,...,1.4445,0.8704,1.4445,1.2031,0.9689,0.8894,1.2406,1.2519,1.862,0.3027,1.9399,2.194,0.8654,0.9567,0.3894,1.6244,0.4776,1.8368,1.6644,1.0309,7.6508,1.2257,1.733,0.9917,1.3751,1.2577,1.2484,1.1182,1.5401,1.4723,0.8975,1.1245,1.461,1.0934,0.9415,1.0711,1.2233,1.1445,1.3236,0.7313
2,ENSR00000000236,1.24,0.922,1.7152,0.7856,1.2081,1.2783,1.3523,1.4406,1.9895,1.2977,1.5456,0.8972,1.404,1.0903,1.1357,1.3432,0.8523,1.3859,1.4034,1.6529,0.9729,1.2881,1.6769,1.0609,0.768,1.1118,1.7983,1.1298,1.5883,1.5928,0.7998,1.4342,1.7815,1.7022,0.9624,0.825,0.9379,1.4661,1.3564,...,1.1451,1.482,1.19,1.5396,1.2311,1.3813,0.9714,1.8129,2.8379,0.8541,2.5122,3.1998,1.7679,1.7403,0.6149,1.5506,0.7137,1.9072,0.3815,0.547,8.622,1.4057,1.5152,1.2513,1.1328,1.9358,1.1182,1.3212,1.4105,1.3069,0.9916,1.4163,1.0726,1.4112,1.2493,1.0279,1.1818,1.247,1.1387,0.8415
3,ENSR00000000323,2.1955,1.6801,1.726,1.6948,2.1856,2.2657,2.772,1.9233,1.1369,1.6385,2.4102,1.9217,1.5546,1.9241,1.8182,2.3844,1.5097,1.8957,1.8014,1.4303,0.426,1.2391,1.8744,1.0322,1.219,1.4217,1.7058,1.2824,0.9629,1.3809,1.2519,1.7769,2.4146,2.2718,1.1783,1.0625,0.9064,1.9302,2.1693,...,2.2764,1.9546,1.9584,1.9591,1.3572,1.906,0.95,2.0632,3.3473,1.1667,2.8584,1.6814,1.8975,1.6376,0.1968,1.2668,0.4957,1.9439,0.6187,0.739,3.2633,1.8458,2.3632,1.7027,2.1123,1.9509,1.5494,1.5839,1.7779,1.8072,1.5362,1.6995,1.0336,1.708,1.5954,1.4853,1.8873,1.6751,0.6188,1.4044
4,ENSR00000000324,3.2762,2.0711,2.1926,1.8514,2.8508,2.7883,3.171,2.7448,1.3463,2.3901,2.5244,1.9027,1.7264,2.5279,2.4511,2.8726,1.6984,2.4078,2.1296,2.0438,0.7893,1.6995,2.1013,1.2842,4.1085,1.8803,2.0054,1.6051,1.2985,1.8713,1.8236,2.1733,3.3283,2.7098,1.4839,1.5147,1.5396,1.874,2.7458,...,2.5419,2.0209,2.5674,2.2253,1.7868,2.5395,1.3389,2.8139,3.7475,1.1234,3.1788,1.8862,2.4943,2.4543,0.3139,1.7676,0.6461,2.6067,0.1837,0.7702,2.3553,2.0599,2.5944,2.2185,2.0992,2.4848,1.8422,1.9052,2.343,2.2862,1.8936,2.6625,0.9419,1.7244,2.2395,1.9372,2.4559,2.3324,0.8795,1.6557
