In [1]:
import os, json
from rna_data import DataFolder
import numpy as np
from rna_data.env import DATA_FOLDER_TESTING, DATA_FOLDER
# Root folder that the datafolders in this notebook are written to and loaded from.
path_test_data = DATA_FOLDER

# Sample input files shipped in the testing folder. The paths are assembled with
# os.path.join rather than manual '/' concatenation, so a trailing separator on
# DATA_FOLDER_TESTING cannot produce a doubled slash.
path_test_data_sequences = os.path.join(DATA_FOLDER_TESTING, 'sequences.fasta')
path_test_data_dreem = os.path.join(DATA_FOLDER_TESTING, 'dreem_output.json')
path_test_data_ct = os.path.join(DATA_FOLDER_TESTING, 'ct_files')

# Datafolder name shared by the Hugging Face download, local load and upload examples.
TESTING_DATAFOLDER_NAME = 'for_testing'

# Log in to Hugging Face

### 1. Join the Hugging Face Community

[Hugging Face](https://huggingface.co)

### 2. Join the Rouskin Lab organization

[Rouskin Lab on Hugging Face](https://huggingface.co/RouskinLab)

### 3. Install the Hugging Face dependencies


In [None]:
!pip install huggingface
!pip install huggingface_hub



### 4. Log in to the Hugging Face Hub

1. Get a token from https://huggingface.co/settings/tokens

2. Run this command line **in a terminal** and follow the instructions:


In [None]:
# Start the Hugging Face CLI login and follow its instructions.
# NOTE(review): the step above asks for this to be run in a terminal (there, drop
# the leading `!`); an interactive prompt may not work from a notebook cell — confirm.
!huggingface-cli login

# Create a datafolder from local files


## From DREEM output

In [2]:
# Build a datafolder from the sample DREEM output file.
datafolder = DataFolder.from_dreem_output(
    path_in=path_test_data_dreem,
    path_out=path_test_data,
    name='from_dreem_output',  # optional: the name of the input data is used by default
    predict_structure=True,
    generate_npy=True,
)

## From a list of CT files

In [30]:
# Build a datafolder from the sample folder of CT files.
# NOTE(review): per the notebook author, this "won't take the structure from the
# ct files into account" — confirm which option that remark refers to.
datafolder = DataFolder.from_ct_folder(
    path_in=path_test_data_ct,
    path_out=path_test_data,
    name='from_ct_folder',  # optional: the name of the input data is used by default
    predict_dms=False,
    generate_npy=True,
)


## From fasta

In [34]:
# Build a datafolder from the sample FASTA file, requesting both structure and
# DMS predictions.
datafolder = DataFolder.from_fasta(
    path_in=path_test_data_sequences,
    path_out=path_test_data,
    name='sequences',  # optional: the name of the input data is used by default
    predict_structure=True,
    predict_dms=True,
    generate_npy=True,
)

## Explore the datafolder object

In [28]:
# Summary line and on-disk location of the datafolder.
print(datafolder)
print('main folder:',datafolder.get_main_folder())

# Contents of the generated .npy files. The saved arrays hold variable-length
# entries (object dtype), which np.load only reads with allow_pickle=True.
# Unpickling can execute arbitrary code, so only do this for files you trust.
print('sequences.npy file:\n',np.load(datafolder.get_sequences_npy(), allow_pickle=True))
print('base_pairs.npy file:\n',np.load(datafolder.get_base_pairs_npy(), allow_pickle=True))
print('dms.npy file:\n',np.load(datafolder.get_dms_npy(), allow_pickle=True))

# Read the JSON through a context manager so the file handle is closed as soon
# as the content is parsed, instead of being left open until garbage collection.
with open(datafolder.get_json(), 'r') as json_file:
    json_content = json.load(json_file)
print('json file:\n', json_content)

# Paths of the individual files that make up the datafolder.
print('dms.npy file:',datafolder.get_dms_npy())
print('json file:',datafolder.get_json())
print('source files:',datafolder.get_source_files())
print('info file:',datafolder.get_info_file())

CreateDatafolderFromFasta @data/datafolders/sequences
main folder: data/datafolders/sequences
sequences.npy file:
 [array([1, 1, 1, 2, 2, 2, 2, 1, 3, 4, 1, 3, 3, 3, 3, 4, 4, 4])
 array([4, 4, 4, 2, 2, 2, 2, 4, 1, 3, 4, 1, 4, 3, 3, 3, 3, 1, 1, 1])]
base_pairs.npy file:
 [array([[ 6, 11],
        [ 5, 12],
        [ 4, 13],
        [ 3, 14],
        [ 2, 15],
        [ 1, 16],
        [ 0, 17]]) array([[ 6, 13],
                          [ 5, 14],
                          [ 4, 15],
                          [ 3, 16],
                          [ 2, 17],
                          [ 1, 18]])]
dms.npy file:
 [list([0.3317625732102161, 0.8619477949493498, 0.9849505502538177, 0.9997828311396947, 0.9999562120800795, 0.9999454042235505, 0.9970246205212348, 3.298876877182004e-10, 1.0113344525791075e-08, 9.30187856307975e-09, 2.1481183689942838e-10, 0.996953491189411, 0.9999737756512344, 0.9999846271614127, 0.9997971647502685, 0.9848757245213886, 0.8618760196213241, 0.3319091664146036])
 list([0.

# Load a datafolder from Hugging Face

In [None]:
# Fetch the testing datafolder from Hugging Face, using the local data folder
# as the output location.
datafolder = DataFolder.from_huggingface(
    path_out=path_test_data,
    name=TESTING_DATAFOLDER_NAME,
)

# Load a datafolder from a local folder

In [None]:
# Load the testing datafolder that is already present in the local data folder.
datafolder = DataFolder.from_local(
    path_in=path_test_data,
    name=TESTING_DATAFOLDER_NAME,
)

# Push a local datafolder to Hugging Face

## Push the datafolder to Hugging Face

In [31]:
# Create the datafolder that will be pushed, built from the sample FASTA file
# under the shared testing name.
datafolder = DataFolder.from_fasta(
    path_in=path_test_data_sequences,
    path_out=path_test_data,
    name=TESTING_DATAFOLDER_NAME,  # optional: the name of the input data is used by default
    predict_structure=True,
    predict_dms=True,
    generate_npy=True,
)

### 1. Create a repository if it does not exist

In [None]:
# Create the repository for this datafolder; exist_ok=True lets the cell be
# re-run when the repository is already there.
# Further arguments: https://huggingface.co/docs/huggingface_hub/guides/repository#create-a-repository
datafolder.create_repo(private=True, exist_ok=True)

### 2. Push the datafolder to Hugging Face

In [32]:

# Start the upload as a future so the cell does not block while it is submitted.
# Further arguments: https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder
future = datafolder.upload_folder(
    run_as_future=True,
    revision='main',  # branch name
    commit_message='Upload demo dataset',
    commit_description='This is a demo dataset',
)

future.done()  # non-blocking check: True once the upload is done
future.result()  # blocking: waits for the upload to complete; its value is the cell output

'https://huggingface.co/datasets/rouskinlab/for_testing/tree/main/'