In [None]:
# Load IPython's autoreload extension and enable mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from rna_data import DataFolder
from rna_data.config import DATA_FOLDER_TESTING, DATA_FOLDER

# Output location passed as `path_out` to every DataFolder constructor below.
path_test_data = DATA_FOLDER

# Input files/folders located under DATA_FOLDER_TESTING, one per local constructor.
path_test_data_sequences = f'{DATA_FOLDER_TESTING}/sequences.fasta'
path_test_data_dreem = f'{DATA_FOLDER_TESTING}/dreem_output.json'
path_test_data_ct = f'{DATA_FOLDER_TESTING}/ct_files'

# Dataset name used for the Hugging Face download and upload examples.
TESTING_DATASET_NAME = 'for_testing'

# Log in to Hugging Face

### 1. Join the Hugging Face Community

[Hugging Face](https://huggingface.co)

### 2. Join the Rouskin Lab organization

[Rouskin Lab on Hugging Face](https://huggingface.co/RouskinLab)

### 3. Install the Hugging Face dependencies


In [None]:
!pip install huggingface
!pip install huggingface_hub



### 4. Login to the Hugging Face Hub

1. Get a token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

2. Run the following command **in a terminal** and follow the instructions:


In [None]:
# Log in with the Hugging Face CLI. The instructions for this step say to run
# the command in a terminal; it is shown here as a shell escape for reference.
!huggingface-cli login

# Create a dataset from local files

## From fasta

In [None]:
# Build a DataFolder from the FASTA file, with .npy generation and both
# structure and DMS prediction switched on.
datafolder = DataFolder.from_fasta(
    name='sequences',  # name given to the dataset
    path_in=path_test_data_sequences,
    path_out=path_test_data,
    generate_npy=True,
    predict_structure=True,
    predict_dms=True,
)

## From DREEM output

In [None]:
# Build a DataFolder from the DREEM output JSON, with .npy generation and
# structure prediction switched on.
datafolder = DataFolder.from_dreem_output(
    name='from_dreem_output',  # name given to the dataset
    path_in=path_test_data_dreem,
    path_out=path_test_data,
    generate_npy=True,
    predict_structure=True,
)

## From a list of CT files

In [None]:
# Build a DataFolder from the folder of CT files, with .npy generation on and
# DMS prediction off.
datafolder = DataFolder.from_ct_folder(
    name='from_ct_folder',  # name given to the dataset
    path_in=path_test_data_ct,
    path_out=path_test_data,
    generate_npy=True,
    # Original remark: "won't take the structure from the ct files into account".
    # NOTE(review): confirm that this remark really describes predict_dms.
    predict_dms=False,
)

## Explore the dataset object

In [None]:
# Show the DataFolder itself, then the value returned by each of its getters.
print(datafolder)

# (label, getter name) pairs, printed in this order; each getter is only
# looked up and called right before its line is printed.
for label, getter_name in (
    ('main folder:', 'get_main_folder'),
    ('structure.npy file:', 'get_structure_npy'),
    ('dms.npy file:', 'get_dms_npy'),
    ('json file:', 'get_json'),
    ('source files:', 'get_source_files'),
    ('info file:', 'get_info_file'),
):
    print(label, getattr(datafolder, getter_name)())

# Load a dataset from Hugging Face

In [None]:
# Build a DataFolder from the Hugging Face dataset named TESTING_DATASET_NAME,
# using path_test_data as the output location.
datafolder = DataFolder.from_huggingface(name=TESTING_DATASET_NAME, path_out=path_test_data)

# Push a local dataset to Hugging Face

## Push the dataset to Hugging Face

In [None]:
# Create the local dataset that the following cells push to Hugging Face:
# same FASTA input as before, but named TESTING_DATASET_NAME.
datafolder = DataFolder.from_fasta(
    name=TESTING_DATASET_NAME,  # name given to the dataset
    path_in=path_test_data_sequences,
    path_out=path_test_data,
    generate_npy=True,
    predict_structure=True,
    predict_dms=True,
)

### 1. Create a repository if it does not exist

In [None]:
# Create the repository for this DataFolder as a private repo; exist_ok=True is
# passed so the call is requested not to fail when the repo is already there.
# Find more arguments here: https://huggingface.co/docs/huggingface_hub/guides/repository#create-a-repository
datafolder.create_repo(exist_ok=True, private=True)

### 2. Push the dataset to Hugging Face

In [None]:

# Find more arguments here: https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder
# run_as_future=True is passed so that the returned object can be polled with
# .done() and awaited with .result() below.
future = datafolder.upload_folder(
    revision='main',  # branch name
    commit_message='Upload demo dataset',
    commit_description='This is a demo dataset',
    run_as_future=True,
)

# Only the last expression of a cell is displayed, so the status has to be
# printed explicitly to be visible.
print('upload done:', future.done())  # True if the upload is done

# Wait for the upload to complete (blocking action); as the last expression of
# the cell, the returned value is shown as the cell output.
future.result()