In [None]:
import os
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import regexp_extract, col
from delta import *

# ---------------------------------------------------------------------------
# Job parameters. The bracketed values are placeholders that must be replaced
# with a real warehouse directory and the full path of the miRTarBase CSV file.
# ---------------------------------------------------------------------------
warehouse_diretory_path = '[YOUR_WAREHOUSE]'
mirtarbase_mirna_target_gene_file = 'file:///[FILE_FULL_NAME:hsa_mti.csv]'
biological_database_name = 'biological_database'
mirtarbase_mirna_target_gene_table_name = 'bronze_mirtarbase_mirna_target_gene_interaction'

# Spark settings, kept as a key -> value mapping; insertion order is the order
# in which they are applied to the SparkConf.
spark_settings = {
    'spark.master': 'local[*]',
    'spark.driver.host': 'localhost',
    'spark.app.name': 'miRTarBase mirna Target Gene Interaction Importer',
    'spark.ui.showConsoleProgress': 'true',
    'spark.sql.execution.arrow.pyspark.enabled': 'true',
    # Delta Lake SQL extension and catalog implementation.
    'spark.sql.extensions': 'io.delta.sql.DeltaSparkSessionExtension',
    'spark.sql.catalog.spark_catalog': 'org.apache.spark.sql.delta.catalog.DeltaCatalog',
    # Managed tables are stored under the warehouse directory ...
    'spark.sql.warehouse.dir': warehouse_diretory_path,
    # ... and the driver JVM gets derby.system.home pointed at the same place.
    'spark.driver.extraJavaOptions': f'-Dderby.system.home={warehouse_diretory_path}',
}

# NOTE(review): the Delta classes named above must already be on the Spark
# classpath; this notebook does not add them itself - confirm the environment.
conf = SparkConf().setAll(list(spark_settings.items()))

# Hive support is enabled so databases/tables are registered in a metastore.
spark = (
    SparkSession.builder
    .config(conf=conf)
    .enableHiveSupport()
    .getOrCreate()
)

In [None]:
# Projection applied to the raw CSV: header names that contain spaces or
# parentheses are backtick-quoted and aliased to underscore-separated names;
# 'miRNA' and 'Experiments' are kept under their original names.
selected_columns = [
    "`miRTarBase ID` AS miRTarBase_ID",
    'miRNA',
    "`Species (miRNA)` AS Species_miRNA",
    "`Target Gene` AS Target_Gene",
    "`Target Gene (Entrez ID)` AS Target_Gene_Entrez_Gene_ID",
    "`Species (Target Gene)` AS Species_Target_Gene",
    'Experiments',
    "`Support Type` AS Support_Type",
    "`References (PMID)` AS References_PMID",
]

# Read the semicolon-delimited file, using its first row as the header.
mirna_target_gene_interaction_df = (
    spark.read
    .option('header', True)
    .option('delimiter', ';')
    .csv(mirtarbase_mirna_target_gene_file)
    .selectExpr(*selected_columns)
)

In [None]:
# Make sure the target database exists; the statement is a no-op when it does.
create_database_statement = f'CREATE DATABASE IF NOT EXISTS {biological_database_name};'
spark.sql(create_database_statement)

In [None]:
# Switch the session's current database so the table name used by the write
# below resolves inside it.
use_database_statement = f'USE {biological_database_name};'
spark.sql(use_database_statement)

In [None]:
# Persist the DataFrame as a Delta table in the current database, replacing
# both the data and the schema of any existing table with the same name.
#
# The former 'partitionOverwriteMode=dynamic' option was removed: Delta Lake
# does not allow 'overwriteSchema' together with dynamic partition overwrite,
# and this write defines no partition columns, so a full overwrite is the
# intended (and only meaningful) behavior.
mirna_target_gene_interaction_df.write \
    .format('delta') \
    .mode('overwrite') \
    .option('overwriteSchema', 'true') \
    .saveAsTable(mirtarbase_mirna_target_gene_table_name)

In [None]:
# Shut down the SparkSession created in the first cell; `spark` cannot be
# used by any cell run after this one without re-running the setup cell.
spark.stop()