# Inserting tables

The database permissions do not allow `LOAD DATA INFILE`, so bulk-loading the CSV files directly is not possible.
Instead, this notebook reads each miscellaneous metadata table with pandas and inserts it using `DataFrame.to_sql`.

In [7]:
import urllib.parse

import numpy as np
import pandas as pd
import sqlalchemy
import tqdm

In [8]:
# Load the MySQL password from the first line of a local file
with open('../../mysql_password.txt') as f:
    password = f.readline().strip()
# Create MySQL connector.
# The password is embedded in a URL, so it is percent-encoded first: characters
# such as '@', ':', '/' or '%' would otherwise be read as URL delimiters and
# corrupt the connection string. safe='' makes sure '/' is encoded as well.
# NOTE(review): assumes the file stores the plain (not already URL-encoded)
# password -- confirm.
encoded_password = urllib.parse.quote(password, safe='')
engine = sqlalchemy.create_engine(
    f"mysql+mysqlconnector://mnz2108:{encoded_password}@localhost/effect_nsides"
)

In [13]:
# List the tables currently present in the connected database
list(engine.execute('show tables;'))

[('CONDITION_CONCEPT',),
 ('CONDITION_OCCURRENCE',),
 ('DRUG_CONCEPT',),
 ('DRUG_EXPOSURE',),
 ('OFFSIDES',),
 ('REPORT',),
 ('TWOSIDES',)]

## CONDITION_CONCEPT

In [14]:
# Drop and recreate CONDITION_CONCEPT so the append below always starts from an
# empty table.
engine.execute('DROP TABLE IF EXISTS CONDITION_CONCEPT;')

engine.execute('''
CREATE TABLE CONDITION_CONCEPT (
condition_concept_id int,
condition_concept_name varchar(255),
condition_meddra_id int,
condition_snomed_id int
);
''')

# Read the compressed CSV and rename its columns to the column names declared
# in the CREATE TABLE statement above.
condition_concept = (
    pd.read_csv('../../data/tables/condition_concept.csv.xz')
    .rename(columns={
        'concept_id': 'condition_concept_id',
        'concept_name': 'condition_concept_name',
        'meddra_concept_id': 'condition_meddra_id',
        'snomed_concept_id': 'condition_snomed_id',
    })
)

# Append the rows to the freshly created table; the DataFrame index is not
# written as a column.
condition_concept.to_sql(
    'CONDITION_CONCEPT',
    con=engine,
    if_exists='append',
    index=False,
    dtype={
        'condition_concept_id': sqlalchemy.types.INTEGER,
        'condition_concept_name': sqlalchemy.types.String,
        'condition_meddra_id': sqlalchemy.types.INTEGER,
        'condition_snomed_id': sqlalchemy.types.INTEGER,
    },
    # Write in batches, like the other table inserts in this notebook, instead
    # of sending every row at once (the pandas default when chunksize is unset).
    chunksize=200_000,
)

<sqlalchemy.engine.result.ResultProxy at 0x7f856fc16a58>

## CONDITION_OCCURRENCE

In [5]:
# Remove any previous CONDITION_OCCURRENCE table, then define it again with
# its two integer columns.
drop_condition_occurrence = 'DROP TABLE IF EXISTS CONDITION_OCCURRENCE;'
create_condition_occurrence = '''
CREATE TABLE CONDITION_OCCURRENCE (
report_id int,
condition_concept_id int
);
'''

engine.execute(drop_condition_occurrence)
engine.execute(create_condition_occurrence)

<sqlalchemy.engine.result.ResultProxy at 0x7f11c0a4fa90>

In [6]:
# SQL column types passed to pandas for the insert
condition_occurrence_dtypes = {
    'report_id': sqlalchemy.types.INTEGER,
    'condition_concept_id': sqlalchemy.types.INTEGER,
}

condition_occurrence = pd.read_csv('../../data/tables/condition_occurrence.csv.xz')

# Append to the existing table in batches of 200,000 rows, leaving out the
# DataFrame index.
condition_occurrence.to_sql(
    name='CONDITION_OCCURRENCE',
    con=engine,
    index=False,
    if_exists='append',
    chunksize=200_000,
    dtype=condition_occurrence_dtypes,
)

## REPORT

In [11]:
# Remove any previous REPORT table, then define it again: three integer
# columns plus a one-character sex column.
drop_report = 'DROP TABLE IF EXISTS REPORT;'
create_report = '''
CREATE TABLE REPORT (
report_id int,
report_year int,
person_age int,
person_sex char(1)
);
'''

engine.execute(drop_report)
engine.execute(create_report)

<sqlalchemy.engine.result.ResultProxy at 0x7fb313930fd0>

In [12]:
# SQL column types passed to pandas for the insert
report_dtypes = {
    'report_id': sqlalchemy.types.INTEGER,
    'report_year': sqlalchemy.types.INTEGER,
    'person_age': sqlalchemy.types.INTEGER,
    'person_sex': sqlalchemy.types.CHAR(length=1),
}

report = pd.read_csv('../../data/tables/report.csv.xz')

# Append to the existing table in batches of 200,000 rows, leaving out the
# DataFrame index.
report.to_sql(
    name='REPORT',
    con=engine,
    index=False,
    if_exists='append',
    chunksize=200_000,
    dtype=report_dtypes,
)

## DRUG_CONCEPT

In [25]:
# Remove any previous DRUG_CONCEPT table, then define it again. The name and
# DrugBank identifier are stored as varchar(255); the other columns are integers.
drop_drug_concept = 'DROP TABLE IF EXISTS DRUG_CONCEPT;'
create_drug_concept = '''
CREATE TABLE DRUG_CONCEPT (
drug_concept_id int,
drug_concept_name varchar(255),
rxnorm_concept_id int,
drugbank_concept_id varchar(255),
chebi_concept_id int
);
'''

engine.execute(drop_drug_concept)
engine.execute(create_drug_concept)

<sqlalchemy.engine.result.ResultProxy at 0x7f1246be13c8>

In [28]:
# CSV column name -> SQL column name; columns not listed keep their CSV name
drug_concept_column_names = {
    'concept_id': 'drug_concept_id',
    'concept_name': 'drug_concept_name',
}

# SQL column types passed to pandas for the insert
drug_concept_dtypes = {
    'drug_concept_id': sqlalchemy.types.INTEGER,
    'drug_concept_name': sqlalchemy.types.String,
    'rxnorm_concept_id': sqlalchemy.types.INTEGER,
    'drugbank_concept_id': sqlalchemy.types.String,
    'chebi_concept_id': sqlalchemy.types.INTEGER,
}

drug_concept = pd.read_csv('../../data/tables/drug_concept.csv.xz').rename(
    columns=drug_concept_column_names
)

# Append to the existing table in batches of 200,000 rows, leaving out the
# DataFrame index.
drug_concept.to_sql(
    name='DRUG_CONCEPT',
    con=engine,
    index=False,
    if_exists='append',
    chunksize=200_000,
    dtype=drug_concept_dtypes,
)

## DRUG_EXPOSURE

In [30]:
# Remove any previous DRUG_EXPOSURE table, then define it again with its two
# integer columns.
drop_drug_exposure = 'DROP TABLE IF EXISTS DRUG_EXPOSURE;'
create_drug_exposure = '''
CREATE TABLE DRUG_EXPOSURE (
report_id int,
drug_concept_id int
);
'''

engine.execute(drop_drug_exposure)
engine.execute(create_drug_exposure)

<sqlalchemy.engine.result.ResultProxy at 0x7f1164c2dbe0>

In [18]:
# SQL column types passed to pandas for the insert
drug_exposure_dtypes = {
    'report_id': sqlalchemy.types.INTEGER,
    'drug_concept_id': sqlalchemy.types.INTEGER,
}

drug_exposure = pd.read_csv('../../data/tables/drug_exposure.csv.xz')

# Append to the existing table in batches of 200,000 rows, leaving out the
# DataFrame index.
drug_exposure.to_sql(
    name='DRUG_EXPOSURE',
    con=engine,
    index=False,
    if_exists='append',
    chunksize=200_000,
    dtype=drug_exposure_dtypes,
)