**Build Adjacency Matrix**

**Note:** After running this notebook, you must manually put each generated JSON file into a zip file. TODO: automate this zipping step in the notebook.

In [1]:
import sqlite3
import json

In [2]:
# Progress Bar I found on the internet.
# https://github.com/alexanderkuk/log-progress
from progress_bar import log_progress

In [3]:

# "plos-pmc" data set: SQLite input and the JSON file the matrix is written to.
PLOS_PMC_DB = 'sqlite_data/data.plos-pmc.sqlite'
PLOS_PMC_MATRIX = 'json_data/plos-pmc/adjacency_matrix.json'

# "all" data set: SQLite input and the JSON file the matrix is written to.
ALL_DB = 'sqlite_data/data.all.sqlite'
ALL_MATRIX = 'json_data/all/adjacency_matrix.json'

In [4]:
# Open one connection per database, then take a cursor from each.
# NOTE(review): sqlite3.connect() creates an empty database file when the
# path does not exist, so a wrong path only surfaces later at query time.
conn_plos_pmc, conn_all = sqlite3.connect(PLOS_PMC_DB), sqlite3.connect(ALL_DB)
cursor_plos_pmc, cursor_all = conn_plos_pmc.cursor(), conn_all.cursor()

Queries

In [5]:
# Highest row id in the interactions table.
QUERY_MAX_ID = (
    "SELECT id FROM interactions "
    "ORDER BY id DESC LIMIT 1"
)

# A single interaction; the `{}` is presumably filled in with a row id
# (e.g. via str.format) before the query is executed.
QUERY_INTERACTION = (
    "SELECT geneids1, geneids2, probability "
    "FROM interactions WHERE id = {}"
)

# Every interaction in one result set.
QUERY_ALL_INTERACTION = (
    "SELECT geneids1, geneids2, probability "
    "FROM interactions"
)

In [6]:
# One work item per data set: the source database path, the JSON output path,
# and the already-open connection/cursor used to read the interactions.
actions = [
    dict(
        db=PLOS_PMC_DB,
        matrix=PLOS_PMC_MATRIX,
        conn=conn_plos_pmc,
        cursor=cursor_plos_pmc,
    ),
    dict(
        db=ALL_DB,
        matrix=ALL_MATRIX,
        conn=conn_all,
        cursor=cursor_all,
    ),
]

Step through every interaction.

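1. If geneids1 is not in matrix, insert it as an empty dict.
2. If geneids2 is not in matrix[geneids1], insert it as an empty list.
3. Round the probability to 2 decimal places, multiply it by 1000 and convert it to an integer; if that value is not already in matrix[geneids1][geneids2], append it.
4. Repeat steps 1-3 with geneids1 and geneids2 swapped, so every interaction is stored in both directions.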

In [7]:
def _build_adjacency_matrix(rows):
    """Fold interaction rows into an adjacency matrix stored in both directions.

    Args:
        rows: iterable of ``(geneids1, geneids2, probability)`` rows.

    Returns:
        A ``(matrix, skipped)`` tuple. ``matrix[a][b]`` is the list of distinct
        scaled probabilities seen for the pair ``a``/``b``; each interaction is
        recorded under both ``matrix[a][b]`` and ``matrix[b][a]``. ``skipped``
        is the number of rows dropped because their probability could not be
        converted to an integer.
    """
    matrix = {}
    skipped = 0
    for row in rows:
        if row is None:
            continue

        id1, id2 = row[0], row[1]
        try:
            # Round to 2 decimal places, then scale by 1000 and store as an int.
            prob = int(round(row[2], 2) * 1000)
        except (TypeError, ValueError, OverflowError):
            # NULL / non-numeric (TypeError), NaN (ValueError) or infinite
            # (OverflowError) probability: drop the row, but keep count so the
            # data loss is visible instead of silent.
            skipped += 1
            continue

        # Record the interaction forward, then backwards.
        for source, target in ((id1, id2), (id2, id1)):
            probs = matrix.setdefault(source, {}).setdefault(target, [])
            if prob not in probs:
                probs.append(prob)
    return matrix, skipped


# Replace `log_progress(actions, ...)` with plain `actions` to run without the
# progress widget.
for action in log_progress(actions, every=1, name="Matrix"):
    print("Executing SQL query. May take a minute.")
    cursor = action["cursor"].execute(QUERY_ALL_INTERACTION)
    interactions = cursor.fetchall()
    print("Query complete")

    matrix, skipped = _build_adjacency_matrix(
        log_progress(interactions, every=1000, name=action["matrix"]+" rows")
    )
    if skipped:
        print("Skipped {} rows with an unusable probability".format(skipped))

    # Write the finished matrix for this data set as JSON.
    with open(action["matrix"], "w") as out_file:
        json.dump(matrix, out_file)

print("All Matrices generated")
  

Executing SQL query. May take a minute.
Query complete


Executing SQL query. May take a minute.
Query complete


All Matrices generated


In [8]:
# Close every database connection listed in `actions`. Using the leftover
# loop variable `action` would close only the last connection and leak the
# other one. Closing an already-closed sqlite3 connection is harmless, so
# this cell is safe to re-run.
for entry in actions:
    entry["conn"].close()