In [8]:
import sqlite3
from sqlite3 import Error
from sqlite3 import IntegrityError
from sqlite3 import OperationalError
import os
from github import Github
from github import GithubException
import requests

In [2]:

alignments_table_string = """CREATE TABLE IF NOT EXISTS alignments(
    id integer PRIMARY KEY,
    name text NOT NULL,
    taxa integer NOT NULL,
    sites integer NOT NULL,
    data_type text CHECK(data_type IN ('cc','sc','ms', 'mp')) NOT NULL,
    UNIQUE (name, data_type)
); """


def create_connection(db_file):
    """ create a database connection to a SQLite database """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
    except Error as e:
        print(e)
    return conn
            
def create_table(conn, create_table_sql):
    try:
        cur = conn.cursor()
        cur.execute(create_table_sql)
    except Error as e:
        print(e)
        
        
def clean_db(conn):
    alignment_sql = '''DROP TABLE alignments;'''
    cur = conn.cursor()
    try:
        cur.execute(alignment_sql)
        conn.commit()
    except OperationalError as e:
        print(e)
        
def setup_db(conn):
    clean_db(conn)
    if conn is not None:
        create_table(conn, alignments_table_string)
    else:
        print("Error! cannot create the database connection.")
    
                
def create_alignment(conn, alignment_data):
    alignment_sql = ''' INSERT INTO alignments(name, taxa, sites, data_type)
              VALUES(?, ?, ?, ?) '''
    conn.cursor().execute(alignment_sql, alignment_data)
    conn.commit()
    
    

In [3]:
def get_repo_files(repo):
    repo_files = []
    try:
        contents = repo.get_contents("")
        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                contents.extend(repo.get_contents(file_content.path))
            else:
                file = file_content
                repo_files.append(str(file).replace('ContentFile(path="','').replace('")',''))
    except GithubException as e:
        print(e.args[1]['message'])
    return repo_files



def insert_alignment(conn, repo, d, file_name):
    data = file_name.split('.')
    name = '.'.join(data[:-2])
    data_type = data[1]
    #if data[1] == 'cc':
    #    data_type = "cognate"
    #if data[1] == 'sc':
    #    data_type = "soundclass"
    #if data[1] == 'ms':
    #    data_type = "morphosyntactic"
    #if data[1] == 'mp':
    #    data_type = "morphophonological"
            
    
    with open(d+ file_name, 'r') as file:
        lines = file.readlines()
        data = lines[0].split(" ")
        if len(data) == 3:
            data = data[1:]
        taxa = int(data[0])
        sites = int(data[1][:-1])
    try:
        create_alignment(conn, (name, taxa, sites, data_type))
    except IntegrityError as e:
        print(file_name)
        print(e)
        return
    return        
    with open(d+ file_name, 'r') as file:
        alignment = file.read()
    git_file = "alignments/" + file_name
    repo.create_file(git_file, git_file + " created", alignment, branch="master")
    print(git_file + ' CREATED')
    #if git_file in repo_files:
    #    contents = repo.get_contents(git_file)
    #    repo.update_file(contents.path, git_file + " updated", alignment, contents.sha, branch="master")
    #    print(git_file + ' UPDATED')
    #else:
    #    repo.create_file(git_file, git_file + " created", alignment, branch="master")
    #    print(git_file + ' CREATED')


def insert_alignments(conn, repo, d):
    
    repo_files = get_repo_files(repo)  
    
    #alignment_id = conn.cursor().execute("SELECT MAX(alignment_id) FROM alignments;").fetchone()[0]
    #if alignment_id == None:
    #    alignment_id = 0
    with os.scandir(d) as it:
        for entry in it:
            if not entry.is_file():
                continue
            #alignment_id = alignment_id+1
            insert_alignment(conn, repo, d, entry.name)

            
            


In [15]:
upload_dir = "../database/datasets/alignments/"
download_dir = "../database/datasets/alignment_downloads/"
db_path = "../database/alignments.db"
repo_name = 'language_alignment_database'
github_user = 'luisevonderwiese'
github_token = "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"

conn = create_connection(db_path)
g = Github(github_token)
repo = g.get_user().get_repo(repo_name)


#setup_db(conn)
    
#insert_alignments(conn, repo, upload_dir)
def get_file_name(entry):
    name = entry[1]
    data_type = entry[4]
    return name + "." + data_type + '.phy'
    
def query_db(conn, repo, query_string, download = False, download_dir = ""):
    results = conn.cursor().execute("SELECT * FROM alignments;").fetchall()
    print(results[0][1])
    if download: 
        if not os.path.exists(download_dir):
            os.makedirs(download_dir)
        for entry in results:
            file_name = get_file_name(entry)
            contents = repo.get_contents("alignments/" + file_name)
            url = contents.download_url
            response = requests.get(url)
            open(download_dir + file_name, "w+").write(response.content.decode("utf-8"))
    return results

query_db(conn, repo, "SELECT * FROM alignments;", True, download_dir)




2.6.0
constenlachibchan


In [5]:

                
                

                
def insert_alignments_old(conn):
    alignment_id = conn.cursor().execute("SELECT MAX(alignment_id) FROM alignments;").fetchone()[0]
    if alignment_id == None:
        alignment_id = 0
    d = "../database/datasets/alignments/"
    with os.scandir(d) as it:
        for entry in it:
            if not entry.is_file():
                continue
            alignment_id = alignment_id+1
            data = entry.name.split('.')
            if data[1] == 'cc':
                data_type = "cognate"
            if data[1] == 'sc':
                data_type = "soundclass"
            if data[1] == 'ms':
                data_type = "morphosyntactic"
            if data[1] == 'mp':
                data_type = "morphophonological"
            name = '.'.join(data[:-2])
        
            with open(d+ entry.name, 'r') as file:
                alignment = file.read()
            with open(d+ entry.name, 'r') as file:
                lines = file.readlines()
                data = lines[0].split(" ")
                if len(data) == 3:
                    data = data[1:]
                taxa = int(data[0])
                sites = int(data[1][:-1])
            create_alignment(conn, (alignment_id, name, taxa, sites, data_type))
            
def insert_lexibankMrbayes(conn):
    d = "../database/datasets/lexibankMrbayes_converted/"
    with os.scandir(d) as it:
        data_type = "cognate"
        for entry in it:
            if not entry.is_file():
                continue
            with open(d+ entry.name, 'r') as file:
                alignment = file.read()
            with open(d+ entry.name, 'r') as file:
                lines = file.readlines()
                data = lines[2].split(" ")
                taxa = int(data[1].split("=")[1])
                sites = int(data[4].split(";")[0])
                name = entry.name.split('.')[0]
                create_alignment(conn, [alignment], (name, taxa, sites, data_type))


def insert_misc(conn):
    metadata = [
        ("IE2011_RelaxedCovarion_AllSingletonsGeo", 46, 5997, "cognate"),
        ("science", 103, 5997, "cognate"),
        ("Indo-European_WALS_BinaryOutgroup_March21_stability.ONLYFIN_common_grammatical", 46, 425, "morphosyntactic")
    ]
    for dataset in metadata:
        with open("../database/datasets/" + dataset[0] + ".phy", 'r') as file:
            alignment = file.read()
        create_alignment(conn, [alignment], dataset)
