In [1]:
import sqlite3
import pandas as pd

import os, sys
rootpath = os.path.join(os.getcwd(), '.')
sys.path.append(rootpath)
from src.utils import *
# reimport modules if they change
%load_ext autoreload
%autoreload 2

In [2]:
# connect to the database (create file if not exists)
con = sqlite3.connect('./simulations.db')
# SQLite only enforces FOREIGN KEY clauses when this pragma is switched on for
# the connection; without it the reference from beta_approximations to
# simulations is accepted by the parser but never checked.
con.execute("PRAGMA foreign_keys = ON")
# create a cursor object
cur = con.cursor()

## Simulations

In [3]:
# drop table if exists -- uncomment the next line to delete the simulations table and all of its rows
#cur.execute('DROP TABLE IF EXISTS simulations')

In [4]:
# Table definition for `simulations`; IF NOT EXISTS makes re-running this cell harmless.
simulations_table_sql = """
CREATE TABLE IF NOT EXISTS simulations (
            simulation_id integer PRIMARY KEY,
            N int NOT NULL,
            K int NOT NULL,
            lambda float NOT NULL,
            mu float NOT NULL,
            h float NOT NULL,
            window float NOT NULL,
            seed int NOT NULL,
            raw_file TEXT NOT NULL,
            dataset TEXT NOT NULL
            )
"""
# The unique index rejects a second row with the same (N,K,lambda,mu,h,window,seed).
parameters_index_sql = "CREATE UNIQUE INDEX IF NOT EXISTS parameters ON simulations (N,K,lambda,mu,h,window,seed);"
for ddl_statement in (simulations_table_sql, parameters_index_sql):
    cur.execute(ddl_statement)
# If the index needs to be redesigned, remove it with cur.execute("DROP INDEX parameters;")
con.commit()

In [5]:
# load every row of the simulations table into a DataFrame and display it
select_all_simulations = "SELECT * FROM simulations"
simulations = pd.read_sql_query(sql=select_all_simulations, con=con)
simulations

Unnamed: 0,simulation_id,N,K,lambda,mu,h,window,seed,raw_file,dataset
0,1,10000,100,0.0000,0.2,1.000000,0.000001,1001,/scratch02.local/johannes/projects/sahel_finit...,samples/1e-06
1,2,10000,100,0.0000,0.2,1.000000,1.000000,1001,/scratch02.local/johannes/projects/sahel_finit...,samples/1.0
2,3,10000,100,0.0000,0.2,1.000000,10.000000,1001,/scratch02.local/johannes/projects/sahel_finit...,samples/10.0
3,4,10000,100,0.0000,0.2,1.000000,100.000000,1001,/scratch02.local/johannes/projects/sahel_finit...,samples/100.0
4,5,10000,100,0.0000,0.2,1.000000,1000.000000,1001,/scratch02.local/johannes/projects/sahel_finit...,samples/1000.0
...,...,...,...,...,...,...,...,...,...,...
5768,5769,10000,100,0.9999,0.2,0.005623,1.000000,1000,/scratch02.local/johannes/projects/sahel_finit...,samples/1.0
5769,5770,10000,100,0.9999,0.2,0.005623,10.000000,1000,/scratch02.local/johannes/projects/sahel_finit...,samples/10.0
5770,5771,10000,100,0.9999,0.2,0.005623,100.000000,1000,/scratch02.local/johannes/projects/sahel_finit...,samples/100.0
5771,5772,10000,100,0.9999,0.2,0.005623,1000.000000,1000,/scratch02.local/johannes/projects/sahel_finit...,samples/1000.0


## Beta Approximation

In [6]:
# Table definition for `beta_approximations`: the UNIQUE constraint allows at most
# one row of (a, b, loc, scale) per simulation_id.
beta_approximations_table_sql = """
CREATE TABLE IF NOT EXISTS beta_approximations (
            simulation_id INTEGER NOT NULL UNIQUE,
            a float NOT NULL,
            b float NOT NULL,
            loc float NOT NULL,
            scale float NOT NULL,
            FOREIGN KEY (simulation_id) REFERENCES simulations (simulation_id)
            )
"""
cur.execute(beta_approximations_table_sql)
con.commit()

In [7]:
# load every row of the beta_approximations table into a DataFrame and display it
select_all_beta_approximations = "SELECT * FROM beta_approximations"
beta_approximations = pd.read_sql_query(sql=select_all_beta_approximations, con=con)
beta_approximations

Unnamed: 0,simulation_id,a,b,loc,scale
0,1,3.000801e+03,2.072159e+04,-0.0001,1.0002
1,2,6.499563e+03,4.488178e+04,-0.0001,1.0002
2,3,5.995206e+04,4.139902e+05,-0.0001,1.0002
3,4,5.947034e+05,4.106638e+06,-0.0001,1.0002
4,5,5.693645e+06,3.931665e+07,-0.0001,1.0002
...,...,...,...,...,...
5768,5769,2.253999e+01,2.030146e+00,-0.0001,1.0002
5769,5770,2.293624e+01,2.066115e+00,-0.0001,1.0002
5770,5771,2.642955e+01,2.382490e+00,-0.0001,1.0002
5771,5772,5.584185e+01,5.034823e+00,-0.0001,1.0002


## Beta Interpolation

In [8]:
# Table definition for `beta_interpolations`; IF NOT EXISTS makes re-running this cell harmless.
beta_interpolations_table_sql = """
CREATE TABLE IF NOT EXISTS beta_interpolations (
            N INTEGER NOT NULL,
            K INTEGER NOT NULL,
            mu FLOAT NOT NULL,
            seed INTEGER NOT NULL,
            filename TEXT NOT NULL
            )
"""
# The unique index rejects a second row with the same (N,K,mu,seed).
neural_network_index_sql = "CREATE UNIQUE INDEX IF NOT EXISTS neural_network ON beta_interpolations (N,K,mu,seed);"
for ddl_statement in (beta_interpolations_table_sql, neural_network_index_sql):
    cur.execute(ddl_statement)
con.commit()

In [9]:
# load every row of the beta_interpolations table into a DataFrame and display it
select_all_beta_interpolations = "SELECT * FROM beta_interpolations"
beta_interpolations = pd.read_sql_query(sql=select_all_beta_interpolations, con=con)
beta_interpolations

Unnamed: 0,N,K,mu,seed,filename
0,10000,100,0.2,1001,./dat/beta_interpolation_N=10000_K=100_mu=0.2_...


## Results

In [10]:
# Table definition for `results`; IF NOT EXISTS makes re-running this cell harmless.
results_table_sql = """
CREATE TABLE IF NOT EXISTS results (
            N INTEGER NOT NULL,
            K INTEGER NOT NULL,
            mu FLOAT NOT NULL,
            seed INTEGER NOT NULL,
            window float NOT NULL,
            sigma float NOT NULL,
            epsilon float NOT NULL,
            filename TEXT NOT NULL
            )
"""
# The unique index rejects a second row with the same (N,K,mu,seed,window,sigma,epsilon).
result_index_sql = "CREATE UNIQUE INDEX IF NOT EXISTS result ON results (N,K,mu,seed,window,sigma,epsilon);"
for ddl_statement in (results_table_sql, result_index_sql):
    cur.execute(ddl_statement)
con.commit()

In [11]:
# load every row of the results table into a DataFrame and display it
select_all_results = "SELECT * FROM results"
results = pd.read_sql_query(sql=select_all_results, con=con)
results

Unnamed: 0,N,K,mu,seed,window,sigma,epsilon,filename
0,10000,100,0.2,1001,1e-06,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...
1,10000,100,0.2,1001,1.0,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...
2,10000,100,0.2,1001,10.0,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...
3,10000,100,0.2,1001,100.0,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...
4,10000,100,0.2,1001,1000.0,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...
5,10000,100,0.2,1001,10000.0,0.01,0.1,/data.nst/johannes/projects/sahel_finite-obser...


## Clean up 

In [12]:
from tqdm import tqdm
import h5py
import numpy as np

def sort_names_by_value(values, names):
    """Sort ``names`` by their paired ``values`` and return both as tuples.

    The pairs are sorted as ``(value, name)`` tuples, so equal values are
    ordered by name.

    Parameters
    ----------
    values : iterable of float
        Sort keys, one per entry of ``names``.
    names : iterable of str
        Names to be ordered.

    Returns
    -------
    (tuple, tuple)
        The sorted values and the names in matching order. Empty input yields
        two empty tuples instead of failing on the ``zip(*...)`` unpacking.
    """
    pairs = sorted(zip(values, names))
    if not pairs:
        return (), ()
    sorted_values, sorted_names = zip(*pairs)
    return sorted_values, sorted_names


# safety switch: the registration below only runs when this is set to True
know_what_you_do = False
if know_what_you_do:
    # add previous simulations and their locations to the database

    # seed1001 for now
    root='/scratch02.local/johannes/projects/sahel_finite-observation-dynamic-range/N=10000_K=100/seed=1001/'
    lambda_dirs = os.listdir(root)
    # lambda is one minus the number that follows the last "=" in the directory name
    lambdas = [1-float(dir_name.split("=")[-1]) for dir_name in lambda_dirs]
    # sort lambdas and lambda_dirs according to the value of lambda
    lambdas, lambda_dirs = sort_names_by_value(lambdas, lambda_dirs)
    print("available lambda values:")
    print(lambdas)
    for lambda_dir in lambda_dirs:
        # root ends with "/", so this is the same string as root+lambda_dir
        path = os.path.join(root, lambda_dir)
        filenames = os.listdir(path)

        # sort filenames according to h, parsed from the text after the last "="
        # in the second-to-last "_"-separated token of the file name
        hs = [float(filename.split("_")[-2].split("=")[-1]) for filename in filenames]
        hs, filenames = sort_names_by_value(hs, filenames)
        print(filenames)
        # register every file of this lambda directory
        for name in filenames:
            filename = os.path.join(path, name)
            with h5py.File(filename, 'r') as file:
                # the parameters stored in the database are read from the file
                # attributes, not from the directory or file name
                params_db = {
                    'N': int(file.attrs['N']),
                    'K': int(file.attrs['K']),
                    'lambda': float(file.attrs['lambda']),
                    'mu': float(file.attrs['mu']),
                    'h': float(file.attrs['h']),
                    'seed': int(file.attrs['seed']),
                    'raw_file': filename,
                }
                # one database row per window listed in the file
                for window in np.array(file['windows']):
                    params_db['window'] = window
                    params_db['dataset'] = f'samples/{window}'
                    if not exists_in_database(con, cur, 'simulations', params_db):
                        insert_into_database(con, cur, 'simulations', params_db)
                    else:
                        # get entry from database and show row
                        cur.execute("SELECT * FROM simulations WHERE N=:N AND K=:K AND lambda=:lambda AND mu=:mu AND h=:h AND window=:window AND seed=:seed", params_db)
                        print(cur.fetchall())
                
                


In [13]:
# persist any pending changes, then release the database file;
# the connection is closed even if the commit itself raises
try:
    con.commit()
finally:
    con.close()