# Prerequisites

1. All the databases are installed on the system.
2. All the scripts are written in Python 3.
3. All the required Python libraries are explicitly installed.


# Data-generation

```
TimeUTC, Value1, Value2
1537282851165, 1, 2
```
Sample of the data. The header line is shown only to name the columns; the generated files contain no header row.

All the data are generated in the "./data" folder.

In [None]:
import csv
import os
#import date

# Generate one CSV file per dataset size under ./data.
# File <n>.csv holds n rows of "timestamp_ms,value1,value2" with no header
# row; timestamps count backwards from end_time in 1 ms steps.
directory = "data"
input_data = [1, 1000, 10000, 100000, 1000000, 10000000, 100000000]
end_time = 1537282851165 #ms

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(directory, exist_ok=True)
for i in input_data:
    file_path = os.path.join(directory, str(i) + '.csv')
    # newline='' lets the csv module control the line endings itself.
    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        # Stream the rows straight into the writer instead of building a
        # temporary list per row: (timestamp, descending value, ascending value).
        writer.writerows((end_time - k, i - k, k) for k in range(i))

# SQLite Database Size Tests

In [None]:
import sqlite3
import os

# Build one SQLite database per dataset size by feeding a small import
# script to the sqlite3 command-line shell, so the resulting file sizes can
# be compared.
#
# NOTE(review): the CSV rows under data/ have three fields but the table has
# only two columns; the sqlite3 shell is expected to drop the extra field
# (with a warning per row) — confirm that leaving out the third value is
# intended for this comparison.
sqlite_import_cmd = """CREATE TABLE mac_water_data_{0} (time varchar(255) not null, value1 varchar(255) not null);
.mode csv
.import data/{0}.csv mac_water_data_{0}"""

database_dir = "sqlite_database"

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(database_dir, exist_ok=True)
input_data = [1, 1000, 10000, 100000, 1000000, 10000000, 100000000]
for i in input_data:
    sql_cmd = sqlite_import_cmd.format(i)
    file_path = database_dir + '/' + str(i) + ".sql"
    # The with-block closes the script file; no explicit close() is needed.
    with open(file_path, 'w', newline='') as f:
        f.write(sql_cmd)
    database = database_dir + "/" + str(i) + ".db"
    # Equivalent to running: sqlite3 <database> < <import script>
    cmd = "sqlite3 " + database + " < " + file_path
    status = os.system(cmd)
    if status != 0:
        # Surface failures instead of silently producing an empty database.
        print("sqlite3 import failed for {0} (exit status {1})".format(database, status))

In [None]:
%%sh
# Report the on-disk size of every .db file in sqlite_database/.
du -sh sqlite_database/*.db

# ElasticSearch Database Size Tests

In [None]:
from elasticsearch import helpers, Elasticsearch
import csv

# Bulk-index every generated CSV file into its own Elasticsearch index
# (mac_water_<n>) so the index sizes can be compared.
es = Elasticsearch()
# Each dataset size is listed exactly once; a repeated entry would index the
# same file twice into the same index and inflate its size.
input_data = [1, 1000, 10000, 100000, 1000000, 10000000, 100000000]
# The generated CSV files have no header row, so the column names are given
# explicitly; otherwise DictReader would consume the first data row as names.
field_names = ['TimeUTC', 'Value1', 'Value2']
for i in input_data:
    print(i)
    # newline='' is the mode the csv module expects for files it reads.
    with open('data/'+str(i)+'.csv', newline='') as f:
        reader = csv.DictReader(f, fieldnames=field_names)
        # The reader is consumed lazily, so the file is streamed to the
        # bulk helper rather than loaded into memory.
        helpers.bulk(es, reader, index='mac_water_'+str(i), doc_type='my-type')

# InfluxDB Database Size Tests

In [None]:
import requests
import csv
# Load every generated CSV file into InfluxDB through the HTTP write
# endpoint, one database (mac_water_<n>) per dataset size.
# NOTE(review): the databases are not created here — presumably they already
# exist on the server; confirm before running.
input_data =[1, 1000, 10000, 100000, 1000000, 10000000, 100000000]
# Number of line-protocol points sent per HTTP request; batching avoids one
# round trip per row.
batch_size = 5000
# A session reuses the HTTP connection across requests.
with requests.Session() as session:
    for i in input_data:
        filename = "data/"+str(i)+".csv"
        # precision=ms because the first CSV column is a millisecond timestamp.
        url_string = 'http://localhost:8086/write?db=mac_water_{0}&precision=ms'.format(str(i))
        print(filename)
        with open(filename, 'r', newline='') as csvfile:
            rows = csv.reader(csvfile, delimiter=',')
            batch = []
            for row in rows:
                # Store the point at the timestamp from the data (row[0]);
                # without it every point in a batch would share the server's
                # receive time and overwrite the others.
                batch.append('mac_water,v=serverError value={0} {1}'.format(row[1], row[0]))
                if len(batch) >= batch_size:
                    # Fail loudly: a rejected write would make the size
                    # measurement meaningless.
                    session.post(url_string, data='\n'.join(batch)).raise_for_status()
                    batch = []
            if batch:
                # Flush the final, partially filled batch.
                session.post(url_string, data='\n'.join(batch)).raise_for_status()

data/10000000.csv
data/100000000.csv


# OpenTSDB Database Size Tests

The format is the same as the Telnet put interface.

`<metric> <timestamp> <value> <tagk=tagv> [<tagkN=tagvN>]`
Where:

1. metric Is the name of the metric. Note that the metric name may not include spaces.
2. timestamp Is the absolute timestamp of the data point in seconds or milliseconds
3. value Is the value to store
4. tagk=tagv Is one or more space-separated tag name and value pairs. Note that the tags may not have spaces in them.
Example:
mac_water_1 1356998400 42 value1=24 


In [None]:
import csv
import os
#import date

import gzip

# Generate one import file per dataset size under ./data/opentsdb.
# Each line is "<metric> <timestamp_ms> <value>", space-separated as in the
# Telnet put format; timestamps count backwards from end_time in 1 ms steps.
#
# NOTE(review): the put format calls for at least one tagk=tagv pair, but
# none is written here (the original had a value1=<k> tag disabled) —
# decide which tag to emit before importing.
directory = "data/opentsdb"
input_data = [1, 1000, 10000, 100000, 1000000, 10000000, 100000000]
end_time = 1537282851165 #ms

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(directory, exist_ok=True)
for i in input_data:
    file_path = directory + '/' + str(i) + '.gz'
    metric = 'mac.water.' + str(i)
    # The files are named *.gz, so they are written as real gzip streams;
    # the importer is expected to decompress paths ending in .gz.
    # newline='\n' keeps plain LF line endings on every platform.
    with gzip.open(file_path, 'wt', newline='\n') as f:
        f.writelines(
            '{0} {1} {2}\n'.format(metric, end_time - k, i - k)
            for k in range(i)
        )

In [None]:
%%sh
# Load a data file with the tsdb command-line importer.
# NOTE(review): `file` looks like a placeholder for one of the
# data/opentsdb/<n>.gz files — confirm the intended path.
./tsdb import file

# RRDtool

Reference: https://apfelboymchen.net/gnu/rrd/create/

```
rrdtool create arpcache.rrd \
        --step 1 \
        DS:num:GAUGE:600:U:U \
        RRA:AVERAGE:0.5:1:2016
```

`rrdtool create mac_water_10000000.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:10000000`

`rrdtool create mac_water_1000000.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:1000000`

`rrdtool create mac_water_100000.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:100000`

`rrdtool create mac_water_10000.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:10000`

`rrdtool create mac_water_1000.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:1000`

`rrdtool create mac_water_1.rrd --step 1 DS:num:GAUGE:600:U:U  RRA:AVERAGE:0.5:1:1`


result:
```
[root@jhub3 rrdtool]# du -sh *
763M    mac_water_100000000.rrd
77M     mac_water_10000000.rrd
7.7M    mac_water_1000000.rrd
784K    mac_water_100000.rrd
80K     mac_water_10000.rrd
12K     mac_water_1000.rrd
4.0K    mac_water_1.rrd
```

 # Graphite Whisper database
 
 https://www.infoq.com/articles/graphite-intro

`whisper-create.py ./mac_water_100000000  1:100000000`

`whisper-create.py ./mac_water_10000000  1:10000000`

`whisper-create.py ./mac_water_1000000  1:1000000`

`whisper-create.py ./mac_water_100000  1:100000`

`whisper-create.py ./mac_water_10000  1:10000`

`whisper-create.py ./mac_water_1000  1:1000`

`whisper-create.py ./mac_water_1  1:1`

```
[root@jhub3 database]# du -sh *
4.0K    mac_water_1
12K     mac_water_1000
120K    mac_water_10000
1.2M    mac_water_100000
12M     mac_water_1000000
115M    mac_water_10000000
1.2G    mac_water_100000000
```
 