# Simple read and write

In [91]:
from cassandra.cluster import Cluster
import cassandra.query as q
import time
from datetime import datetime
time_ = datetime.strptime('2017-08-20 16:31:32.123', '%Y-%m-%d %H:%M:%S.%f')

cluster = Cluster(['192.168.1.149', '192.168.1.115'])
session = cluster.connect('test')

#### Simple read query

In [35]:
start = time.time()
rows = session.execute('select * from data.training_gpeh limit 1000')
end = time.time()
print(end-start)

0.082053899765


#### Write 100k random rows to table

In [89]:
def single_write(numOfWrites):
    for i in xrange(0,numOfWrites):
        imsi = str(i)
        session.execute(
        """
        INSERT INTO test.training_gpeh (imsi, ts, ev, plifls, plt1, t1id, t1rnc, t2rnc, t3id, t3rnc, t4id, t4rnc) 
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        """,
        (imsi, time_, 1,1,1,1,1,1,1,1,1,1)
    )

numOfWrites = 100000
start = time.time()
single_write(numOfWrites)
end = time.time()
avg_time = (end-start)/i

print(i)
print(end-start)
print(avg_time)

99
113.681812048
1.1483011318


#### In this case the prepared statement gives no time gains

In [93]:
query = "INSERT INTO test.training_gpeh (imsi, ts, ev, plifls, plt1, t1id, t1rnc, t2rnc, t3id, t3rnc, t4id, t4rnc) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)"
prepare = session.prepare(query)
start = time.time()
for i in xrange(0, numOfWrites):
    imsi = str(i)
    bound = prepare.bind((imsi, time_, 1,1,1,1,1,1,1,1,1,1))
    session.execute(bound)
end = time.time()
avg_time = (end-start)/i
print(i)
print(end-start)
print(avg_time)

99999
111.352442026
0.00111353555562


# Batch statement

In [84]:
def batch_add(batchSize,j):
    batch = q.BatchStatement() 
    for i in xrange(0,batchSize):
        imsi = str(1000*j + i)
        batch.add(data, (imsi, time_, 1,1,1,1,1,1,1,1,1,1))
    session.execute(batch)
    
query = "INSERT INTO test.training_gpeh (imsi, ts, ev, plifls, plt1, t1id, t1rnc, t2rnc, t3id, t3rnc, t4id, t4rnc) VALUES(?,?,?,?,?,?,?,?,?,?,?,?)"
data = session.prepare(query)
batch = q.BatchStatement() 
batchSize = 1000

start = time.time()
for i in xrange(0,100):
    batch_add(batchSize, i)
end = time.time()
avg_time = (end-start)/i

print(i)
print(end-start)
print(avg_time)

99
11.9441959858
0.120648444301
