In [None]:
import requests
# Sanity check: ask the local Elasticsearch node for its banner document.
# A timeout is passed because requests waits indefinitely by default, which
# would leave this cell hanging if nothing is listening on port 9200.
res = requests.get('http://localhost:9200', timeout=5)
print(res.content)

In [None]:
from elasticsearch import Elasticsearch
# Client used by every following cell; it talks to the node on localhost:9200.
es = Elasticsearch(
    [
        {'host': 'localhost', 'port': 9200},
    ]
)

In [None]:
# Store a throwaway document (id 1) in 'test-index' to confirm writes work.
es.index(
    index='test-index',
    doc_type='test',
    id=1,
    body={'test': 'test'},
)

In [None]:
# Remove the throwaway document again before moving on to more interesting data.
es.delete(
    index='test-index',
    doc_type='test',
    id=1,
)

In [None]:

# Hand-written first entry of the 'sw' index: Luke Skywalker under id 1.
es.index(
    index='sw',
    doc_type='people',
    id=1,
    body={
        "name": "Luke Skywalker",
        "height": "172",
        "mass": "77",
        "hair_color": "blond",
        "birth_year": "19BBY",
        "gender": "male",
    },
)

In [None]:
#let's iterate over swapi people documents and index them
import json

# Walk the people endpoint id by id, starting at 1, and stop at the first id
# that does not answer with HTTP 200. The status is checked BEFORE indexing so
# that an error payload is never stored in the 'sw' index as a person.
i = 1
while True:
    # timeout: requests waits forever by default, which would hang the cell
    r = requests.get('http://swapi.co/api/people/'+ str(i), timeout=10)
    if r.status_code != 200:
        break
    es.index(index='sw', doc_type='people', id=i, body=json.loads(r.content.decode('utf-8')))
    i=i+1

# i is the first id whose request did not return 200; ids below it were indexed.
print(i)

In [None]:
#let's add more data but using node 2! Starting where the other stopped
# NOTE(review): this cell still writes through the same `es` client as the
# cells above; if a second node is meant to receive these documents, a client
# pointing at that node has to be used here - confirm the intent.
import json

# Resume at id 18 and keep going until the API stops answering with HTTP 200.
# The status is checked BEFORE indexing so that an error payload is never
# stored in the 'sw' index as a person.
i = 18
while True:
    # timeout: requests waits forever by default, which would hang the cell
    r = requests.get('http://swapi.co/api/people/'+ str(i), timeout=10)
    if r.status_code != 200:
        break
    es.index(index='sw', doc_type='people', id=i, body=json.loads(r.content.decode('utf-8')))
    i=i+1

# i is the first id whose request did not return 200; ids 18..i-1 were indexed.
print(i)

In [None]:
# Fetch a single person document straight by its id.
es.get(
    index='sw',
    doc_type='people',
    id=65,
)

In [None]:
#let's find darth vader
# A `prefix` query is term-level: its text is not analyzed, so the mixed-case,
# two-word string "Darth Vader" is compared as-is against the indexed terms.
# `match_phrase` runs the text through the field's analyzer and looks for the
# resulting terms next to each other, which is what a full-name lookup needs.
# NOTE(review): assumes `name` is an analyzed text field (the mapping is not
# defined anywhere in this notebook) - confirm against the index mapping.
es.search(index="sw", body={"query": {"match_phrase" : { "name" : "Darth Vader" }}})

In [None]:
# Prefix search: anyone whose name contains a term that begins with "lu".
es.search(
    index="sw",
    body={
        "query": {
            "prefix": {"name": "lu"},
        },
    },
)

In [None]:
#let's try a fuzzy query
# Looks for names similar to the (misspelled) text "jaba". The response is only
# stored in `q`; because the cell ends in an assignment nothing is displayed.
# NOTE(review): `fuzzy_like_this_field` is believed to exist only in old
# Elasticsearch releases (removed in 2.0) - confirm the server version this
# notebook targets before relying on this cell.
q = es.search(index="sw", body={"query": {"fuzzy_like_this_field" : { "name" : {"like_text": "jaba", "max_query_terms":5}}}})

In [None]:
import math
import time
import urllib.request
import matplotlib.pyplot as plt

from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


# Pages downloaded by the I/O benchmark; io_intensive() picks one by list index.
addrs = [
    'http://www.poatek.com',
    'https://www.poatek.com/team/',
    'https://www.poatek.com/blog/',
    'https://www.poatek.com/our-method/',
    'https://www.poatek.com/services/',
    'https://en.wikipedia.org/wiki/Main_Page',
    'https://www.google.com/',
    'https://www.kaggle.com/competitions',
    'https://www.amazon.com/charts/mostread/fiction/',
    'https://www.amazon.com/charts/mostread/nonfiction',
    'https://www.amazon.com/charts/mostsold/nonfiction',
    'https://www.amazon.com/charts/mostsold/fiction',
    'https://www.nytimes.com',
    'https://www.bbc.com/',
    'https://www.lemonde.fr',
    'https://edition.cnn.com',
]

def multithreading(func, args, workers):
    """Apply ``func`` to every item of ``args`` on a pool of ``workers`` threads.

    The pool is shut down (waiting for all submitted calls) before the results
    are collected.

    Args:
        func: Callable taking one item of ``args``.
        args: Iterable of inputs.
        workers: Maximum number of threads in the pool.

    Returns:
        list: Results of ``func`` in the same order as ``args``.
    """
    pool = ThreadPoolExecutor(workers)
    try:
        outcome = pool.map(func, args)
    finally:
        # Same as leaving a ``with`` block: wait for every worker to finish.
        pool.shutdown(wait=True)
    return list(outcome)


def multiprocessing(func, args, workers):
    """Apply ``func`` to every item of ``args`` on a pool of ``workers`` processes.

    The pool is shut down (waiting for all submitted calls) before the results
    are collected.

    Args:
        func: Callable taking one item of ``args``.
        args: Iterable of inputs.
        workers: Maximum number of processes in the pool.

    Returns:
        list: Results of ``func`` in the same order as ``args``.
    """
    pool = ProcessPoolExecutor(workers)
    try:
        outcome = pool.map(func, args)
    finally:
        # Same as leaving a ``with`` block: wait for every worker to finish.
        pool.shutdown(wait=True)
    return list(outcome)

def io_intensive(x):
    """Download one page and write it to disk repeatedly (I/O-bound workload).

    Args:
        x: Index into the module-level ``addrs`` list selecting the URL.

    Side effects:
        Overwrites ``output.txt`` in the working directory 50 times with the
        ``str()`` of the downloaded body. Returns nothing.
    """
    repetitions = 50
    with urllib.request.urlopen(addrs[x], timeout=20) as response:
        body = response.read()
        # The writes happen while the connection is still open, and each one
        # truncates the file first because of mode 'w'.
        for _ in range(repetitions):
            with open('output.txt', 'w') as sink:
                sink.write(str(body))

def test_io_intensive_threads(thread_count_lst):
    """Time ``io_intensive`` serially and on thread pools of several sizes.

    Every run processes each index of the module-level ``addrs`` list once.
    Durations are measured with ``time.perf_counter()``, a clock meant for
    measuring intervals; unlike ``time.time()`` it is not affected by system
    clock adjustments during the run.

    Args:
        thread_count_lst: Iterable of pool sizes to benchmark, in order.

    Returns:
        list of float: Elapsed seconds; the serial run first, then one entry
        per value of ``thread_count_lst`` in the given order.
    """
    num_tasks = len(addrs)
    times = []

    # Baseline: one task after the other in the calling thread.
    start = time.perf_counter()
    for i in range(num_tasks):
        io_intensive(i)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Serial execution took {elapsed}s.')

    for n_threads in thread_count_lst:
        start = time.perf_counter()
        multithreading(io_intensive, range(num_tasks), n_threads)
        elapsed = time.perf_counter() - start
        times.append(elapsed)
        print(f'Multithreading with {n_threads} threads took {elapsed}s.')
    return times

# Run the benchmark: one serial pass followed by pools of 2, 4, 8 and 16 threads.
times = test_io_intensive_threads([2, 4, 8, 16])

# One bar per run, labelled in the order the timings were collected
# ('1' stands for the serial pass).
num_threads = ['1', '2', '4', '8', '16']

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(num_threads, times)
ax.set_xlabel('Number of Threads')
ax.set_ylabel('Execution Time [s]')
plt.show()


In [None]:
def compute_intensive(x, iterations=10**7):
    """Burn CPU time with a loop of floating-point arithmetic (CPU-bound workload).

    Args:
        x: Task index handed over by the caller or executor; not used.
        iterations: Number of loop iterations to perform. Defaults to
            ``10**7``, the amount of work the benchmark was written with;
            pass a smaller value for quick runs.

    Returns:
        None. Only the time spent matters.
    """
    foo = 0
    for i in range(iterations):
        # foo starts at 0 and only ever has a multiple of itself added, so it
        # stays 0; the statement exists purely for its computational cost.
        foo += foo * math.cos(i*math.pi)
        
def test_compute_intensive():
    """Time ``compute_intensive`` serially, on 4 threads and on 4 processes.

    Each of the three runs executes 4 tasks. Durations are measured with
    ``time.perf_counter()``, a clock meant for measuring intervals; unlike
    ``time.time()`` it is not affected by system clock adjustments.

    Returns:
        list of float: Elapsed seconds in the order serial, multithreading,
        multiprocessing.
    """
    num_tasks = 4
    times = []

    # Baseline: one task after the other in the calling thread.
    start = time.perf_counter()
    for i in range(num_tasks):
        compute_intensive(i)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Serial execution took {elapsed}s.')

    # One thread per task.
    n_threads = num_tasks
    start = time.perf_counter()
    multithreading(compute_intensive, range(num_tasks), n_threads)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Multithreading with {n_threads} threads took {elapsed}s.')

    # One process per task.
    n_procs = num_tasks
    start = time.perf_counter()
    multiprocessing(compute_intensive, range(num_tasks), n_procs)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Multiprocessing with {n_procs} processes took {elapsed}s.')
    return times

# Run the CPU-bound benchmark: serial, 4 threads, 4 processes.
times = test_compute_intensive()

# Bar labels follow the order in which the timings are returned.
num_threads = ['Serial', '4 Threads', '4 Processes']

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(num_threads, times)
ax.set_xlabel('Execution')
ax.set_ylabel('Execution Time [s]')
plt.show()

In [None]:
from multiprocessing import Pool, freeze_support
import windows_worker_cpu

def test_compute_intensive():
    """Time ``windows_worker_cpu.compute_intensive`` serially, on 4 threads and on 4 processes.

    This redefines the ``test_compute_intensive`` from the earlier cell; the
    difference is that the workload comes from the imported
    ``windows_worker_cpu`` module instead of a function defined in the
    notebook (presumably so that worker processes can import it on Windows -
    confirm).

    Each of the three runs executes 4 tasks. Durations are measured with
    ``time.perf_counter()``, a clock meant for measuring intervals; unlike
    ``time.time()`` it is not affected by system clock adjustments.

    Returns:
        list of float: Elapsed seconds in the order serial, multithreading,
        multiprocessing.
    """
    num_tasks = 4
    times = []

    # Baseline: one task after the other in the calling thread.
    start = time.perf_counter()
    for i in range(num_tasks):
        windows_worker_cpu.compute_intensive(i)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Serial execution took {elapsed}s.')

    # One thread per task.
    n_threads = num_tasks
    start = time.perf_counter()
    multithreading(windows_worker_cpu.compute_intensive, range(num_tasks), n_threads)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Multithreading with {n_threads} threads took {elapsed}s.')

    # One process per task.
    n_procs = num_tasks
    start = time.perf_counter()
    multiprocessing(windows_worker_cpu.compute_intensive, range(num_tasks), n_procs)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Multiprocessing with {n_procs} processes took {elapsed}s.')
    return times
    

# Guarded so the benchmark only runs when this code is the main module.
if __name__ == '__main__':
    # Serial, 4 threads, 4 processes - in that order.
    times = test_compute_intensive()

    # Bar labels follow the order in which the timings are returned.
    num_threads = ['Serial', '4 Threads', '4 Processes']

    fig = plt.figure()
    ax = fig.add_axes([0,0,1,1])
    ax.bar(num_threads, times)
    ax.set_xlabel('Execution')
    ax.set_ylabel('Execution Time [s]')
    plt.show()

In [None]:
def test_io_intensive_processes(proc_count_lst):
    """Time ``io_intensive`` serially and on process pools of several sizes.

    Every run processes each index of the module-level ``addrs`` list once.
    Durations are measured with ``time.perf_counter()``, a clock meant for
    measuring intervals; unlike ``time.time()`` it is not affected by system
    clock adjustments during the run.

    Args:
        proc_count_lst: Iterable of pool sizes to benchmark, in order.

    Returns:
        list of float: Elapsed seconds; the serial run first, then one entry
        per value of ``proc_count_lst`` in the given order.
    """
    num_tasks = len(addrs)
    times = []

    # Baseline: one task after the other in the calling process.
    start = time.perf_counter()
    for i in range(num_tasks):
        io_intensive(i)
    elapsed = time.perf_counter() - start
    times.append(elapsed)
    print(f'Serial execution took {elapsed}s.')

    for n_procs in proc_count_lst:
        start = time.perf_counter()
        multiprocessing(io_intensive, range(num_tasks), n_procs)
        elapsed = time.perf_counter() - start
        times.append(elapsed)
        print(f'Multiprocessing with {n_procs} processes took {elapsed}s.')
    return times

# Run the benchmark: one serial pass followed by pools of 2, 4 and 8 processes.
times = test_io_intensive_processes([2, 4, 8])

# One bar per run, labelled in the order the timings were collected
# ('1' stands for the serial pass).
num_threads = ['1', '2', '4', '8']

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(num_threads, times)
ax.set_xlabel('Number of Processes')
ax.set_ylabel('Execution Time [s]')
plt.show()