In [1]:
import logging
import time
from datetime import datetime, timedelta

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan


def ConnectES():
    """Create an Elasticsearch client for atlas-kibana.mwt2.org and ping it.

    Credentials are read from ``creds.key`` in the current working directory:
    the first line is the user name, the second line is the password.

    Prints ``Success`` or ``Fail`` depending on the result of ``es.ping()``;
    the client is returned in both cases.

    Returns:
        The ``Elasticsearch`` client, or ``None`` if creating or pinging the
        client raised an exception (the error is printed, not re-raised).

    Raises:
        OSError: if ``creds.key`` cannot be opened (this happens outside the
            ``try`` block and is not caught here).
    """
    with open("creds.key") as f:
        user = f.readline().strip()
        passwd = f.readline().strip()

    try:
        es = Elasticsearch(
            [{'host': 'atlas-kibana.mwt2.org', 'port': 9200, 'scheme': 'https'}],
            request_timeout=240, http_auth=(user, passwd), max_retries=10)
        print('Success' if es.ping() else 'Fail')
        return es
    except Exception as error:
        print (">>>>>> Elasticsearch Client Error:", error)
        # Make the failure result explicit instead of falling off the end.
        return None


def FindPeriodDiff(dateFrom, dateTo):
    """Return the length of the period from ``dateFrom`` to ``dateTo``.

    Expected values: both arguments are either epoch times in milliseconds
    (``int``) or strings formatted as ``'%Y-%m-%d %H:%M'``. Unless both are
    ``int``, both are parsed as strings.

    Returns:
        datetime.timedelta: ``dateTo - dateFrom``.
    """
    both_epoch_ms = isinstance(dateFrom, int) and isinstance(dateTo, int)

    if not both_epoch_ms:
        pattern = '%Y-%m-%d %H:%M'
        begin = datetime.strptime(dateFrom, pattern)
        finish = datetime.strptime(dateTo, pattern)
        return finish - begin

    # Milliseconds -> seconds, then to naive local datetimes.
    finish = datetime.fromtimestamp(dateTo / 1000)
    begin = datetime.fromtimestamp(dateFrom / 1000)
    return finish - begin


def GetTimeRanges(dateFrom, dateTo, intv=1):
    """Split the period from ``dateFrom`` to ``dateTo`` into equal intervals.

    Args:
        dateFrom: period start, epoch milliseconds (``int``) or a
            ``'%Y-%m-%d %H:%M'`` string.
        dateTo: period end, the same kind of value as ``dateFrom``.
        intv: number of intervals to produce; must be at least 1.

    Returns:
        list[int]: ``intv + 1`` interval boundaries as epoch milliseconds,
        from the start to the end of the period. Boundaries are resolved to
        whole seconds for ``int`` input and whole minutes for string input.
        Conversion goes through ``time.mktime``, i.e. the local timezone.

    Raises:
        ValueError: if ``intv`` is smaller than 1 (previously 0 raised a bare
            ``ZeroDivisionError`` and negative values silently returned ``[]``).
    """
    if intv < 1:
        raise ValueError("intv must be a positive integer, got %r" % (intv,))

    step = FindPeriodDiff(dateFrom, dateTo) / intv

    # The start value is loop-invariant, so parse it once.
    from_epoch = isinstance(dateFrom, int)
    if from_epoch:
        start = datetime.fromtimestamp(dateFrom / 1000)
    else:
        start = datetime.strptime(dateFrom, "%Y-%m-%d %H:%M")

    boundaries = []
    for i in range(intv + 1):
        point = start + step * i
        if not from_epoch:
            # String input has minute resolution; drop any smaller remainder
            # that an uneven split introduces.
            point = point.replace(second=0, microsecond=0)
        boundaries.append(int(time.mktime(point.timetuple()) * 1000))

    return boundaries


def queryIndex(es, datefrom, dateto, idx):
    """Fetch all documents of ``idx`` whose ``timestamp`` is in [datefrom, dateto).

    Args:
        es: Elasticsearch client handed to ``elasticsearch.helpers.scan``.
        datefrom: inclusive lower bound for ``timestamp`` (``gte``).
        dateto: exclusive upper bound for ``timestamp`` (``lt``).
        idx: name of the index to scan.

    Returns:
        dict: running counter (0, 1, 2, ...) mapped to each hit's ``_source``,
        or ``None`` if the scan raised (the error is printed, not re-raised).
    """
    query = {
        "query": {
            "bool": {
                "filter": [
                    {
                        "range": {
                            "timestamp": {
                                "gte": datefrom,
                                "lt": dateto
                            }
                        }
                    }
                ]
            }
        }
    }
    try:
        ret_data = {}
        for count, item in enumerate(scan(client=es, index=idx, query=query)):
            # Progress message every 50000 documents. The format string is
            # explicit so the index name is never interpreted as a template.
            if count and count % 50000 == 0:
                logging.info("%s: %d documents fetched", idx, count)
            ret_data[count] = item['_source']

        return ret_data
    except Exception as e:
        print(e)
        return None



In [2]:
# Open the Elasticsearch connection (prints Success or Fail).
es = ConnectES()

# Index names listed by the author: 'ps_packetloss', 'ps_owd', 'ps_throughput', 'ps_trace'
idx = 'ps_throughput'
dateFrom = '2023-01-30 18:15'
dateTo = '2023-01-30 20:15'

# A single interval yields exactly two boundaries: the start and the end.
start, end = GetTimeRanges(dateFrom, dateTo, intv=1)

# queryIndex returns {counter: document}; transpose so each document is a row.
data = queryIndex(es, start, end, idx)
df = pd.DataFrame(data).transpose()
df.head()

  es = Elasticsearch([{'host': 'atlas-kibana.mwt2.org', 'port': 9200, 'scheme': 'https'}],


Success


Unnamed: 0,push,MA,src,dest,src_host,dest_host,ipv6,src_site,src_VO,src_production,dest_production,timestamp,throughput,dest_site,dest_VO
0,False,2001:718:401:6025:1::191,2001:718:401:6025:1::191,2a00:139c:5:693:0:43:1:8e,ps02-b.farm.particle.cz,perfsonar-test-v1683-portal.gridka.de,True,praguelcg2,ATLAS,True,True,1675105879000,3162099445.0,,
1,False,2400:4500:0:2::164c,2400:4500:0:2::164c,2001:67c:1bec:236::188,lhc-bandwidth.twgrid.org,btw-bw.t1.grid.kiae.ru,True,Taiwan-LCG2,ATLAS,True,True,1675107697000,484027154.0,RRC-KI-T1,ATLAS
2,False,193.60.193.3,193.60.193.3,143.167.3.116,perfsonar.dur.scotgrid.ac.uk,lcgperf.shef.ac.uk,False,UKI-SCOTGRID-DURHAM,ATLAS,True,True,1675103389000,3752229011.0,UKI-NORTHGRID-SHEF-HEP,ATLAS
3,False,147.213.204.117,147.213.204.117,193.48.83.97,perfsonar03-iep-grid.saske.sk,lpsc-perfsonar.in2p3.fr,False,,,False,True,1675105686000,898839778.0,IN2P3-LPSC,ATLAS
4,False,2001:48a8:68f7:8001:192:41:236:32,2001:48a8:68f7:8001:192:41:236:32,2001:638:700:f0c8::1:1d,psmsu02.aglt2.org,perfson2.zeuthen.desy.de,True,AGLT2,ATLAS,True,True,1675104439000,828794105.0,DESY-ZN,ATLAS
