In [None]:
from elasticsearch import Elasticsearch
import pandas as pd
import traceback  # Pour afficher les erreurs détaillées

# Connexion à Elasticsearch (modifie l'URL si nécessaire)
es = Elasticsearch("http://localhost:9200")

INDEX_NAME = "application-logs"
BATCH_SIZE = 1000  # Nombre d'éléments par batch


def permit_deny_by_ip():
    """Récupère le nombre de PERMIT et DENY par IP source en paginant avec composite."""
    query = {
        "size": 0,
        "aggs": {
            "group_by_ip": {
                "composite": {
                    "size": BATCH_SIZE,
                    "sources": [
                        {"ipsrc": {"terms": {"field": "ipsrc.keyword"}}}
                    ]
                },
                "aggs": {
                    "permit": {"filter": {"term": {"action.keyword": "PERMIT"}}},
                    "deny": {"filter": {"term": {"action.keyword": "DENY"}}},

                    "permit_proto_TCP": {
                        "filter": {
                            "bool": {
                                "must": [
                                    {"term": {"action.keyword": "PERMIT"}},
                                    {"term": {"proto.keyword": "TCP"}}
                                ]
                            }
                        }
                    },
                    "permit_proto_UDP": {
                        "filter": {
                            "bool": {
                                "must": [
                                    {"term": {"action.keyword": "PERMIT"}},
                                    {"term": {"proto.keyword": "UDP"}}
                                ]
                            }
                        }
                    },

                    "nombre_port_dest": {"cardinality": {"field": "portdst.keyword"}},
                    "nombre_port_src": {"cardinality": {"field": "portsrc.keyword"}},

                    # Classification des ports
                    "port_dst_well_known": {"filter": {"range": {"portdst.keyword": {"lte": 1023}}}},
                    "port_dst_registered": {"filter": {"range": {"portdst.keyword": {"gte": 1024, "lte": 49151}}}},
                    "port_dst_dynamic_private": {"filter": {"range": {"portdst.keyword": {"gte": 49152}}}},
                },
            }
        }
    }

    after_key = None
    data = []

    try:
        while True:
            if after_key:
                query["aggs"]["group_by_ip"]["composite"]["after"] = after_key

            result = es.search(index=INDEX_NAME, body=query)
            buckets = result["aggregations"]["group_by_ip"]["buckets"]

            if not buckets:
                break

            for bucket in buckets:
                data.append({
                    "IP_Source": bucket["key"]["ipsrc"],
                    "PERMIT": bucket["permit"]["doc_count"],
                    "PERMIT_TCP": bucket["permit_proto_TCP"]["doc_count"],
                    "PERMIT_UDP": bucket["permit_proto_UDP"]["doc_count"],
                    "DENY": bucket["deny"]["doc_count"],
                    "Nb_Port_Dest": bucket["nombre_port_dest"]["value"],
                    "Nb_Port_Src": bucket["nombre_port_src"]["value"],
                    "Port_Dest_Well_Known": bucket["port_dst_well_known"]["doc_count"],
                    "Port_Dest_Registered": bucket["port_dst_registered"]["doc_count"],
                    "Port_Dest_Dynamic_Private": bucket["port_dst_dynamic_private"]["doc_count"],
                })

            after_key = result["aggregations"]["group_by_ip"].get("after_key")

        df = pd.DataFrame(data)
        print(f"✅ Extraction terminée : {len(df)} résultats récupérés.")
        return df

    except Exception as e:
        print(f"❌ Erreur lors de la requête Elasticsearch: {e}")
        traceback.print_exc()
        return pd.DataFrame()


# Exécuter la fonction et afficher un aperçu des résultats
df_permit_deny = permit_deny_by_ip()

# Sauvegarde optionnelle en CSV
# df_permit_deny.to_csv("resultats_permit_deny.csv", index=False)

df_permit_deny.head()


✅ Extraction terminée : 25268 résultats récupérés.


Unnamed: 0,IP_Source,PERMIT,PERMIT_TCP,PERMIT_UDP,DENY,Nb_Port Dest,Nb_Port_Src,Port_Dest_Well_Known,Port_Dest_Registered,Port_Dest_Dynamic_Private
0,1.0.255.245,0,0,0,1,1,1,0,1,0
1,1.1.136.123,0,0,0,1,1,1,0,1,0
2,1.10.247.195,0,0,0,1,1,1,0,1,0
3,1.11.45.8,2,2,0,0,1,2,0,0,2
4,1.117.220.131,0,0,0,2,1,1,0,2,0


In [94]:
df_permit_deny.value_counts("PERMIT TCP").head(10)

PERMIT TCP
0    25268
Name: count, dtype: int64

In [None]:
from elasticsearch import Elasticsearch
import pandas as pd

# Connexion à Elasticsearch (modifie selon ta config)
es = Elasticsearch("http://localhost:9200")  # Mets l'URL correcte

INDEX_NAME = "application-logs"
BATCH_SIZE = 1000 # Nombre d'éléments par batch


def permit_deny_by_ip():
    """Récupère le nombre de PERMIT et DENY par IP source en paginant avec composite."""
    query = {
        "size": 0,
        "aggs": {
            "group_by_ip": {
                "composite": {
                    "size": BATCH_SIZE,
                    "sources": [
                        {"ipsrc": {"terms": {"field": "ipsrc.keyword"}}}
                    ]
                },
                "aggs": {
                    "permit": {"filter": {"term": {"action.keyword": "PERMIT"}}},
                    "deny": {"filter": {"term": {"action.keyword": "DENY"}}},

                    "permit_proto_TCP": {"filter": {"bool": {"must": [{"term": {"action.keyword": "PERMIT"}}, {"term": {"protocol.keyword": "TCP"}}} }}},
                    "permit_proto_UDP": {"filter": {"bool": {"must": [{"term": {"action.keyword": "PERMIT"}}, {"term": {"protocol.keyword": "UDP"}}} }}},

                    # "nombre_ip_dest": {"cardinality": {"field": "ipdst.keyword"}},
                    # "nombre_ip_src": {"cardinality": {"field": "ipsrc.keyword"}},
                    "nombre_port_dest": {"cardinality": {"field": "portdst.keyword"}},
                    "nombre_port_src": {"cardinality": {"field": "portsrc.keyword"}},
                    # The Well-Known Ports, 0 through 1023
                    "port_dst_well_known": {"filter": {"range": {"portdst": {"lte": 1023}}}},
                    #The Registered Ports, 1024 through 49151
                    "port_dst_registered": {"filter": {"range": {"portdst": {"gte": 1024, "lte": 49151}}}},
                    #The Dynamic and/or Private Ports, 49152 through 65535
                    "port_dst_dynamic_private": {"filter": {"range": {"portdst": {"gte": 49152}}}},
                    
                },
            }
        }
    }

    after_key = None
    data = []

    try:
        while True:
            if after_key:
                query["aggs"]["group_by_ip"]["composite"]["after"] = after_key

            result = es.search(index=INDEX_NAME, body=query)
            buckets = result["aggregations"]["group_by_ip"]["buckets"]

            if not buckets:
                break

            for bucket in buckets:
                data.append({
                    "IP Source": bucket["key"]["ipsrc"],
                    "PERMIT": bucket["permit"]["doc_count"],
                    "PERMIT TCP": bucket["permit_proto_TCP"]["doc_count"],
                    "PERMIT UDP": bucket["permit_proto_UDP"]["doc_count"],
                    "DENY": bucket["deny"]["doc_count"],
                    "Nb Port Dest": bucket["nombre_port_dest"]["value"],
                    "Nb Port Src": bucket["nombre_port_src"]["value"],
                    "Port Dest Well-Known": bucket["port_dst_well_known"]["doc_count"],
                    "Port Dest Registered": bucket["port_dst_registered"]["doc_count"],
                    "Port Dest Dynamic/Private": bucket["port_dst_dynamic_private"]["doc_count"],

                })

            after_key = result["aggregations"]["group_by_ip"].get("after_key")

        return pd.DataFrame(data)

    except Exception as e:
        print(f"Erreur lors de la requête Elasticsearch: {e}")
        return pd.DataFrame()



# Exécuter les fonctions et fusionner les résultats
df_permit_deny = permit_deny_by_ip()

df_permit_deny.head()

SyntaxError: closing parenthesis '}' does not match opening parenthesis '[' (3211790352.py, line 27)

In [63]:
import streamlit as st
from elasticsearch import Elasticsearch
import pandas as pd

es = Elasticsearch("http://localhost:9200")
logs = []
tab =  pd.DataFrame()


In [64]:
# Fonction pour regrouper les logs par IP source
def group_by_ip_source():
    query = {
        "size": 0,  # Ne pas retourner de documents individuels
        "aggs": {
            "group_by_ip": {
                "terms": {
                    "field": "ipsrc.keyword",  # Grouper par IP source
                    "size": 100000  # Limiter à 10 résultats (ajustable)
                }
            }
        }
    }
    result = es.search(index="application-logs", body=query)  # Remplacez "application-logs" par votre index
    return result["aggregations"]["group_by_ip"]["buckets"]

# Affichage dans Streamlit
st.title("Regroupement des logs par IP source")

# Appel de la fonction
gb = group_by_ip_source()
ips = [item['key'] for item in gb ]
tab['ip_source'] = ips
tab['count'] = [item['doc_count'] for item in gb]
tab.head()





Unnamed: 0,ip_source,count
0,3.224.220.101,99567
1,23.22.35.162,97989
2,52.70.240.171,74575
3,51.79.181.158,68722
4,103.89.91.86,61109


In [65]:
# Nombre  de permit  et  deny  par  IP source
def permit_deny_by_ip():
    query = {
        "size": 0,
        "aggs": {
            "group_by_ip": {
                "terms": {
                    "field": "ipsrc.keyword",
                    "size": 100000
                },
                "aggs": {
                    "permit": {
                        "filter": {
                            "term": {
                                "action.keyword": "PERMIT"
                            }
                        }
                    },

                    "deny": {
                        "filter": {
                            "term": {
                                "action.keyword": "DENY"
                            }
                        }
                    }                    
                }
            }
        }
    }
    result = es.search(index="application-logs", body=query)
    return result["aggregations"]["group_by_ip"]["buckets"]

def ip_by_ip():
    query = {
        "size": 0,
        "aggs": {
            "group_by_ip": {
                "terms": {
                    "field": "ipsrc.keyword",
                    "size": 100000
                },
                "aggs": {

                    "nombre_ip_dest": {
                        "cardinality": {
                            "field": "ipdst.keyword"
                        }
                    },
                    "nombre_ip_src": {
                        "cardinality": {
                            "field": "ipsrc.keyword"
                        }
                    }
                }
            }
        }
    }
    result = es.search(index="application-logs", body=query)
    return result["aggregations"]["group_by_ip"]["buckets"]


def port_by_ip():
    query = {
        "size": 0,
        "aggs": {
            "group_by_ip": {
                "terms": {
                    "field": "ipsrc.keyword",
                    "size": 100000
                },
                "aggs": {
                    "nombre_port_dest": {
                        "cardinality": {
                            "field": "portdst.keyword"
                        }
                    },
                    "nombre_port_src": {
                        "cardinality": {
                            "field": "portsrc.keyword"
                    }
                    }                    
                }
            }
        }
    }
    result = es.search(index="application-logs", body=query)
    return result["aggregations"]["group_by_ip"]["buckets"]



# Appel de la fonction
pd = permit_deny_by_ip()
# ip = ip_by_ip()
port = port_by_ip()


tab.head()

ApiError: ApiError(429, 'search_phase_execution_exception', '[request] Data too large, data for [<reused_arrays>] would be [417629520/398.2mb], which is larger than the limit of [322122547/307.1mb]')

In [27]:
tab['ip'].nunique()

KeyError: 'ip'

In [None]:
for  ip in ips:
    

In [None]:

# Extraire uniquement les IP


# Afficher les IP
print(ips)

['3.224.220.101', '23.22.35.162', '52.70.240.171', '51.79.181.158', '103.89.91.86', '103.151.123.241', '103.139.44.2', '157.90.182.29', '79.124.60.150', '141.98.81.151', '89.248.163.109', '77.90.185.64', '76.167.102.53', '66.249.68.7', '51.79.191.240', '66.249.68.8', '176.113.115.104', '2.57.149.141', '66.249.68.1', '141.98.11.73', '138.2.229.118', '45.129.14.236', '35.166.42.212', '77.90.185.152', '47.76.57.30', '103.130.12.117', '146.75.146.0', '64.18.161.90', '66.249.77.98', '192.42.116.220', '192.42.116.175', '194.26.135.132', '76.93.190.0', '8.218.254.250', '124.156.207.67', '149.34.245.213', '194.26.135.123', '92.204.174.122', '39.173.95.32', '66.249.77.99', '146.75.146.1', '77.90.185.71', '42.240.131.249', '79.124.58.130', '79.124.58.150', '141.142.176.111', '104.28.111.146', '173.207.154.120', '89.248.165.235', '52.90.114.207', '141.142.176.110', '141.142.176.112', '77.90.185.155', '108.59.13.8', '185.220.101.45', '104.28.85.115', '73.222.216.69', '141.142.176.113', '72.132.36.