In [2]:
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

# Location of the exported log-distribution CSV
csv_file = '/home/shouei/GreenSecurity-FirstExperiment/SplunkResearch/resources/logs_distribution_june.csv'

# Read the file and turn the '_time' column into datetimes in one chained step;
# assign() replaces the existing column in place, so column order is unchanged
df = pd.read_csv(csv_file).assign(
    _time=lambda frame: pd.to_datetime(frame['_time'])
)

# Define the bucket interval: one bucket per calendar day unless another frequency is passed
def get_bucket_start_date(date, freq='D'):
    """Return the start of the bucket that contains ``date``.

    Parameters
    ----------
    date : pandas.Timestamp
        Timestamp to assign to a bucket (any object exposing a compatible
        ``floor(freq)`` method works).
    freq : str, default 'D'
        Frequency alias handed to ``floor``. The default floors to midnight
        of the same day, i.e. daily buckets. Only fixed frequencies
        (days, hours, minutes, ...) can be floored; calendar-based ones such
        as weeks are rejected by pandas.

    Returns
    -------
    pandas.Timestamp
        ``date`` rounded down to the nearest multiple of ``freq``.
    """
    return date.floor(freq)

# Tag every row with the start of the bucket it belongs to
# (the function is passed directly; wrapping it in a lambda adds nothing)
df['bucket_start_date'] = df['_time'].apply(get_bucket_start_date)

# Group by the bucket start date
groups = df.groupby('bucket_start_date')

# Make sure the destination exists: to_csv does not create missing directories
output_dir = Path("/home/shouei/GreenSecurity-FirstExperiment/SplunkResearch/resources/output_buckets")
output_dir.mkdir(parents=True, exist_ok=True)

# Save each group to a separate CSV file named after its first and last timestamp
for bucket_start_date, group in groups:
    start_time = group['_time'].min().strftime('%Y-%m-%d_%H-%M-%S')
    end_time = group['_time'].max().strftime('%Y-%m-%d_%H-%M-%S')
    bucket_name = f'bucket_{start_time}_{end_time}.csv'
    # Drop the temporary 'bucket_start_date' column on a new frame instead of
    # mutating the slice handed out by groupby
    bucket_rows = group.drop(columns=['bucket_start_date'])
    bucket_rows.to_csv(output_dir / bucket_name, index=False)
    print(f'Saved {bucket_name}')


Saved bucket_2024-05-26_12-59-56_2024-05-26_23-59-59.csv
Saved bucket_2024-05-27_00-00-00_2024-05-27_23-59-57.csv
Saved bucket_2024-05-28_00-00-01_2024-05-28_23-59-52.csv
Saved bucket_2024-05-29_00-00-00_2024-05-29_23-59-44.csv
Saved bucket_2024-05-30_00-00-00_2024-05-30_23-59-42.csv
Saved bucket_2024-05-31_00-00-01_2024-05-31_23-59-44.csv
Saved bucket_2024-06-01_00-00-00_2024-06-01_23-59-48.csv
Saved bucket_2024-06-02_00-00-01_2024-06-02_23-59-55.csv
Saved bucket_2024-06-03_00-00-00_2024-06-03_23-59-59.csv
Saved bucket_2024-06-04_00-00-00_2024-06-04_23-59-51.csv
Saved bucket_2024-06-05_00-00-00_2024-06-05_23-59-59.csv
Saved bucket_2024-06-06_00-00-00_2024-06-06_23-59-59.csv
Saved bucket_2024-06-07_00-00-00_2024-06-07_23-59-53.csv
Saved bucket_2024-06-08_00-00-00_2024-06-08_23-59-59.csv
Saved bucket_2024-06-09_00-00-00_2024-06-09_23-59-47.csv
Saved bucket_2024-06-10_00-00-01_2024-06-10_23-59-59.csv
Saved bucket_2024-06-11_00-00-03_2024-06-11_23-59-55.csv
Saved bucket_2024-06-12_00-00-0

In [6]:
# Sanity check: floor every '_time' value to the day and list the distinct days present in df
df['_time'].dt.floor('D').unique()

<DatetimeArray>
['2024-05-19 00:00:00+03:00', '2024-05-20 00:00:00+03:00',
 '2024-05-21 00:00:00+03:00', '2024-05-22 00:00:00+03:00',
 '2024-05-23 00:00:00+03:00', '2024-05-24 00:00:00+03:00',
 '2024-05-25 00:00:00+03:00']
Length: 7, dtype: datetime64[ns, UTC+03:00]

In [16]:
import splunklib.client as client
import splunklib.results as results
import csv

import os

# --- Splunk connection details ---
HOST = "localhost"          # or your Splunk server
PORT = 8089                 # Splunk management port
# The user name can be overridden through SPLUNK_USERNAME
USERNAME = os.environ.get("SPLUNK_USERNAME", "shouei")
# The password is deliberately NOT stored in the notebook: export SPLUNK_PASSWORD
# in the environment the Jupyter kernel is started from.
PASSWORD = os.environ.get("SPLUNK_PASSWORD", "")
if not PASSWORD:
    print("SPLUNK_PASSWORD is not set; connecting to Splunk will fail until it is exported.")
INDEX = "main"


# --- Time range (edit these) ---
EARLIEST_TIME = "2025-01-01T00:00:00"  # ISO format
LATEST_TIME = "2025-03-01T23:59:59"

# --- Splunk search query ---
# Assembled piece by piece: the host list is joined with ", " and the pipeline
# stages with newlines. The empty first and last entries reproduce the newline
# that opens and closes the query text.
QUERY = "\n".join((
    "",
    "search index=main host IN ("
    + ", ".join((
        "LB228-1", "LB-003-31", "LB-105-14", "DT-117-01", "LB-003-13",
        "LB-003-27", "LB-101-28", "LB-101-23", "LB-003-3", "LB-105-16",
    ))
    + ")",
    "| bin _time span=1m",
    "| stats count by source EventCode _time",
    "",
))

# --- Connect to Splunk ---
# Endpoint and credentials are taken from the configuration constants of this cell
connection_kwargs = {
    "host": HOST,
    "port": PORT,
    "username": USERNAME,
    "password": PASSWORD,
}
service = client.connect(**connection_kwargs)

# --- Run the search with a time range ---
# exec_mode="blocking" is requested so that, per the Splunk SDK, create()
# hands back the job only after the search has completed
search_kwargs = {
    "earliest_time": EARLIEST_TIME,
    "latest_time": LATEST_TIME,
    "exec_mode": "blocking",
}
job = service.jobs.create(QUERY, **search_kwargs)


In [21]:

# --- Get results ---
# Uses the `results` alias bound by `import splunklib.results as results` in the
# connection cell, so this cell no longer depends on an import made in a later cell.
# count=0 asks Splunk for every result row instead of the default page size.
results_reader = results.JSONResultsReader(job.results(output_mode="json", count=0))

# --- Write to CSV file ---
output_file = "splunk_results.csv"
with open(output_file, "w", newline='', encoding='utf-8') as f:
    writer = None
    for result in results_reader:
        # The reader can yield non-row items as well; only dict rows are data
        if not isinstance(result, dict):
            continue
        if writer is None:
            # The first row defines the header. DictWriter then places every
            # value under its own column name (missing fields are written as
            # empty cells, unexpected extra fields raise ValueError) instead of
            # relying on each row listing its keys in the same order.
            writer = csv.DictWriter(f, fieldnames=list(result.keys()), restval="")
            writer.writeheader()
        writer.writerow(result)

print(f"✅ Results saved to {output_file}")


✅ Results saved to splunk_results.csv


In [22]:
import splunklib.results as splunk_results
# Peek at a handful of rows only: asking for count=0 would fetch and print
# every result row of the job, flooding the cell output.
PREVIEW_ROWS = 10
for row in splunk_results.JSONResultsReader(job.results(output_mode="json", count=PREVIEW_ROWS)):
    print(row)

{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T00:15:00.000+02:00', 'count': '2'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T06:35:00.000+02:00', 'count': '2'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T06:36:00.000+02:00', 'count': '2'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T06:51:00.000+02:00', 'count': '2'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T07:00:00.000+02:00', 'count': '18'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T07:01:00.000+02:00', 'count': '12'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T07:05:00.000+02:00', 'count': '8'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T07:10:00.000+02:00', 'count': '5'}
{'source': 'WinEventLog:Application', 'EventCode': '0', '_time': '2025-01-01T08:00:00.000+02:00', 'cou

In [11]:
import pandas as pd

In [26]:
# Load the exported Splunk results and preview the first five rows
splunk_results_path = "/home/shouei/GreenSecurity-FirstExperiment/SplunkResearch/src/splunk_results.csv"
splunk_df = pd.read_csv(splunk_results_path)
splunk_df.head(5)

Unnamed: 0,source,EventCode,_time,count
0,WinEventLog:Application,0,2025-01-01 00:15:00.000 IST,2
1,WinEventLog:Application,0,2025-01-01 06:35:00.000 IST,2
2,WinEventLog:Application,0,2025-01-01 06:36:00.000 IST,2
3,WinEventLog:Application,0,2025-01-01 06:51:00.000 IST,2
4,WinEventLog:Application,0,2025-01-01 07:00:00.000 IST,18


In [40]:
import pandas as pd

splunk_df = pd.read_csv("/home/shouei/GreenSecurity-FirstExperiment/SplunkResearch/src/splunk_results.csv")

# Step 1: normalise the raw text - trim whitespace and drop the trailing " IST" label.
# The label is removed rather than mapped to a UTC offset: the target is a naive
# timestamp that keeps the exported wall-clock time, so no offset arithmetic is needed.
time_text = (
    splunk_df['_time']
    .astype(str)
    .str.strip()
    .str.replace(' IST', '', regex=False)
)

# Step 2: parse the wall-clock text; values that do not match the format become NaT
parsed_time = pd.to_datetime(
    time_text,
    format='%Y-%m-%d %H:%M:%S.%f',
    errors='coerce'
)

# Step 3: retry the leftovers without fractional seconds, so the cell still works
# when the file has already been cleaned and written back in that shorter form
unparsed = parsed_time.isna()
if unparsed.any():
    parsed_time = parsed_time.fillna(
        pd.to_datetime(
            time_text[unparsed],
            format='%Y-%m-%d %H:%M:%S',
            errors='coerce'
        )
    )

splunk_df['_time'] = parsed_time

# Step 4: errors='coerce' hides failures, so report how many values ended up as NaT
n_unparsed = int(splunk_df['_time'].isna().sum())
if n_unparsed:
    print(f"Warning: {n_unparsed} of {len(splunk_df)} '_time' values could not be parsed")

print(splunk_df['_time'].head())
# info() prints its report itself and returns None, so it must not be wrapped in print()
splunk_df.info()


0   2025-01-01 00:15:00
1   2025-01-01 06:35:00
2   2025-01-01 06:36:00
3   2025-01-01 06:51:00
4   2025-01-01 07:00:00
Name: _time, dtype: datetime64[ns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249397 entries, 0 to 249396
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   source     249397 non-null  object        
 1   EventCode  249397 non-null  int64         
 2   _time      249397 non-null  datetime64[ns]
 3   count      249397 non-null  int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 7.6+ MB
None


In [25]:
# Display the frame; for a frame this long the notebook renders a truncated view.
# NOTE(review): the stored output for this cell shows '_time' as NaT and has a lower
# execution count than the cell that parses '_time' - re-run to refresh it.
splunk_df

Unnamed: 0,source,EventCode,_time,count
0,WinEventLog:Application,0,NaT,2
1,WinEventLog:Application,0,NaT,2
2,WinEventLog:Application,0,NaT,2
3,WinEventLog:Application,0,NaT,2
4,WinEventLog:Application,0,NaT,18
...,...,...,...,...
249392,WinEventLog:System,98,NaT,2
249393,WinEventLog:System,98,NaT,11
249394,WinEventLog:System,98,NaT,2
249395,WinEventLog:System,98,NaT,2


In [41]:
# Persist the frame without the row index.
# NOTE(review): this is the same path the loading cells read from, so the raw
# export is overwritten - confirm that is intended.
cleaned_results_path = "/home/shouei/GreenSecurity-FirstExperiment/SplunkResearch/src/splunk_results.csv"
splunk_df.to_csv(cleaned_results_path, index=False)

In [None]:
%%bash
# Export the per-minute event counts through Splunk's REST export endpoint into splunk_results.csv.
# - Runs as a shell cell (%%bash); a bare curl command is not valid Python.
# - Credentials come from the environment: SPLUNK_PASSWORD must be exported,
#   SPLUNK_USERNAME falls back to "shouei".
# - --data-urlencode is used for the search and the time bounds so that spaces, "|",
#   newlines and the "+" of the UTC offset reach Splunk intact (with plain -d a "+"
#   is decoded as a space by the server).
# - latest_time was "2025-02-30", which is not a calendar date; it now matches the
#   2025-03-01 upper bound used by the Python search in this notebook.
# - -k skips TLS certificate verification (NOTE(review): presumably because the local
#   management port uses a self-signed certificate - confirm).
curl -k -u "${SPLUNK_USERNAME:-shouei}:${SPLUNK_PASSWORD:?export SPLUNK_PASSWORD before running this cell}" \
  https://localhost:8089/services/search/jobs/export \
  --data-urlencode search='search index=main host IN (LB228-1, LB-003-31, LB-105-14, DT-117-01, LB-003-13, LB-003-27, LB-101-28, LB-101-23, LB-003-3, LB-105-16)
  | bin _time span=1m
  | stats count by source EventCode _time' \
  --data-urlencode earliest_time="2025-01-01T00:00:00.000+02:00" \
  --data-urlencode latest_time="2025-03-01T23:59:59.000+02:00" \
  -d output_mode=csv \
  -o splunk_results.csv
