In [2]:
import csv

import pandas as pd

# Build the log URL from the server address stored on the first line of the
# local `.server` file.
with open('.server') as f:
    server_url = f.readline().strip()
# Drop any trailing slash so the joined URL never contains a double slash.
url = f"{server_url.rstrip('/')}/logfile.log"

# Read the log file directly from the URL, one DataFrame row per log line.
# '\x1A' is a separator that is unlikely to appear in the log, so each line
# lands whole in the single 'raw' column.
raw_log_df = pd.read_csv(
    url,
    sep='\x1A',
    header=None,
    engine='python',
    names=['raw'],
    # Treat '"' as ordinary text. With default quoting, a line that starts
    # with an unbalanced double quote would swallow the lines that follow it.
    quoting=csv.QUOTE_NONE,
    # Keep lines such as "None", "NA" or "null" as text instead of NaN.
    keep_default_na=False,
    # Never infer a numeric dtype; the .str accessor below needs strings.
    dtype=str,
)

# Keep only the lines that start with a timestamp (e.g. continuation lines of
# multi-line messages are dropped). na=False guarantees a pure boolean mask
# even if a missing value slips through, so the indexing below cannot raise.
valid_log_lines = raw_log_df['raw'].str.match(
    r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} -',
    na=False,
)
filtered_log_df = raw_log_df[valid_log_lines]

# Split each remaining entry into "asctime - name - levelname - message".
# The original row index is preserved. A line that starts with a timestamp
# but does not fit the full layout comes out as a row of NaN values.
log_df = filtered_log_df['raw'].str.extract(
    r'(?P<asctime>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})'
    r' - (?P<name>.*?) - (?P<levelname>.*?) - (?P<message>.*?)$'
)

# Convert 'asctime' to datetime; the part after the comma is the millisecond
# field, parsed here as fractional seconds via %f.
log_df['asctime'] = pd.to_datetime(log_df['asctime'], format='%Y-%m-%d %H:%M:%S,%f')

# Display the resulting DataFrame
log_df

Unnamed: 0,asctime,name,levelname,message
5,2023-10-05 20:25:43.329,__main__,INFO,Data Fetched Successfully.
6,2023-10-05 20:25:44.504,__main__,INFO,Data for Mehrabad Scraped Successfully!
7,2023-10-05 20:25:44.951,__main__,INFO,Data Fetched Successfully.
8,2023-10-05 20:25:45.381,__main__,WARNING,Encountered a None object for field 'flight_re...
9,2023-10-05 20:25:45.381,__main__,WARNING,Encountered a None object for field 'flight_re...
...,...,...,...,...
2981,2023-10-07 22:00:21.690,__main__,INFO,Data Fetched Successfully.
2982,2023-10-07 22:00:21.728,__main__,INFO,Data for Zanjan Scraped Successfully!
2983,2023-10-07 22:00:22.137,__main__,INFO,Data Fetched Successfully.
2984,2023-10-07 22:00:22.187,__main__,INFO,Data for Arak Scraped Successfully!
