# Time Series Project

### Importing Libraries

In [1]:
import pandas as pd
import datetime as dt
import re

### Reading Data

In [2]:
# Read the raw ASManager log export into memory.
# NOTE(review): this is an absolute, machine-specific path — consider moving it to a
# configurable, project-relative data directory before sharing the notebook.
with open("C:/Users/Samuel/Downloads/2023-02_ASManager-1.txt", 'r') as file:
    data = file.read()

# Pattern for one log line:
#   group 1 -> text inside "[Local: ...]"
#   group 2 -> text inside "[UTC: ...]"
#   group 3 -> optional "[ControllerId: N] " prefix (group 4 is the digits)
#   group 5 -> the remainder of the line (the message)
# Written as a raw string so the backslash escapes reach `re` unchanged instead of
# being treated as (invalid) Python string escapes.
pattern = r'\[Local: ([^\]]+)\] \[UTC: ([^\]]+)\] (\[ControllerId: ([0-9]+)\] )?(.*)'
log_line_re = re.compile(pattern)  # compile once, reuse for every line

# Collect matching lines as plain records and build the DataFrame in a single step.
# Growing a DataFrame row by row (`DataFrame.append`) is quadratic and the method
# no longer exists in pandas >= 2.0.
records = []
for line in data.split('\n'):
    match = log_line_re.match(line)
    if match:
        timestamp = pd.Timestamp(match.group(1))
        message = match.group(5)
        records.append({'timestamp': timestamp, 'message': message})

# Passing `columns` keeps the expected schema even when no line matched.
df = pd.DataFrame(records, columns=['timestamp', 'message'])

# set the timestamp column as the index of the DataFrame
df = df.set_index('timestamp')

# Sort chronologically; a stable sort keeps events that share the same timestamp
# in the order they appear in the log file.
df = df.sort_index(kind='mergesort')

df.head(50)

Unnamed: 0_level_0,message
timestamp,Unnamed: 1_level_1
2023-02-02 09:55:14,ASManager Start
2023-02-02 09:55:15,ASManagerService Start
2023-02-02 09:55:48,Database Connected
2023-02-02 09:56:02,GeoWebServer Start
2023-02-02 09:56:07,Controller Connected
2023-02-02 09:56:25,Controller Connect Failed
2023-02-02 09:57:02,[HostId: 0001] ASManagerService Connect Failed
2023-02-02 09:57:05,[HostId: 0001] ASManagerService Connected
2023-02-02 10:31:43,Controller Disconnected
2023-02-02 10:32:08,Controller Connect Failed


### EDA

In [3]:
# (number of rows, number of columns) of the parsed log DataFrame
df.shape

(719, 1)

In [4]:
# Summary of the frame: index type and range, column dtypes, non-null counts, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 719 entries, 2023-02-02 09:55:14 to 2023-02-28 06:39:21
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   message  719 non-null    object
dtypes: object(1)
memory usage: 11.2+ KB


In [5]:
# Count how many times each distinct message occurs (most frequent first)
df['message'].value_counts()

Controller Connected                              123
ASManager Start                                    89
[HostId: 0001] ASManagerService Connected          84
ASManagerService Start                             69
Database Connected                                 67
GeoWebServer Start                                 66
Controller Connect Failed                          57
[HostId: 0001] ASManagerService Disconnected       42
[HostId: 0001] ASManagerService Connect Failed     37
Controller Disconnected                            25
ASManager Exit                                     17
GeoWebServer Exit                                  17
ASManager Exit (Forced)                            15
ASManagerService Exit                               5
Database Disconnected                               4
Controller Connect Failed (Login Failed)            2
Name: message, dtype: int64

### When ASManager Start

In [6]:
# Build a new DataFrame holding only the 'ASManager Start' events,
# then preview its first five rows with the timestamp as a regular column.
is_as_manager_start = df['message'].eq('ASManager Start')
df_as_manager_start = df.loc[is_as_manager_start]
df_as_manager_start.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:55:14,ASManager Start
1,2023-02-02 10:59:19,ASManager Start
2,2023-02-02 14:07:30,ASManager Start
3,2023-02-03 09:55:51,ASManager Start
4,2023-02-03 12:12:25,ASManager Start


### When ASManagerService Start

In [7]:
# Rows whose message is exactly 'ASManagerService Start'; preview the first five.
is_asmanager_service_start = df['message'].eq('ASManagerService Start')
df_asmanager_service_start = df.loc[is_asmanager_service_start]
df_asmanager_service_start.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:55:15,ASManagerService Start
1,2023-02-02 10:59:37,ASManagerService Start
2,2023-02-02 14:07:41,ASManagerService Start
3,2023-02-03 09:55:51,ASManagerService Start
4,2023-02-03 12:12:29,ASManagerService Start


### When Database Connected

In [8]:
# Rows whose message is exactly 'Database Connected'; preview the first five.
is_database_connected = df['message'].eq('Database Connected')
df_database_connected = df.loc[is_database_connected]
df_database_connected.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:55:48,Database Connected
1,2023-02-02 11:02:39,Database Connected
2,2023-02-02 14:09:16,Database Connected
3,2023-02-03 09:56:25,Database Connected
4,2023-02-03 12:12:55,Database Connected


### When GeoWebServer Start

In [9]:
# Rows whose message is exactly 'GeoWebServer Start'; preview the first five.
is_geo_web_server_start = df['message'].eq('GeoWebServer Start')
df_geo_web_server_start = df.loc[is_geo_web_server_start]
df_geo_web_server_start.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:56:02,GeoWebServer Start
1,2023-02-02 11:04:00,GeoWebServer Start
2,2023-02-02 14:10:11,GeoWebServer Start
3,2023-02-03 09:56:35,GeoWebServer Start
4,2023-02-03 12:13:11,GeoWebServer Start


### When Controller Connected

In [10]:
# Rows whose message is exactly 'Controller Connected'; preview the first five.
is_controller_connected = df['message'].eq('Controller Connected')
df_controller_connected = df.loc[is_controller_connected]
df_controller_connected.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:56:07,Controller Connected
1,2023-02-02 10:44:15,Controller Connected
2,2023-02-02 11:04:27,Controller Connected
3,2023-02-02 11:46:04,Controller Connected
4,2023-02-02 12:14:11,Controller Connected


### When Controller Disconnected

In [11]:
# Rows whose message is exactly 'Controller Disconnected'; preview the first five.
is_controller_disconnected = df['message'].eq('Controller Disconnected')
df_controller_disconnected = df.loc[is_controller_disconnected]
df_controller_disconnected.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 10:31:43,Controller Disconnected
1,2023-02-02 11:37:15,Controller Disconnected
2,2023-02-02 12:07:19,Controller Disconnected
3,2023-02-02 12:23:24,Controller Disconnected
4,2023-02-11 12:30:12,Controller Disconnected


### When Controller Connect Failed

In [12]:
# Rows whose message is exactly 'Controller Connect Failed'; preview the first five.
# The equality test is exact, so variants with a suffix after the message are excluded.
is_controller_connect_failed = df['message'].eq('Controller Connect Failed')
df_controller_connect_failed = df.loc[is_controller_connect_failed]
df_controller_connect_failed.head(5).reset_index()

Unnamed: 0,timestamp,message
0,2023-02-02 09:56:25,Controller Connect Failed
1,2023-02-02 10:32:08,Controller Connect Failed
2,2023-02-02 11:04:33,Controller Connect Failed
3,2023-02-02 11:37:17,Controller Connect Failed
4,2023-02-02 12:07:46,Controller Connect Failed
