<a href="https://colab.research.google.com/github/rajkumarsingh19/Log-parsing-Python-scripts/blob/main/Logs_scripts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
def create_user(first_name: str, last_name: str, age: int) -> dict:
  """Build a user record with a derived Gmail address.

  Args:
    first_name: Given name; lower-cased to form the first half of the email.
    last_name: Family name; lower-cased to form the second half of the email.
    age: Age, stored unchanged in the record.

  Returns:
    A dict with the keys "first_name", "last_name", "email" and "age".

  Raises:
    TypeError: If first_name or last_name is not a str, or age is not an int.
  """
  # Validate before using the arguments: building the email first would call
  # .lower() on a non-string and raise AttributeError instead of TypeError.
  if not isinstance(first_name, str):
    raise TypeError("first_name must be a string")

  if not isinstance(last_name, str):
    raise TypeError("last_name must be a string")

  if not isinstance(age, int):
    raise TypeError("age must be an integer")

  email = f"{first_name.lower()}{last_name.lower()}@gmail.com"
  return {
      "first_name": first_name,
      "last_name": last_name,
      "email": email,
      "age": age,
  }


# Build a sample record, naming each argument explicitly, and show it.
user1: dict = create_user(first_name="rajkumar", last_name="singh", age=38)
print(user1)


{'first_name': 'rajkumar', 'last_name': 'singh', 'email': 'rajkumarsingh@gmail.com', 'age': 38}


In [38]:
from pydantic import validate_call

@validate_call
def create_user(first_name: str, last_name: str, age: int, tag: str = "") -> dict:
    """Build a user record whose email is "<first>.<last><tag>@gmail.com".

    The function is wrapped by pydantic's ``validate_call``.
    NOTE(review): presumably that checks the arguments against the
    annotations at call time -- confirm against the pydantic docs.

    Args:
        first_name: Given name; lower-cased for the email, stored as given.
        last_name: Family name; lower-cased for the email, stored as given.
        age: Age, stored unchanged in the record.
        tag: Optional suffix appended directly after the last name in the
            email's local part (no separator is inserted).

    Returns:
        A dict with the keys "first_name", "last_name", "email", "age", "tag".
    """
    local_part = ".".join((first_name.lower(), last_name.lower())) + tag
    record = dict(
        first_name=first_name,
        last_name=last_name,
        email=f"{local_part}@gmail.com",
        age=age,
        tag=tag,
    )
    return record

# Exercise the tagged variant, naming each argument explicitly.
user = create_user(first_name="Rajkumar", last_name="Singh", age=38, tag="1985")
print(user)

{'first_name': 'Rajkumar', 'last_name': 'Singh', 'email': 'rajkumar.singh1985@gmail.com', 'age': 38, 'tag': '1985'}


In [None]:
import re

text = "Event Time: 2025-12-07 14:20:55 Cell: 12345, Event Time: 2025-12-08 14:20:55 Cell: 12345"
pattern = r"\d{4}-\d{2}-\d{2}"

# re.search reports only the first match, and returns None when nothing
# matches -- guard before calling .group() to avoid an AttributeError.
date = re.search(pattern, text)
if date:
    print(date.group())
else:
    print("No date found")

2025-12-07
2025-12-07


In [None]:
import re

text = "Event Time: 2025-12-07 14:20:55 Cell: 12345, Event Time: 2025-12-08 14:20:55 Cell: 12345"
pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"

# Walk every non-overlapping timestamp from left to right and print each one.
timestamp_re = re.compile(pattern)
for hit in timestamp_re.finditer(text):
    print(hit.group(0))

2025-12-07 14:20:55
2025-12-08 14:20:55


In [None]:
import re

# IPv4 address: four dot-separated octets, each limited to the range 0-255.
ip_pattern = r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b"

# Timestamp shaped like YYYY-MM-DD HH:MM:SS.
datetime_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"

text = (
    "Successful ping from 192.168.1.25 but not from 999.999.999.999 "
    "Event Time: 2025-12-07 14:20:55 Cell: 12345, "
    "Event Time: 2025-12-08 14:20:55 Cell: 12345"
)

# Join the two alternatives with "|" so a single scan picks up either kind of
# token, in the order they appear in the text.
pattern = re.compile("|".join((ip_pattern, datetime_pattern)))

matches = pattern.findall(text)
print(matches)


['192.168.1.25', '2025-12-07 14:20:55', '2025-12-08 14:20:55']


In [None]:
import re

ip_pattern = r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b"
dt_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"

text = (
    "Successful ping from 192.168.1.25 Event Time: 2025-12-07 14:20:55 Cell: 12345"
)

# Group 1 is an IP address, group 2 the closest timestamp after it: the lazy
# ".*?" between them skips as little text as possible.
pair_pattern = re.compile("(" + ip_pattern + ").*?(" + dt_pattern + ")")
result = pair_pattern.search(text)

if result is not None:
    ip_address, event_time = result.groups()
    print("IP:", ip_address)
    print("Datetime:", event_time)


In [41]:
import re

system_log = "2025-12-07 12:45:01 ERROR Connection failed on 192.168.1.10 ,2025-12-07 12:45:05 INFO Retrying connection"

# One entry = timestamp, level, then message text. The message is matched
# lazily and stops at the next ",<date>" separator or at the end of the string.
pattern = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"
    r"\s+(INFO|WARN|ERROR)"
    r"\s+(.*?)"
    r"(?=,\d{4}-\d{2}-\d{2}|\Z)"
)

matches = pattern.findall(system_log)

# Messages can keep trailing whitespace before the separator, hence strip().
report_lines = [
    f"Timestamp: {ts}, Level: {level}, Message: {msg.strip()}"
    for ts, level, msg in matches
]
for report_line in report_lines:
    print(report_line)



Timestamp: 2025-12-07 12:45:01, Level: ERROR, Message: Connection failed on 192.168.1.10
Timestamp: 2025-12-07 12:45:05, Level: INFO, Message: Retrying connection


In [9]:
"""
VoLTE SIP Log Parser
Extracts SIP Method (INVITE/BYE/CANCEL), caller number, and failure cause.
This script contains sample SIP logs inside the code so you can run it directly.
"""

import re

# ✅ Sample SIP logs included inside the code (no file needed)
sip_logs = """
INVITE sip:12345@ims.com SIP/2.0 | From: +919876543210 | Cause: 487 Request Terminated
BYE sip:98765@ims.com SIP/2.0 | From: +918888888888 | Cause: 480 Temporarily Unavailable
CANCEL sip:55555@ims.com SIP/2.0 | From: +917777777777 | Cause: 408 Request Timeout
INVITE sip:22222@ims.com SIP/2.0 | From: +916666666666 | Cause: 486 Busy Here
"""

# Capture groups, in order:
#   1. SIP method (INVITE, BYE or CANCEL)
#   2. caller number after "From:" ("+" followed by digits)
#   3. everything after "Cause:" up to the end of the line
pattern = re.compile(
    r"(INVITE|BYE|CANCEL).*?From:\s+(\+\d+).*?Cause:\s+(.*)"
)

print("=== Parsed SIP Failure Logs ===\n")

divider = "-" * 40
# Lines that do not match (such as the blank first and last lines of the
# sample) produce None and are filtered out before unpacking.
for hit in filter(None, map(pattern.search, sip_logs.split("\n"))):
    method, caller, cause = hit.groups()
    print(f"Method: {method}")
    print(f"Caller: {caller}")
    print(f"Cause : {cause}")
    print(divider)


=== Parsed SIP Failure Logs ===

Method: INVITE
Caller: +919876543210
Cause : 487 Request Terminated
----------------------------------------
Method: BYE
Caller: +918888888888
Cause : 480 Temporarily Unavailable
----------------------------------------
Method: CANCEL
Caller: +917777777777
Cause : 408 Request Timeout
----------------------------------------
Method: INVITE
Caller: +916666666666
Cause : 486 Busy Here
----------------------------------------


In [8]:
import re

text = "My name is Raj and I am 30"

# Group 1 is the name, group 2 the age. "(.*)" is greedy, so when the text
# contains several " and I am <digits>" phrases the name extends to the last.
pattern = re.compile(r"My name is (.*) and I am (\d+)")
match = pattern.search(text)

# search() returns None when the sentence does not fit the template, so check
# before unpacking instead of failing with AttributeError.
if match:
    print(match.groups())
else:
    print("No match")

('Raj', '30')


In [10]:
"""
System Error Log Parser
Extracts timestamp, LEVEL (ERROR/FAIL), and full message.
This version has sample logs defined inside the script itself.
It writes the extracted errors to errors.csv
"""

import csv
import re

# -----------------------------------------
# Sample logs included directly inside the code
# -----------------------------------------
sample_logs = """
12:30:01 INFO System initialized successfully
12:31:15 ERROR Disk read failure on /dev/sda1
12:32:20 WARN Temperature threshold reached
12:33:05 FAIL Service xyz crashed unexpectedly
12:34:55 INFO Restarting service xyz
12:35:10 ERROR Network unreachable for 192.168.0.5
"""

# -----------------------------------------
# Regex (anchored at the start of the line):
# Group 1 -> timestamp
# Group 2 -> level (ERROR / FAIL)
# The level must be the whole word right after the timestamp. A loose ".*"
# between the two would also accept INFO/WARN lines whose message merely
# mentions ERROR or FAIL, and would report the last such word as the level.
# -----------------------------------------
pattern = re.compile(r"^\s*(\d{2}:\d{2}:\d{2})\s+(ERROR|FAIL)\b")

# -----------------------------------------
# Write output to CSV
# -----------------------------------------
# newline="" lets the csv module control line endings; the explicit encoding
# keeps the file identical across platforms.
with open("errors.csv", "w", newline="", encoding="utf-8") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["timestamp", "level", "message"])   # header

    for line in sample_logs.splitlines():
        m = pattern.search(line)
        if m is None:
            continue  # blank line or a level we are not interested in
        timestamp, level = m.groups()
        entry = line.strip()
        # The "message" column carries the full stripped log line.
        writer.writerow([timestamp, level, entry])
        print(f"Captured: {timestamp} | {level} | {entry}")

print("\nerrors.csv generated successfully!")


Captured: 12:31:15 | ERROR | 12:31:15 ERROR Disk read failure on /dev/sda1
Captured: 12:33:05 | FAIL | 12:33:05 FAIL Service xyz crashed unexpectedly
Captured: 12:35:10 | ERROR | 12:35:10 ERROR Network unreachable for 192.168.0.5

errors.csv generated successfully!


# A simple REST API script (using requests)

In [11]:
import requests

url = "https://api.github.com/users/octocat"

# Always pass a timeout: without one, requests waits indefinitely when the
# server stops responding and the cell never finishes.
try:
    response = requests.get(url, timeout=10)
except requests.RequestException as exc:
    # Connection errors and timeouts are reported like any other failure
    # instead of ending the cell with a traceback.
    print("Request failed:", exc)
else:
    if response.status_code == 200:
        data = response.json()
        print("Login:", data["login"])
        print("ID:", data["id"])
    else:
        print("Request failed:", response.status_code)


Login: octocat
ID: 583231


# Example 1 — GET Request (Basic API Call)

In [12]:
import requests

url = "https://httpbin.org/get"
# Bound the wait: requests has no default timeout, so a stalled connection
# would otherwise hang this cell forever.
response = requests.get(url, timeout=10)

print("Status:", response.status_code)
print("Response JSON:", response.json())


Status: 200
Response JSON: {'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.32.4', 'X-Amzn-Trace-Id': 'Root=1-6936492a-58fe9378334354640a0a75c6'}, 'origin': '35.225.17.228', 'url': 'https://httpbin.org/get'}


# Example 2 — GET With Params (Query Parameters)

In [13]:
import requests

url = "https://httpbin.org/get"
params = {"name": "raj", "city": "Delhi"}

# params= is encoded into the query string (see response.url below).
# The timeout bounds the wait; requests has no default and would otherwise
# hang on a stalled connection.
response = requests.get(url, params=params, timeout=10)

print("URL Sent:", response.url)
print("Response:", response.json())


URL Sent: https://httpbin.org/get?name=raj&city=Delhi
Response: {'args': {'city': 'Delhi', 'name': 'raj'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.32.4', 'X-Amzn-Trace-Id': 'Root=1-6936498a-4748648209a53b614000018f'}, 'origin': '35.225.17.228', 'url': 'https://httpbin.org/get?name=raj&city=Delhi'}


# Example 3 — POST Request (Sending Data)

In [14]:
import requests

url = "https://httpbin.org/post"
# Dummy values for the echo service only -- never place real credentials in
# a notebook, since both the source and the printed output get shared.
payload = {"username": "raj", "password": "1234"}

# json= sends the dict as the request body.
# The timeout bounds the wait; requests has no default and would otherwise
# hang on a stalled connection.
response = requests.post(url, json=payload, timeout=10)

print("Status:", response.status_code)
print("Sent JSON:", payload)
print("Server Reply:", response.json())


Status: 200
Sent JSON: {'username': 'raj', 'password': '1234'}
Server Reply: {'args': {}, 'data': '{"username": "raj", "password": "1234"}', 'files': {}, 'form': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Content-Length': '39', 'Content-Type': 'application/json', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.32.4', 'X-Amzn-Trace-Id': 'Root=1-693649d9-4a043ae51f6797c21021cc49'}, 'json': {'password': '1234', 'username': 'raj'}, 'origin': '35.225.17.228', 'url': 'https://httpbin.org/post'}


In [16]:
# Basic-auth request with placeholder credentials. Load real ones from the
# environment or getpass rather than hardcoding them here.
# The timeout bounds the wait; requests has no default and would otherwise
# hang on a stalled connection.
r = requests.get('https://api.github.com/user', auth=('user', 'pass'), timeout=10)
r.status_code

401

In [17]:
# Look up the Content-Type header on the response fetched earlier.
r.headers['content-type']

'application/json; charset=utf-8'

In [18]:
# Show the text encoding recorded on the response object.
r.encoding

'utf-8'

In [19]:
# Show the response body as a string (raw JSON text for this endpoint).
r.text

'{\r\n  "message": "Requires authentication",\r\n  "documentation_url": "https://docs.github.com/rest",\r\n  "status": "401"\r\n}'

In [33]:
# Decode the response body from JSON into Python objects and print the result.
r_JSON = r.json()
print(r_JSON)


{'message': 'Requires authentication', 'documentation_url': 'https://docs.github.com/rest', 'status': '401'}


In [21]:
# Print the built-in help for the response object. The output is very long;
# consider clearing it before sharing the notebook.
help(r)

Help on Response in module requests.models object:

class Response(builtins.object)
 |  The :class:`Response <Response>` object, which contains a
 |  server's response to an HTTP request.
 |
 |  Methods defined here:
 |
 |  __bool__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |
 |      This attribute checks if the status code of the response is between
 |      400 and 600 to see if there was a client error or a server error. If
 |      the status code, is between 200 and 400, this will return True. This
 |      is **not** a check to see if the response code is ``200 OK``.
 |
 |  __enter__(self)
 |
 |  __exit__(self, *args)
 |
 |  __getstate__(self)
 |      Helper for pickle.
 |
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |
 |  __iter__(self)
 |      Allows you to use a response as an iterator.
 |
 |  __nonzero__(self)
 |      Returns True if :attr:`status_code` is less than 400.
 |
 |      This attribute checks if 