In [None]:
import pandas as pd
import re
import requests

# Retrieve text from the URL
url = 'https://pdreport.slocity.org/policelog/rpcdsum.txt'
# Without a timeout, requests waits indefinitely if the server stops responding.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of parsing an error page as if it were the log.
response.raise_for_status()
text = response.text

# Filtering lines: skip every line that consists only of separator/punctuation
# characters and whitespace (this also skips empty lines). After each line that
# contains "CALL COMMENTS:" insert a row of "-" so the split further down has a
# delimiter to cut on.
separator_chars = set('-=\';,•`_"')
filtered_lines = []
for line in text.split('\n'):
    if all(char in separator_chars or char.isspace() for char in line):
        continue
    filtered_lines.append(line)
    if "CALL COMMENTS:" in line:
        filtered_lines.append("-" * len(line) + "\n")

text = '\n'.join(filtered_lines)
# Drop the first three remaining lines (presumably the report header - confirm
# against the live file if its layout changes).
filtered_text_lines = text.split('\n')[3:]
filtered_text = '\n'.join(filtered_text_lines)

# Split text into incidents using the row of "-" signs as delimiter
incidents = re.split(r'\n-{2,}', filtered_text)

# Parse one incident's text chunk into its individual fields
def extract_incident_info(incident_text):
    """Extract the known fields from the text of a single incident.

    Every field is located with its own regular expression. The first match
    in the text wins, and a field whose pattern does not match is returned
    as an empty string.

    Parameters
    ----------
    incident_text : str
        Text belonging to one incident.

    Returns
    -------
    list of str
        ``[incident_id, date, received, dispatched, arrived, cleared,
        incident_type, address, comment]``; the comment is stripped of
        surrounding whitespace.
    """
    def first_group(pattern, flags=0):
        # First capture group of the first match, or '' when nothing matches.
        hit = re.search(pattern, incident_text, flags)
        return hit.group(1) if hit else ''

    ignore_case = re.IGNORECASE
    # (pattern, flags) pairs, listed in the same order as the returned fields.
    lookups = (
        (r'\b(\d{9})\b', 0),                                    # standalone 9-digit id
        (r'\b(\d{2}/\d{2}/\d{2})\b', 0),                        # dd/dd/dd date token
        (r'Received:?(\d{2}:\d{2})', ignore_case),              # time directly after the label
        (r'Dispatched:?(\d{2}:\d{2})', ignore_case),
        (r'Arrived:?(\d{2}:\d{2})', ignore_case),
        (r'Cleared:?(\d{2}:\d{2})', ignore_case),
        (r'Type:? (.+?)\s+Location:', ignore_case),             # text up to "Location:"
        (r'Addr:? (.+?)\s+Clearance Code:', ignore_case),       # text up to "Clearance Code:"
        (r'CALL COMMENTS:? (.+)', ignore_case),                 # rest of the comment line
    )

    values = [first_group(pattern, flags) for pattern, flags in lookups]
    # Only the comment is stripped; the other fields are returned as captured.
    values[-1] = values[-1].strip()
    return values

# Run the extractor over every incident chunk: one list of fields per chunk
incident_data = list(map(extract_incident_info, incidents))

# Assemble the rows into a DataFrame, naming the columns in the same order
# the extractor returns its fields
df = pd.DataFrame(
    incident_data,
    columns=[
        'IncidentID', 'Date',
        'Received', 'Dispatched', 'Arrived', 'Cleared',
        'Type', 'Address', 'Comment',
    ],
)

def extract_grid(address):
    """Return the first grid code found in an address, or 'N/A'.

    A grid code is the text matched after the word ``GRID`` and whitespace:
    one uppercase letter, a hyphen, then one or more digits (e.g. ``J-10``).
    When the address contains several ``GRID`` entries the first one is used.

    Parameters
    ----------
    address : str
        Address text to search. Non-string values (``None``, NaN) are
        accepted and treated as having no grid code.

    Returns
    -------
    str
        The grid code, or ``'N/A'`` when none is present.
    """
    # Missing values would make re.search raise TypeError; treat them as "no grid"
    # so the function does not depend on the caller having filled them in first.
    if not isinstance(address, str):
        return 'N/A'

    match = re.search(r'GRID\s+([A-Z]-\d+)', address)
    return match.group(1) if match else 'N/A'

# Replace missing addresses with '' by assigning the result back to the column.
# Calling fillna(..., inplace=True) on df['Address'] is chained assignment: it
# emits a FutureWarning in pandas 2.x and does not modify df under Copy-on-Write.
df['Address'] = df['Address'].fillna('')
# Derive the grid code for each row from its address text.
df['Grid'] = df['Address'].apply(extract_grid)

   IncidentID      Date Received Dispatched Arrived Cleared           Type  \
0   240306014  03/06/24    07:04      07:06   07:40   07:40      Loitering   
1   240306015  03/06/24    07:17      07:19   07:23   07:24   POSTING VEHS   
2   240306016  03/06/24    07:20                                     Theft   
3   240306017  03/06/24    08:09      08:12   08:21   08:34      Loitering   
4   240306018  03/06/24    08:11      08:13   08:24   08:25      Loitering   
..        ...       ...      ...        ...     ...     ...            ...   
82  240307011  03/07/24    04:20      04:21   04:29   04:47     Suspicious   
83  240307012  03/07/24    04:57              04:57   04:57  9-1-1 ABANDON   
84  240307013  03/07/24    05:31              05:32   05:36  Towed Vehicle   
85  240307014  03/07/24    06:22      06:24   06:34   06:50    Trespassing   
86             03/06/24                                                      

                                      Address  \
0   35 PRADO; 

In [None]:
# Bare last expression: shows the incidents DataFrame as this cell's output.
df

Unnamed: 0,IncidentID,Date,Received,Dispatched,Arrived,Cleared,Type,Address,Comment,Grid
0,240306014,03/06/24,07:04,07:06,07:40,07:40,Loitering,35 PRADO; SEWAGE TREATMENT PLANT; GRID I-,RP REQ SUBJ PACK UP & MOVE ALONG,
1,240306015,03/06/24,07:17,07:19,07:23,07:24,POSTING VEHS,"600 BLK HIGUERA; GRID J-10; GRID J-09, Sa",MARCH 7 0700-1300,J-10
2,240306016,03/06/24,07:20,,,,Theft,"2710 MEADOW; GRID K-13, San Luis Obispo,",1021 RP,K-13
3,240306017,03/06/24,08:09,08:12,08:21,08:34,Loitering,"273 MADONNA # C; GRID I-13, San Luis Obis",RP REQ SUBJ MOVE ALONG,I-13
4,240306018,03/06/24,08:11,08:13,08:24,08:25,Loitering,"3001 BROAD; BORAHS AWARDS; GRID L-13, San",RP REQ SUBJ MOVE ALONG,L-13
...,...,...,...,...,...,...,...,...,...,...
82,240307011,03/07/24,04:20,04:21,04:29,04:47,Suspicious,"165 SUBURBAN; GRID I-17, San Luis Obispo,","RP LOCKED IN BEDROOM, HEARS SOMEONE WALKING IN...",I-17
83,240307012,03/07/24,04:57,,04:57,04:57,9-1-1 ABANDON,4558 BROAD; PRECISION OVERHEAD GARAGE DOO,IMMED DISCONNECT,
84,240307013,03/07/24,05:31,,05:32,05:36,Towed Vehicle,"753 LAWRENCE; GRID L-13, San Luis Obispo,",SILV DODGE RAM L/53164U3,L-13
85,240307014,03/07/24,06:22,06:24,06:34,06:50,Trespassing,12424 LOS OSOS VALLEY; CHEVRON; GRID H-17,TRANSIENT FEMALE HANGING OUT AT BACK OF THE STORE,H-17
