In [1]:
import pandas as pd 

df = pd.read_csv("../data/combined.csv")

# Original spreadsheet headers -> snake_case column names.
column_names = {
    'Date of Shout': 'date_of_shout',
    'Time of Shout': 'time_of_shout',
    'Pager Code': 'pager_code',
    'Location of Shout': 'location_of_shout',
    'Shout Details': 'shout_details',
    'Time Boat Launched': 'time_boat_launched',
    'Time boat returned': 'time_boat_returned',
    'Crew on Board': 'crew_on_board',
    'Crew on Shore': 'crew_on_shore',
    'Weather at time of shout': 'weather_at_time_of_shout',
}

# 'Unnamed: 10' is discarded before renaming.
# NOTE(review): presumably an empty column produced by a trailing delimiter
# in the CSV -- confirm against the raw file.
df = df.drop(columns=['Unnamed: 10']).rename(columns=column_names)

# Optional: uncomment to leave the free-text description out of the output.
# df = df.drop(columns=['shout_details'])

# Cast *every* column (not only the date/time ones) to str so the converter
# functions below always receive strings. Side effect: missing cells become
# the literal text 'nan'.
df = df.astype(str)


In [2]:
# Normalise a date given in either of the two source formats.
def convert_date_to_datetime(date_str):
    """Normalise a 'date_of_shout' value to a 'YYYY-MM-DD' string.

    Despite the name, the result is a formatted string, not a datetime.

    Two input formats occur in the data:

    * format 1: ``16/07/2023`` (day/month/year)
    * format 2: ``2023-09-05T00:00:00.000Z``

    The day-first format is tried first; anything it rejects is handed to
    pandas' general date parser.

    Parameters
    ----------
    date_str : str
        Raw cell value. ``None`` and values pandas parses as missing
        (for example the text ``'nan'``, which is what an empty cell
        becomes after ``astype(str)``) are accepted.

    Returns
    -------
    str or None
        The date formatted as ``%Y-%m-%d``, or ``None`` when the input is
        a missing value.

    Raises
    ------
    ValueError
        If the value is present but matches neither format.
    """
    try:
        parsed = pd.to_datetime(date_str, format='%d/%m/%Y')
    except ValueError:
        parsed = pd.to_datetime(date_str)

    # pd.to_datetime yields None for None and NaT for missing-value markers.
    # Neither can be formatted (NaT.strftime raises), so report them as
    # missing instead of aborting the whole column conversion.
    if parsed is None or parsed is pd.NaT:
        return None

    return parsed.strftime('%Y-%m-%d')
    


In [3]:
def convert_time_to_datetime(time_str):
    """Normalise a time-of-day value to an 'HH:MM' string.

    Used for the 'time_of_shout', 'time_boat_launched' and
    'time_boat_returned' columns, which mix two notations:

    * format 1: ``15:05``
    * format 2: ``1456``

    Every non-digit character is discarded, the remaining digits are
    left-padded with zeros to four characters and parsed as ``%H%M``, so
    ``905`` becomes ``09:05``. Despite the name, the result is a formatted
    string, not a datetime.

    Parameters
    ----------
    time_str : str
        Raw cell value.

    Returns
    -------
    str or None
        The time formatted as ``%H:%M``, or ``None`` when the value is not
        a string, contains no digits (e.g. ``''`` or ``'nan'``), or does
        not form a valid 24-hour time.
    """
    # None / float NaN cannot be scanned for digits; treat them as missing.
    if not isinstance(time_str, str):
        return None

    # Remove non-digit characters (separators, whitespace, stray text).
    digits = ''.join(filter(str.isdigit, time_str))

    # Without this guard an empty digit string is zero-padded to '0000' and
    # reported as midnight, silently inventing a time for missing data.
    if not digits:
        return None

    try:
        return pd.to_datetime(digits.zfill(4), format='%H%M').strftime('%H:%M')
    except ValueError:
        return None  # Digits present but not a valid HHMM time


In [4]:
# Normalise the three time-of-day columns with the shared time converter.
for time_col in ('time_of_shout', 'time_boat_launched', 'time_boat_returned'):
    df[time_col] = df[time_col].apply(convert_time_to_datetime)

# Normalise the shout date with the date converter.
df['date_of_shout'] = df['date_of_shout'].apply(convert_date_to_datetime)


In [5]:
# Preview the first five rows to sanity-check the converted columns.
df.head(5)

Unnamed: 0,date_of_shout,time_of_shout,pager_code,location_of_shout,shout_details,time_boat_launched,time_boat_returned,crew_on_board,crew_on_shore,weather_at_time_of_shout
0,2023-07-21,15:05,9992167,Inchtavvanch channel,"Call on channel 16 saying swimmer in water, in...",15:20,16:15,RB TR AMCD,GH DON,Lovely
1,2023-07-19,18:44,333,Ardlui,17' Fletcher speedboat with 1 male occupant ha...,19:00,20:15,"RB, CG, TR, AB jnr, AMcL","RO, AB snr","Sunny, windy"
2,2023-07-16,11:10,999,South of Inverbeg,Concern for missing kayaker that hasnt return...,11:10,12:15,Rb RO GH,Na,West wind heavy showers
3,2023-07-08,09:44,333,Duckbay,Reports of a small craft adrift out in front o...,09:55,10:30,"DS, AM, AJM","TR, DO",Dry and warm
4,2023-07-05,18:49,222,Boturich shore,Crew were paged to a small bayliner with an en...,19:04,20:04,"ABsnr, EMc, JB, CG","RB, AM","Dry, sunny"


In [6]:
# Write the cleaned frame to a new CSV, leaving out the row index.
df.to_csv(path_or_buf='../data/preprocessed_dates_time_data.csv', index=False)