In [1]:
import pandas as pd
import re

# Sample coordinates in DMS format; None marks a missing reading
data = dict(
    Latitude_DMS=['352933N', None, '401512S'],
    Longitude_DMS=['0974927W', '1220556W', None],
)

df = pd.DataFrame(data)
df


Unnamed: 0,Latitude_DMS,Longitude_DMS
0,352933N,0974927W
1,,1220556W
2,401512S,


In [3]:
# Function to convert DMS to Decimal Degrees
def dms_to_dd(dms):
    """Convert a compact DMS coordinate string to signed decimal degrees.

    The expected layout is two or three degree digits, two minute digits,
    two second digits and one hemisphere letter, e.g. ``'352933N'``
    (35 deg 29 min 33 sec North) or ``'0974927W'`` (97 deg 49 min 27 sec West).

    Parameters
    ----------
    dms : str or missing value
        Coordinate text. ``None``, ``NaN`` and any other non-string value
        are treated as missing.

    Returns
    -------
    float or None
        Decimal degrees, negative for South and West, or ``None`` when the
        input is missing, malformed, or outside the valid coordinate range.
    """
    # None / NaN / pd.NA are not strings, so this single check covers missing
    # values and also stops re from raising TypeError on numeric input.
    if not isinstance(dms, str):
        return None

    # fullmatch (rather than match) rejects trailing junk such as
    # '352933NXYZ'; [0-9] keeps non-ASCII digit characters out.
    match = re.fullmatch(r"([0-9]{2,3})([0-9]{2})([0-9]{2})([NSWE])", dms)

    # Return None if format is incorrect
    if not match:
        return None

    # Degrees, Minutes, Seconds
    degrees, minutes, seconds, direction = match.groups()
    degrees, minutes, seconds = int(degrees), int(minutes), int(seconds)

    # Minutes and seconds are base-60 fields; 60 or more means corrupt data
    if minutes >= 60 or seconds >= 60:
        return None

    # Convert to decimal degrees
    decimal_degrees = degrees + minutes / 60 + seconds / 3600

    # Latitude (N/S) is limited to 90 degrees, longitude (E/W) to 180
    max_degrees = 90 if direction in ('N', 'S') else 180
    if decimal_degrees > max_degrees:
        return None

    # Apply negative sign for South and West coordinates
    if direction in ('S', 'W'):
        decimal_degrees *= -1

    return decimal_degrees

# Convert each raw DMS column into its decimal-degree counterpart
for dms_column, dd_column in (('Latitude_DMS', 'Latitude_DD'),
                              ('Longitude_DMS', 'Longitude_DD')):
    df[dd_column] = df[dms_column].apply(dms_to_dd)

# Handling Null Values:
# Entries that could not be converted are replaced with 0.
# Caveat: 0 is also a legitimate coordinate value, so filled rows cannot be
# told apart from real readings on the equator / prime meridian.
zero_fill = {'Latitude_DD': 0, 'Longitude_DD': 0}
df.fillna(value=zero_fill, inplace=True)

# Output
df

Unnamed: 0,Latitude_DMS,Longitude_DMS,Latitude_DD,Longitude_DD
0,352933N,0974927W,35.4925,-97.824167
1,,1220556W,0.0,-122.098889
2,401512S,,-40.253333,0.0


# Retry: same conversion, with shorter column names and a missing-value check

In [4]:
import pandas as pd
import re

# Sample DataFrame; None marks a missing reading
data = dict(
    Latitude=['352933N', None, '401512S'],
    Longitude=['0974927W', '1220556W', None],
)

df = pd.DataFrame(data)
df


Unnamed: 0,Latitude,Longitude
0,352933N,0974927W
1,,1220556W
2,401512S,


In [6]:
# Count the missing entries in each raw column before any processing
missing_latitude = df['Latitude'].isnull().sum()
missing_longitude = df['Longitude'].isnull().sum()

print(f"""There are '{missing_latitude}' missing values in the latitude column,
whilst the longitude column has '{missing_longitude}' missing values.""")

There are '1' missing values in the latitude column,
whilst the longitude column has '1' missing values.


In [7]:

# Function to convert DMS to Decimal Degrees
def dms_to_dd(dms):
    """Convert a compact DMS coordinate string to signed decimal degrees.

    The expected layout is two or three degree digits, two minute digits,
    two second digits and one hemisphere letter, e.g. ``'352933N'``
    (35 deg 29 min 33 sec North) or ``'0974927W'`` (97 deg 49 min 27 sec West).

    Parameters
    ----------
    dms : str or missing value
        Coordinate text. ``None``, ``NaN`` and any other non-string value
        are treated as missing.

    Returns
    -------
    float or None
        Decimal degrees, negative for South and West, or ``None`` when the
        input is missing, malformed, or outside the valid coordinate range.
    """
    # None / NaN / pd.NA are not strings, so this single check covers missing
    # values and also stops re from raising TypeError on numeric input.
    if not isinstance(dms, str):
        return None  # Use None instead of NaN for consistency

    # fullmatch (rather than match) rejects trailing junk such as
    # '352933NXYZ'; [0-9] keeps non-ASCII digit characters out.
    match = re.fullmatch(r"([0-9]{2,3})([0-9]{2})([0-9]{2})([NSWE])", dms)

    # Return None if format is incorrect
    if not match:
        return None

    # Extract Degrees, Minutes, Seconds, Direction
    degrees, minutes, seconds, direction = match.groups()
    degrees, minutes, seconds = int(degrees), int(minutes), int(seconds)

    # Minutes and seconds are base-60 fields; 60 or more means corrupt data
    if minutes >= 60 or seconds >= 60:
        return None

    # Convert to decimal degrees
    decimal_degrees = degrees + minutes / 60 + seconds / 3600

    # Latitude (N/S) is limited to 90 degrees, longitude (E/W) to 180
    max_degrees = 90 if direction in ('N', 'S') else 180
    if decimal_degrees > max_degrees:
        return None

    # Apply negative sign for South and West coordinates
    if direction in ('S', 'W'):
        decimal_degrees *= -1

    return decimal_degrees

# Convert each raw DMS column into its decimal-degree counterpart
for dms_column, dd_column in (('Latitude', 'Latitude_DD'),
                              ('Longitude', 'Longitude_DD')):
    df[dd_column] = df[dms_column].apply(dms_to_dd)

# Handling Null Values:
# Entries that could not be converted are replaced with 0.
# Caveat: 0 is also a legitimate coordinate value, so filled rows cannot be
# told apart from real readings on the equator / prime meridian.
zero_fill = {'Latitude_DD': 0, 'Longitude_DD': 0}
df.fillna(value=zero_fill, inplace=True)

# Output DataFrame
first_rows = df.head(3)
print(first_rows)


  Latitude Longitude  Latitude_DD  Longitude_DD
0  352933N  0974927W    35.492500    -97.824167
1     None  1220556W     0.000000   -122.098889
2  401512S      None   -40.253333      0.000000
