In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import geopandas as gpd
import os
import glob
import plotly.graph_objects as go
from scipy.stats import ttest_rel, ttest_ind, mannwhitneyu




In [3]:
# Load and merge all relevant CSV files
folder_path = 'C:/Users/Noeh/Downloads'
csv_files = glob.glob(os.path.join(folder_path, 'AntarcticIceber*.csv'))
df_list = [pd.read_csv(file) for file in csv_files]
merged_df = pd.concat(df_list, ignore_index=True)

# Filter for iceberg A23A and keep only relevant columns
filtered_df = merged_df[merged_df['Iceberg'] == 'A23A'][['Latitude', 'Longitude', 'Last Update']]
filtered_df['Last Update'] = pd.to_datetime(filtered_df['Last Update'], errors='coerce')
filtered_df = filtered_df.dropna(subset=['Latitude', 'Longitude', 'Last Update'])
filtered_df = filtered_df.sort_values('Last Update')
filtered_df['Date'] = filtered_df['Last Update'].dt.date
filtered_df = filtered_df.groupby('Date', as_index=False).first()

# Set marker size for map visualization
filtered_df['Size'] = 4

# Create animated scatter plot on map with trajectory line
fig = px.scatter_mapbox(
    filtered_df,
    lat='Latitude',
    lon='Longitude',
    size='Size',
    size_max=8,
    hover_data={'Last Update': True},
    animation_frame=filtered_df['Last Update'].dt.strftime('%Y-%m-%d'),
    zoom=2,
    height=600 
)

fig.add_trace(go.Scattermapbox(
    lat=filtered_df['Latitude'],
    lon=filtered_df['Longitude'],
    mode='lines',
    line=dict(color='blue', width=2),
    name='Trajectory Line',
    hoverinfo='none'
))

fig.update_layout(
    mapbox_style='open-street-map',
    title='Trajectory of Iceberg A23A (Jan 2023 - Present)',
    margin={'r':0, 't':40, 'l':0, 'b':0}
)

fig.show()

# Check for duplicate dates or rows
duplicate_rows = filtered_df.duplicated()
print(f"Total duplicate rows: {duplicate_rows.sum()}")
print(filtered_df[duplicate_rows])

duplicates_by_date = filtered_df.duplicated(subset='Date', keep=False)
print(f"Entries with duplicate dates: {duplicates_by_date.sum()}")
print(filtered_df[duplicates_by_date].sort_values('Date'))


*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



Total duplicate rows: 0
Empty DataFrame
Columns: [Date, Latitude, Longitude, Last Update, Size]
Index: []
Entries with duplicate dates: 0
Empty DataFrame
Columns: [Date, Latitude, Longitude, Last Update, Size]
Index: []
