In [1]:
import pandas as pd

excel_path = "/Users/carterwebb/Desktop/kelps/AllYearsAllSurveys_ExcelMaster_20250918_full_FOR CARTER.xlsx"

year_dfs = pd.read_excel(excel_path, sheet_name=None)

## First We Need To See How Many Beds Have Enought Data ## 
bed_years_map = {}

for yr, df in year_dfs.items(): 
    if 'Bed Name' in df.columns: #find the name column
        for bed in df['Bed Name'].dropna().unique(): #get rid of duplicates and NAN
            bed_years_map.setdefault(bed, []).append(str(yr)) #attach the bed name to the years list we created first and fill the dictionary 

print("\n | Years per Bed Name |\n")
for bed, years in sorted(bed_years_map.items()): #sort dictionary 
    years_sorted = ", ".join(sorted(years)) # sort years 
    print(f"{bed}: {years_sorted}") #print nicely 


 | Years per Bed Name |

Aiston Preserve: 2018, 2019, 2020, 2021, 2022, 2023, 2024
Alden Bank: 2018, 2019, 2020
Ben Ure: 2016, 2017, 2018
Biz Point: 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Biz Point South: 2024
Cherry Point-Gulf Rd: 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Clallam Bay: 2017, 2018, 2019, 2020, 2021, 2022, 2023
Coffin Rocks: 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Ebey's Landing: 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Edmond 2 (B): 2015
Edmond 3 (A): 2015
Edmond 3 (C): 2015
Edmonds 1: 2017, 2018, 2019, 2020, 2021, 2022, 2024
Edmonds 2: 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Edmonds 3: 2016, 2017, 2018, 2019, 2020, 2021, 2022
Fawn Island: 2016, 2017
Freshwater Bay 1: 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Freshwater Bay 2: 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Freshwater Bay 3: 2018, 2022, 2023, 2024
Hastie Lake: 2015
Hat Island: 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024
Hat Isl

In [2]:
def build_bed_dfs(excel_path, desired_cols=None):
    """
    Load the Excel file and its multiple sheets (2016–2024), keeping selected columns.
    Also creates standalone DataFrame for each target bed directly in the globals.
    """

## Select Target Beds That Have Greatest Data ## 
    target_beds = [
        'Aiston Preserve','Biz Point','Cherry Point-Gulf Rd','Clallam Bay','Coffin Rocks',
        "Ebey's Landing",'Freshwater Bay 1','Freshwater Bay 2','Hat Island','Hoypus Point',
        'Lowell','Lummi SW','North Beach East','Polnell Point','Possession Point',
        'Shannon Point East','Shannon Point West'
    ]

## Simplify Columns By Setting Default Columns## 
    if desired_cols is None:
        desired_cols = [
            'Bed Name', 'Site Code', 'Survey Date', 'Survey Day', "NWSC Max Ext",
            'Survey Month', 'Survey Year', 'Acres',
            'Temp', 'Temp1 Shore Edge', 'Temp1 Water Edge',
            'Temp2 Shore Edge', 'Temp2 Water Edge',
            'Ave Temp Shore Edge', 'Ave Temp Water Edge'
        ]

## Read Sheets As One then Concat Them Into One DF ## 
    dfs = pd.read_excel(excel_path, sheet_name=None)
    all_data = pd.concat(
        [df[[c for c in desired_cols if c in df.columns]] for df in dfs.values()],
        ignore_index=True
    )

## Standardize DF Names ## 
    for bed in target_beds:
        ## Filter Columns with Target Bed Name ##  
        mask = all_data['Bed Name'] == bed
        if mask.any():
            ## Standardize Bed Names ## 
            var_name = (
                bed.replace(" ", "_")
                   .replace("-", "_")
                   .replace("’", "")
                   .replace("'", "")
            )
## Boolean mask (True/False for each row) loading the previously selected Target Bed rows ##
            globals()[var_name] = all_data.loc[mask].copy()
            print(f"✅ Created DataFrame: {var_name} ({mask.sum()} rows)")

## First Time Using Globals ## Bassically Calls all Variables Defined at Present. 

#Simply Put Globals Now...
## globals() == {
  #  'Lowell': DataFrame,
  #  'Polnell_Point': DataFrame,
  #  'Biz_Point': DataFrame,
  #  ...
  #  'latlon_clean': DataFrame,
  #  'pd': Library          
  #  ...
# }

In [3]:
## Load Path and Use Function ## 
bed_dfs = build_bed_dfs(excel_path)

✅ Created DataFrame: Aiston_Preserve (9 rows)
✅ Created DataFrame: Biz_Point (27 rows)
✅ Created DataFrame: Cherry_Point_Gulf_Rd (13 rows)
✅ Created DataFrame: Clallam_Bay (8 rows)
✅ Created DataFrame: Coffin_Rocks (31 rows)
✅ Created DataFrame: Ebeys_Landing (27 rows)
✅ Created DataFrame: Freshwater_Bay_1 (10 rows)
✅ Created DataFrame: Freshwater_Bay_2 (9 rows)
✅ Created DataFrame: Hat_Island (10 rows)
✅ Created DataFrame: Hoypus_Point (25 rows)
✅ Created DataFrame: Lowell (19 rows)
✅ Created DataFrame: Lummi_SW (9 rows)
✅ Created DataFrame: North_Beach_East (34 rows)
✅ Created DataFrame: Polnell_Point (21 rows)
✅ Created DataFrame: Possession_Point (26 rows)
✅ Created DataFrame: Shannon_Point_East (27 rows)
✅ Created DataFrame: Shannon_Point_West (25 rows)


In [4]:
Hoypus_Point

Unnamed: 0,Bed Name,Site Code,Survey Date,Survey Day,NWSC Max Ext,Survey Month,Survey Year,Acres,Temp,Temp1 Shore Edge,Temp1 Water Edge,Temp2 Shore Edge,Temp2 Water Edge,Ave Temp Shore Edge,Ave Temp Water Edge
40,Hoypus Point,HOYP,2016-07-19,19.0,0.0,7.0,2016.0,4.042001,14.0,,,,,,
41,Hoypus Point,HOYP,2016-07-22,22.0,0.0,7.0,2016.0,1.13414,12.0,,,,,,
42,Hoypus Point,HOYP,2016-08-19,19.0,1.0,8.0,2016.0,3.712156,14.0,,,,,,
43,Hoypus Point,HOYP,2016-09-14,14.0,0.0,9.0,2016.0,3.39351,13.0,,,,,,
151,Hoypus Point,HOYP,2017-06-25,25.0,0.0,6.0,2017.0,1.097827,9.0,,,,,,
152,Hoypus Point,HOYP,2017-07-25,25.0,1.0,7.0,2017.0,3.605778,10.0,,,,,,
211,Hoypus Point,HOYP,2018-07-17,17.0,0.0,7.0,2018.0,4.852531,,,,,,,
212,Hoypus Point,HOYP,2018-07-17,17.0,0.0,7.0,2018.0,4.501536,,,,,,,
213,Hoypus Point,HOYP,2018-07-17,17.0,0.0,7.0,2018.0,2.968767,13.0,,,,,,
214,Hoypus Point,HOYP,2018-08-14,14.0,0.0,8.0,2018.0,7.48867,13.0,,,,,,


In [5]:
## Looking Into Each Data Frame For Info ##
for var_name in [
    'Aiston_Preserve','Biz_Point','Cherry_Point_Gulf_Rd','Clallam_Bay','Coffin_Rocks',
    'Ebeys_Landing','Freshwater_Bay_1','Freshwater_Bay_2','Hat_Island','Hoypus_Point',
    'Lowell','Lummi_SW','North_Beach_East','Polnell_Point','Possession_Point',
    'Shannon_Point_East','Shannon_Point_West'
]:
    print(f"\n | {var_name} |\n")
    globals()[var_name].info()


 | Aiston_Preserve |

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 251 to 489
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Bed Name             9 non-null      object        
 1   Site Code            9 non-null      object        
 2   Survey Date          9 non-null      datetime64[ns]
 3   Survey Day           9 non-null      float64       
 4   NWSC Max Ext         9 non-null      float64       
 5   Survey Month         9 non-null      float64       
 6   Survey Year          9 non-null      float64       
 7   Acres                9 non-null      float64       
 8   Temp                 2 non-null      float64       
 9   Temp1 Shore Edge     7 non-null      object        
 10  Temp1 Water Edge     7 non-null      object        
 11  Temp2 Shore Edge     6 non-null      object        
 12  Temp2 Water Edge     7 non-null      object        
 13  Ave Temp Shore Ed

In [6]:
# Save Lat Lon Incase We Need it In the Future ## 
latlon_clean.to_csv("/Users/carterwebb/Desktop/kelps/data/cleaned_data/latlon_clean.csv", index=False)
print("✅ Saved latlon_clean.csv")

## Couldnt Get the Old List To Call Correctly, Not Sure What Happened, Simply Remade it To Save ## 
bed_vars = [
    'Aiston_Preserve','Biz_Point','Cherry_Point_Gulf_Rd','Clallam_Bay','Coffin_Rocks',
    'Ebeys_Landing','Freshwater_Bay_1','Freshwater_Bay_2','Hat_Island','Hoypus_Point',
    'Lowell','Lummi_SW','North_Beach_East','Polnell_Point','Possession_Point',
    'Shannon_Point_East','Shannon_Point_West'
]

for bed in bed_vars:
    if bed in globals():
        df = globals()[bed]
        if 'Bed Name' not in df.columns:
            df['Bed Name'] = bed
        df.to_csv(f"/Users/carterwebb/Desktop/kelps/data/cleaned_data/{bed}.csv", index=False)
        print(f"✅ Saved {bed}.csv ({len(df)} rows)")
    else:
        print(f"⚠️ {bed} not found")

## Concat For Future Use If Neccesary / Making Percent Change Metrics ## 
combined = pd.concat([globals()[b] for b in bed_vars if b in globals()], ignore_index=True)
combined.to_csv("/Users/carterwebb/Desktop/kelps/data/cleaned_data/AllBeds_Clean.csv", index=False)
print(f"✅ Saved AllBeds_Clean.csv ({len(combined)} total rows)")

NameError: name 'latlon_clean' is not defined