# Statistical Analysis

This notebook conducts statistical analysis on data loaded from Google Sheets.

In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the module to get latest changes
import importlib
import data_loader
importlib.reload(data_loader)
from data_loader import load_google_sheet_data, load_all_sheets, get_sheet_gids_from_url

## Load Data from Google Sheets

In [25]:
# Load every tab of the spreadsheet, print a short summary of each one, and
# bind one convenience name per tab for the cells further down.
SHEET_URL = "https://docs.google.com/spreadsheets/d/1EX83SL2E6Cg_Y1y88e8svOvqS6SjZd11rIKaeX_3ABs/edit?gid=0#gid=0"

# Tab name -> gid of that tab in the Google Sheet
sheet_gids = {
    'n8n_form': 0,
    'tomorrow_io': 1768369633,
    'tempest_forecast': 707129769,
    'open_meteo': 2081410240,
    'tempest_observations': 1081072154,
    'nws_forecast': 1164189304,
    'coosdp': 329259310,
}

# `all_sheets` is used below as a mapping of tab name -> DataFrame.
all_sheets = load_all_sheets(SHEET_URL, sheet_gids)

print(f"Loaded {len(all_sheets)} sheets:")
for name, sheet_df in all_sheets.items():
    print(f"\n{name.upper()}:")
    print(f"  Shape: {sheet_df.shape}")
    print(f"  Columns: {list(sheet_df.columns)}")
    if not sheet_df.empty:
        print("  Sample data:")
        display(sheet_df.head(2))

# Store individual dataframes for easy access.
# These are assigned unconditionally: `.get()` yields None for a tab that is
# absent, so every name is always (re)bound by this cell. Guarding the
# assignments on `all_sheets` being non-empty would leave the names undefined
# on a fresh kernel, or pointing at frames from a previous run, whenever the
# load comes back empty.
df_form = all_sheets.get('n8n_form')
df_tomorrow = all_sheets.get('tomorrow_io')
df_tempest_forecast = all_sheets.get('tempest_forecast')
df_open_meteo = all_sheets.get('open_meteo')
df_tempest_obs = all_sheets.get('tempest_observations')
df_nws = all_sheets.get('nws_forecast')
df_coosdp = all_sheets.get('coosdp')

Loading sheet: n8n_form
Loading sheet: tomorrow_io
Loading sheet: tempest_forecast
Loading sheet: open_meteo
Loading sheet: tempest_observations
Loading sheet: nws_forecast
Loading sheet: coosdp
Loaded 7 sheets:

N8N_FORM:
  Shape: (31, 7)
  Columns: ['submittedAt', 'score', 'conditions', 'waterConditions', 'visibility', 'notes', 'formMode']
  Sample data:


Unnamed: 0,submittedAt,score,conditions,waterConditions,visibility,notes,formMode
0,2025-08-13T19:37:49.555-07:00,🌕🌕🌕🌕🌗,⛅ Partly Cloudy,🌬️ Textured,👀✅ Good,Great sunset,production
1,2025-08-14T07:03:44.798-07:00,🌕🌕🌗,🌫️ Light Fog,💧 Rippled,👀➖ Acceptable,Wet and dreary,production



TOMORROW_IO:
  Shape: (22, 22)
  Columns: ['submittedAt', 'time', 'temperature', 'temperatureApparent', 'weatherCode', 'humidity', 'dewPoint', 'precipitationProbability', 'rainIntensity', 'rainAccumulation', 'windSpeed', 'windGust', 'windDirection', 'pressureSeaLevel', 'pressureSurfaceLevel', 'altimeterSetting', 'cloudCover', 'cloudBase', 'cloudCeiling', 'visibility', 'uvIndex', 'evapotranspiration']
  Sample data:


Unnamed: 0,submittedAt,time,temperature,temperatureApparent,weatherCode,humidity,dewPoint,precipitationProbability,rainIntensity,rainAccumulation,...,windDirection,pressureSeaLevel,pressureSurfaceLevel,altimeterSetting,cloudCover,cloudBase,cloudCeiling,visibility,uvIndex,evapotranspiration
0,2025-08-19T08:26:01.308-07:00,2025-08-19T15:00:00Z,13.5,13.5,1000,94,12.6,0,0,0,...,183,1017.13,1013.84,1015.31,3,16.0,16.0,15.18,1,0.023
1,2025-08-19T15:40:37.044-07:00,2025-08-19T22:00:00Z,15.3,15.3,1000,100,14.5,0,0,0,...,267,1016.37,1014.08,1015.55,9,16.0,16.0,15.46,7,0.327



TEMPEST_FORECAST:
  Shape: (22, 21)
  Columns: ['submittedAt', 'air_temperature', 'conditions', 'feels_like', 'icon', 'local_day', 'local_hour', 'precip', 'precip_icon', 'precip_probability', 'precip_type', 'relative_humidity', 'sea_level_pressure', 'station_pressure', 'time', 'uv', 'wind_avg', 'wind_direction', 'wind_direction_cardinal', 'wind_gust', 'time_iso']
  Sample data:


Unnamed: 0,submittedAt,air_temperature,conditions,feels_like,icon,local_day,local_hour,precip,precip_icon,precip_probability,...,relative_humidity,sea_level_pressure,station_pressure,time,uv,wind_avg,wind_direction,wind_direction_cardinal,wind_gust,time_iso
0,2025-08-19T08:26:01.308-07:00,15,Clear,15,clear-day,19,9,0,chance-rain,0,...,78,1016.7,1016.3,1755619200,4,2,300,WNW,3,2025-08-19T16:00:00.000Z
1,2025-08-19T15:40:37.044-07:00,17,Clear,17,clear-day,19,16,0,chance-rain,0,...,75,1016.8,1016.4,1755644400,7,4,290,WNW,9,2025-08-19T23:00:00.000Z



OPEN_METEO:
  Shape: (22, 22)
  Columns: ['submittedAt', 'time', 'temperature_2m', 'relative_humidity_2m', 'dew_point_2m', 'snow_depth', 'apparent_temperature', 'precipitation_probability', 'precipitation', 'rain', 'showers', 'weather_code', 'pressure_msl', 'surface_pressure', 'cloud_cover', 'cloud_cover_low', 'cloud_cover_mid', 'cloud_cover_high', 'visibility', 'wind_speed_10m', 'wind_gusts_10m', 'wind_direction_10m']
  Sample data:


Unnamed: 0,submittedAt,time,temperature_2m,relative_humidity_2m,dew_point_2m,snow_depth,apparent_temperature,precipitation_probability,precipitation,rain,...,pressure_msl,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,visibility,wind_speed_10m,wind_gusts_10m,wind_direction_10m
0,2025-08-19T08:26:01.308-07:00,2025-08-19T15:00Z,14.8,90,13.2,0,15.4,0,0,0,...,1016.4,1016.4,5,5,0,0,13400,2.7,3.6,247
1,2025-08-19T15:40:37.044-07:00,2025-08-19T23:00Z,19.2,77,15.0,0,19.9,0,0,0,...,1015.2,1015.2,6,6,0,0,17100,15.2,27.4,292



TEMPEST_OBSERVATIONS:
  Shape: (33, 19)
  Columns: ['submittedAt', 'air_temperature', 'barometric_pressure', 'brightness', 'dew_point', 'feels_like', 'precip', 'precip_accum_last_1hr', 'precip_accum_local_day', 'pressure_trend', 'relative_humidity', 'solar_radiation', 'uv', 'wind_avg', 'wind_chill', 'wind_direction', 'wind_gust', 'wind_lull', 'obs_ts_iso']
  Sample data:


Unnamed: 0,submittedAt,air_temperature,barometric_pressure,brightness,dew_point,feels_like,precip,precip_accum_last_1hr,precip_accum_local_day,pressure_trend,relative_humidity,solar_radiation,uv,wind_avg,wind_chill,wind_direction,wind_gust,wind_lull,obs_ts_iso
0,2025-08-13T19:37:49.555-07:00,14.9,1012.8,1562,13.6,14.9,0,0,0,steady,92,13,0.13,3.2,14.9,286,4.5,1.1,2025-08-14T02:36:52.000Z
1,2025-08-14T07:03:44.798-07:00,13.9,1013.4,4175,13.3,13.9,0,0,0,steady,96,35,0.34,0.9,13.9,288,1.3,0.6,2025-08-14T14:15:41.000Z



NWS_FORECAST:
  Shape: (31, 10)
  Columns: ['submittedAt', 'temperature', 'probabilityOfPrecipitation', 'dewpoint', 'relativeHumidity', 'windSpeed', 'windDirection', 'shortForecast', 'startTime', 'endTime']
  Sample data:


Unnamed: 0,submittedAt,temperature,probabilityOfPrecipitation,dewpoint,relativeHumidity,windSpeed,windDirection,shortForecast,startTime,endTime
0,2025-08-14T07:03:44.798-07:00,55,0,12.777778,100,5 mph,NW,Patchy Fog,2025-08-14T07:00:00-07:00,2025-08-14T08:00:00-07:00
1,2025-08-14T14:38:17.950-07:00,56,0,13.333333,100,20 mph,WNW,Sunny,2025-08-14T14:00:00-07:00,2025-08-14T15:00:00-07:00



COOSDP:
  Shape: (30, 10)
  Columns: ['submittedAt', 'solar_radiation_wm2', 'precip_mm', 'humidity_pct', 'air_temp_f', 'pressure_mbar', 'wind_speed_mph', 'wind_gust_mph', 'wind_direction_deg', 'time']
  Sample data:


Unnamed: 0,submittedAt,solar_radiation_wm2,precip_mm,humidity_pct,air_temp_f,pressure_mbar,wind_speed_mph,wind_gust_mph,wind_direction_deg,time
0,2025-08-14T07:03:44.798-07:00,7.0,0.0,95.0,57.56,1015.6,1.565855,3.802792,186.0,2025-08-14T13:40:00Z
1,2025-08-14T14:38:17.950-07:00,549.0,0.0,94.0,58.064,1015.6,7.158196,6.710809,248.0,2025-08-14T21:10:00Z


## Data Exploration and Analysis

In [26]:
# Basic data exploration of one loaded tab.
# The frame is looked up in `all_sheets` (built by the load cell) instead of a
# bare `df`: no cell in this notebook assigns `df`, so a `'df' in locals()`
# guard only passes when a stale kernel variable happens to exist and the cell
# prints nothing after Restart & Run All.
EXPLORE_SHEET = 'n8n_form'  # key into all_sheets; change to explore another tab
explore_df = all_sheets.get(EXPLORE_SHEET)

if explore_df is not None:
    print("Dataset Overview:")
    print(f"Rows: {len(explore_df)}")
    print(f"Columns: {len(explore_df.columns)}")
    print(f"\nColumn names: {list(explore_df.columns)}")

    print("\nMissing values:")
    print(explore_df.isnull().sum())

    print("\nBasic statistics:")
    display(explore_df.describe())
else:
    # Make a missing tab visible instead of silently skipping the cell.
    print(f"Sheet '{EXPLORE_SHEET}' was not loaded; nothing to explore.")

Dataset Overview:
Rows: 31
Columns: 7

Column names: ['submittedAt', 'score', 'conditions', 'waterConditions', 'visibility', 'notes', 'formMode']

Missing values:
submittedAt         0
score               0
conditions          0
waterConditions     0
visibility          0
notes              21
formMode            0
dtype: int64

Basic statistics:


Unnamed: 0,submittedAt,score,conditions,waterConditions,visibility,notes,formMode
count,31,31,31,31,31,10,31
unique,31,7,6,6,3,10,1
top,2025-08-13T19:37:49.555-07:00,🌕🌕🌕🌕,☀️ Sunny,💧 Rippled,👀✅ Good,Great sunset,production
freq,1,11,16,12,19,1,31


In [27]:
# Add your statistical analysis here
# Example: correlation analysis, hypothesis testing, etc.