Before running, mount your Google Drive and set `path` to the location of your LITESC project folder only; the code builds every other file path from there.

Created by Paige Bartels


In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from datetime import datetime, timedelta

# Mounting your drive
# Colab-only step: mounts the drive at /content/drive, the directory that the
# `path` used for the buoy and HOBO files starts with.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [328]:
## Buoy Import and Processing
# Paige's path: path='/content/drive/MyDrive/Colab Notebooks/2024 LITESC Project Experimental Plan/Photos and Data/Buoy Data - Corrected CSV/'
path='/content/drive/MyDrive/Colab Notebooks/2024 LITESC Project Experimental Plan/'
fn='Photos and Data/Buoy Data - Corrected CSV/buoydata_45023.csv' # 45025 for L'Anse, 45023 for McLain
ds=path+fn

# See this link for description of values: https://www.ndbc.noaa.gov/faq/measdes.shtml#stdmet
# header=1 -> the second line of the file holds the column names; skiprows=[2] drops the
# third line (presumably the units row - confirm against the CSV).
data=pd.read_csv(ds, header=1, skiprows=[2]) #Loading in data as pandas DataFrame
# NOTE(review): "MM" is replaced with the *string* 'NaN', not np.nan, so the .dropna() used
# later when selecting the IOP days does not remove rows with missing measurements.
# Confirm that is intended before changing it.
data=data.replace(to_replace='MM',value='NaN') # Replacing "MM" values with NaN

# Shifting time from EDT (UTC-4) by +4 h.
# Real datetime arithmetic is used so that day, month and year roll-overs are correct for
# every month length (the previous manual version assumed 30-day months and never reset
# the day of month to 1).
stamp=pd.to_datetime(
    data[['#YY','MM','DD','hh','mm']].rename(
        columns={'#YY':'year','MM':'month','DD':'day','hh':'hour','mm':'minute'}
    )
)+timedelta(hours=4)

# Writing the shifted time back as the same columns the rest of the notebook reads:
# integer year plus two-digit (zero-padded) strings for month, day, hour and minute
data['#YY']=stamp.dt.year.astype('int64')
data['MM']=stamp.dt.strftime('%m')
data['DD']=stamp.dt.strftime('%d')
data['hh']=stamp.dt.strftime('%H')
data['mm']=stamp.dt.strftime('%M')

# Adding datetime label, formatted "YYYYMMDD hh:mm"
data['Date']=stamp.dt.strftime('%Y%m%d %H:%M')

# Reversing data, so chronological, and resetting index of table
data=data.iloc[::-1].reset_index(drop=True)

# Picking the site label and the two IOP days from the buoy id in the file name:
# the character just before ".csv" is the last digit of the id (5 -> 45025, otherwise 45023).
if fn[-5]=='5': # L'Anse (45025)
  site="L'Anse"
  days='October 14-15, 2024'
  first_iop_day, last_iop_day = 14, 15
else: # McLain (45023)
  site='McLain'
  days='October 12-13, 2024'
  first_iop_day, last_iop_day = 12, 13

# One combined mask: October rows whose day of month lies inside the IOP window.
buoy_day=data['DD'].astype('int')
iop_mask=(data['MM'].astype('int')==10) & (buoy_day>=first_iop_day) & (buoy_day<=last_iop_day)

# .where() blanks every row outside the mask to NaN; .dropna() then removes those rows
iop_days=data.where(iop_mask).dropna()

def _missing_buoy_row(src, minute):
  """Build a placeholder row for a missing buoy report.

  The row copies day, hour and the first 12 characters of 'Date' from the row
  labelled `src`, uses `minute` ('00', '20' or '40') as the minute, and fills every
  column between 'mm' and 'Date' with NaN. Year 2024.0 and month 10 are hard-coded.
  """
  n_measure=len(iop_days.columns)-6 # everything except #YY, MM, DD, hh, mm and Date
  return ([2024.0, 10, iop_days['DD'][src], iop_days['hh'][src], minute]
          +[np.nan]*n_measure # np.nan: the np.NaN alias no longer exists in NumPy 2.0
          +[iop_days['Date'][src][0:12]+minute])

# The checks below expect three reports per hour (minutes 00, 20, 40); a row count that
# is not a multiple of 3 is taken as a sign that reports are missing.
if len(iop_days['mm'])%3!=0: # creating NaNs for missing data
  errors=0
  for i in iop_days.index:
    # Position of this row within the hour pattern, corrected for placeholders added so far
    check=i-iop_days.index[0]+errors
    if i>iop_days.index[-1]-1: # stop before the last row (the i+1 lookup below needs a successor)
      break
    # NOTE(review): the target label (first index + 1 + errors) looks like it already exists
    # in iop_days, in which case these assignments overwrite a row instead of inserting one.
    # Confirm against the data.
    if check==0 or check%3==0:
      if iop_days['mm'][i]!='00':
        iop_days.loc[(iop_days.index+1+errors)[0]]=_missing_buoy_row(i+1,'00')
        errors+=1
    elif check%3==1:
      if iop_days['mm'][i]!='20':
        iop_days.loc[(iop_days.index+1+errors)[0]]=_missing_buoy_row(i-1,'20')
        errors+=1
    elif check%3==2:
      if iop_days['mm'][i]!='40':
        iop_days.loc[(iop_days.index+1+errors)[0]]=_missing_buoy_row(i-1,'40')
        errors+=1
  iop_days.index = iop_days.index + errors  # shifting index
  iop_days=iop_days.sort_values(by=['DD','hh','mm']) # back into chronological order
  iop_days.reset_index(drop=True, inplace=True) # Resetting index of table
  iop_days=iop_days[1:] # dropping the first row after sorting

  # NOTE(review): both messages are printed only when the gap-filling branch runs
  print(f'Buoy Errors: {errors}')
  print('Buoy read-in complete')

Buoy Errors: 2
Buoy read-in complete


In [329]:
from ast import Num
# Reading in HOBO data files
filen='Photos and Data/HOBO_fixed/'
# The folder also holds a .ipynb file (not a csv). glob returns paths in arbitrary order,
# so the notebook is excluded by its extension rather than by assuming it is listed last.
file_list=[found for found in glob.glob(path+filen+'*') if not found.endswith('.ipynb')]

# From HOBO .ipynb file
# First of two passes: load every HOBO file into a dictionary of DataFrames.
# Keys have the form "10_<day>_24:<site>", both parts taken from the file name.
big_data_dict = {}

for fileName in file_list:
  data_read = pd.read_csv(fileName, header = 0, on_bad_lines='skip')
  # File name without its folder, split at underscores: the last two characters of the
  # first piece are the day, the second piece (up to its first dot) is the site.
  name_parts = fileName.split('/')[-1].split('_')
  day = name_parts[0][-2:]
  site = name_parts[1].split('.')[0]
  big_data_dict[f"10_{day}_24:{site}"] = data_read

# Code block to clean up the data and pad each table onto a fixed minute-by-minute grid
# (1440 rows for one day, 2880 rows for the two-day McLain / L'Anse tables).
# The index arithmetic below assumes one data row per minute.
for key in big_data_dict.keys():
  # NOTE(review): column 1 is converted and shifted by +4 h here, yet 'TimeUTC' is parsed
  # further down with str.split, so column 1 is presumably not 'TimeUTC'. Confirm which
  # column this conversion is meant for.
  time_col = big_data_dict[key].columns[1]
  big_data_dict[key][time_col] = pd.to_datetime(big_data_dict[key][time_col]) # converting to datetime
  big_data_dict[key][time_col] = big_data_dict[key][time_col] + timedelta(hours=4) # conversion from EDT to UTC

  # Converting all -800 values to nans in every column from the third one onwards.
  # NOTE(review): only the string '-800' is matched; a numeric -800 would be left in place.
  value_cols = big_data_dict[key].columns[2:]
  data_only = big_data_dict[key][value_cols]
  data_only = data_only.replace(to_replace='-800', value=np.nan) # np.NaN alias was removed in NumPy 2.0
  big_data_dict[key][value_cols] = data_only

  # Tables whose key names McLain or L'Anse get a two-day grid, all others a one-day grid
  multiple = "McLain" in key or "Lanse" in key or "L'Anse" in key
  num = 1440*2 if multiple else 1440

  print(key)

  # Adding padding to be same resolution
  # Two-digit hour and minute labels for every minute of the grid ("00".."23", "00".."59").
  # The minute labels are not stored under the name `min`, so the builtin stays usable.
  n_days = num//1440
  hr = np.tile(np.repeat([f'{h:02d}' for h in range(24)], 60), n_days)
  minute_labels = np.tile([f'{m:02d}' for m in range(60)], 24*n_days)

  # Finding the index of where data starts and ends in the new resolution
  # ('TimeUTC' values are parsed as "<date> <hh>:<mm>..." strings)
  firstTime=big_data_dict[key]['TimeUTC'][0]
  firsthr=firstTime.split(' ')[1].split(':')[0]
  firstmin=firstTime.split(' ')[1].split(':')[1]
  firstInd=int(firsthr)*60+int(firstmin)-1

  numInd=len(big_data_dict[key])
  lastInd=firstInd+numInd

  # NOTE(review): if lastInd == num the branch below is skipped and endPad is -1, which
  # np.full rejects. Confirm this case cannot occur.
  endPad=(num-(lastInd+1))

  # Cutting down to 24 hr UTC days
  if lastInd>num:
    i=lastInd-num # number of rows running past the end of the grid
    endPad=0
    lastTime=big_data_dict[key]['TimeUTC'][len(big_data_dict[key])-(1+(lastInd-num))]
    # NOTE(review): the try path recomputes `i` from the time of day at the cut position,
    # the fallback path instead trims one extra row. The resulting lengths have not been
    # verified here; check both paths against real data.
    try:
      lasthr=lastTime.split(' ')[1].split(':')[0]
      lastmin=lastTime.split(' ')[1].split(':')[1]
      lastInd=int(lasthr)*60+int(lastmin)-1
      i=lastInd-num
    except Exception: # narrowed from a bare except so Ctrl-C / SystemExit are not swallowed
      lastTime=big_data_dict[key]['TimeUTC'][len(big_data_dict[key])-(1+(lastInd-num-1))]
      lasthr=lastTime.split(' ')[1].split(':')[0]
      lastmin=lastTime.split(' ')[1].split(':')[1]
      lastInd=int(lasthr)*60+int(lastmin)-1
      i+=1
    big_data_dict[key]=big_data_dict[key][:-i]

  # Creating new dataframe to add padding: NaNs before the first record, the records
  # themselves, then NaNs up to the end of the grid
  df = pd.DataFrame(columns = ['TimeUnix', 'TimeUTC','SolarRadiation','Temperature','DewPoint','RelativeHumidity','WindDirection','WindSpeed','GustSpeed','Pressure','HH','MM'])
  for col in df.columns.tolist():
    if col=='HH':
      df[col]=hr
    elif col=='MM':
      df[col]=minute_labels
    else:
      df[col]= (np.full(firstInd+1, np.nan)).tolist()+big_data_dict[key][col].tolist()+(np.full(endPad,np.nan)).tolist()

  # The padded table replaces the raw one under the same key
  big_data_dict[key]=df


10_13_24:McLain
10_12_24:Lily
10_13_24:Swedetown
10_12_24:Swedetown
10_15_24:Lanse
10_14_24:Beach
10_14_24:Cliff
10_15_24:Beach
10_15_24:Cliff


In [333]:
# Combining Data into Plot
# Plot styling shared by every figure
size=1.5
colors=['#d55e00','#cc79a7','#0072b2','#f0e442','#009e73']

# The buoy file chosen earlier decides the first plotted day, the title site name and
# the three HOBO sites shown next to the buoy.
if fn[-5]=='5': # L'Anse
  day=14
  site="L'Anse"
  site1, site2, site3 = 'Lanse', 'Cliff', 'Beach'
else:
  day=12
  site='McLain'
  site1, site2, site3 = site, 'Lily', 'Swedetown'

# Keeping, in dictionary order, every HOBO table whose site (the key text after ':')
# is one of the three selected sites
keys=[hobo_key for hobo_key in big_data_dict.keys() if hobo_key.split(':')[1] in (site1, site2, site3)]

# One figure per IOP day: `day` starts at the first day and is incremented at the end
# of each pass.
for i in range(0,2):
  # Buoy Data
  # Rows of the other day become NaN here (no dropna), then a fixed slice is kept.
  # NOTE(review): 68 / 69 are hard-coded row counts, presumably chosen to match the
  # number of buoy time labels built below. Confirm.
  newData=iop_days.where(iop_days['DD'].astype('int')==day)
  if day==14 or day==12:
    newData=newData[:68]
  else:
    newData=newData[-69:]

  fig=plt.figure(dpi=150,figsize=(10,5))
  ax=plt.axes()

  # HOBO Data
  k=0 # index of the next colour to use
  for key in keys:
    hobo=big_data_dict[key]
    sites=key.split(':')[1]

    # Single-day tables are only drawn on their own day; the McLain / Lanse tables
    # cover both days and are sliced instead
    if sites!='McLain' and sites!='Lanse' and key.split('_')[1]!=str(day):
      continue

    if sites=="McLain" or sites=='Lanse':
      if day==14 or day==12:
        hobo=hobo[:1440] # first day of the two-day table
      else:
        hobo=hobo[1440:] # second day

    time=(hobo['HH']+':'+hobo['MM']) # "hh:mm" labels used as x values

    # Display names for the legend
    if sites=='Lanse':
      sites="L'Anse"
    elif sites=='Beach':
      sites='Second Sand Beach'
    elif sites=='Cliff':
      sites='Baraga Cliffs'
    elif sites=='Lily':
      sites='Lily Pond'
    ax.plot(time,hobo['Temperature'].astype('float64'),label=sites,linewidth=size,c=colors[k])
    k+=1

  # Buoy x values: every 20th label of the last HOBO table drawn, without the first three
  # and the last one (68 labels when that table has 1440 rows).
  # The buoy values are matched to these labels by position only.
  buoyTime=time.tolist()[::20]
  buoyTime=buoyTime[3:-1]

  wtmp=newData['WTMP'].astype('float64')
  atmp=newData['ATMP'].astype('float64')
  if len(wtmp)!=len(buoyTime):
    # Explicit replacement for the former bare try/except: cut both sides to the shorter
    # length from the end (the same result as the old [:-1] trim for a 68-vs-69 mismatch)
    n_common=len(buoyTime) if len(buoyTime)<len(wtmp) else len(wtmp)
    print(f'Buoy length mismatch: {len(buoyTime)} time labels vs {len(wtmp)} rows; using the first {n_common}')
    buoyTime=buoyTime[:n_common]
    wtmp=wtmp.iloc[:n_common]
    atmp=atmp.iloc[:n_common]

  ax.plot(buoyTime,wtmp,label='Buoy Lake',linewidth=size+0.5,c='gold')
  ax.plot(buoyTime,atmp,label='Buoy Air',linewidth=size+0.5,c=colors[4])

  zoom=False
  if zoom:
    skip=60
  else:
    skip=180 # one tick label every `skip` minutes
  ax.set_title(f"October {day}, 2024")
  xlabels=((hobo['HH'].astype('str')+':'+hobo['MM'].astype('str'))[::skip])
  ax.set_xticks(np.arange(0,1440,skip),labels=xlabels, rotation=45)
  plt.suptitle(f"{site} HOBO and Buoy Temperatures")
  ax.set_ylabel('Temperature (degC)')
  ax.set_xlabel('UTC Time (EDT+4)')
  # Narrowing the axes to leave room for the legend on the right
  pos = ax.get_position()
  ax.set_position([pos.x0, pos.y0, pos.width * 0.9, pos.height])
  ax.legend(loc='center right', bbox_to_anchor=(1.3, 0.9))
  if zoom:
    ax.set_xlim('14:00','23:00')
  #plt.show()
  plt.close(fig) # the figure is closed without being shown while plt.show() stays commented out
  day+=1

68
69
