In [1]:
import pandas as pd
from datetime import datetime
import numpy as np



A client asks you to develop a frost risk model for their strawberry farm in Plant City, Florida.  As a first step in that process, you would like to understand the occurrence of these events by month and some information about the atmospheric conditions that generate frost in the location based on climatological records.  You remember that your ATMS 523 professor back in graduate school taught you some tricks that might be able to help. Using the code provided from Module 3, load in the GHCN-D daily temperature records from Plant City (Station USC00087205).

Strawberries are planted around October 1 and ready for harvest by the end of January. What is the mean risk of frost and freeze, defined as the mean number of days per month over the period 1991-2020 that the temperature has been observed to be less than or equal to 32 and 28 degrees Fahrenheit, respectively, that might damage the plants for each month during this period? (25 points)


In [2]:

dfraw = pd.read_parquet(
    f"s3://noaa-ghcn-pds/parquet/by_station/STATION={'USC00087205'}/",
    storage_options={"anon": True},  # passed to `s3fs.S3FileSystem`
)
# Make date the index
dfraw['DATE'] = dfraw['DATE'].apply(lambda x: datetime.strptime(x, '%Y%m%d'))
dfraw = dfraw.set_index('DATE').sort_index()

dfraw.head(10)



Unnamed: 0_level_0,ID,DATA_VALUE,M_FLAG,Q_FLAG,S_FLAG,OBS_TIME,ELEMENT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1892-09-01,USC00087205,322,,,6,,TMAX
1892-09-01,USC00087205,206,,,6,,TMIN
1892-09-02,USC00087205,317,,,6,,TMAX
1892-09-02,USC00087205,206,,,6,,TMIN
1892-09-03,USC00087205,317,,,6,,TMAX
1892-09-03,USC00087205,211,,,6,,TMIN
1892-09-04,USC00087205,322,,,6,,TMAX
1892-09-04,USC00087205,217,,,6,,TMIN
1892-09-05,USC00087205,333,,,6,,TMAX
1892-09-05,USC00087205,211,,,6,,TMIN


In [3]:
#only minimum temp data needed, from 1991-2020, months of oct nov dec and jan. 
df1= dfraw[(dfraw.index.year >= 1991) & (dfraw.index.year <= 2020)]
df2= df1[df1.index.month.isin([1, 10, 11, 12])]

df = df2[(df2['ELEMENT'] == 'TMIN')]

df['temperature'] = (9/5)*(df['DATA_VALUE']/10) + 32

# Filtering days with temperature <= 32°F
cold_days_32 = df[df['temperature'] <= 32]

# Filtering days with temperature <= 28°F
cold_days_28 = df[df['temperature'] <= 28]

# Group by month and count days for temperature <= 32°F
monthly_cold_days_32 = cold_days_32.groupby(pd.Grouper(freq='M')).size()

# Group by month and count days for temperature <= 28°F
monthly_cold_days_28 = cold_days_28.groupby(pd.Grouper(freq='M')).size()

# Calculate mean days per month across years for each threshold
mean_cold_days_per_month_32 = monthly_cold_days_32.groupby(monthly_cold_days_32.index.month).mean()
mean_cold_days_per_month_28 = monthly_cold_days_28.groupby(monthly_cold_days_28.index.month).mean()

mean_cold_days = pd.DataFrame({
    'Avg Days <= 32°F (FROST)': mean_cold_days_per_month_32,
    'Avg Days <= 28°F (FREEZE)': mean_cold_days_per_month_28
})


########################
#ANSWER TO QUESTION 1 ##
########################

print(mean_cold_days)


      Avg Days <= 32°F (FROST)  Avg Days <= 28°F (FREEZE)
DATE                                                     
1                     1.931034                   0.652174
2                     0.000000                   0.000000
3                     0.000000                   0.000000
4                     0.000000                   0.000000
5                     0.000000                   0.000000
6                     0.000000                   0.000000
7                     0.000000                   0.000000
8                     0.000000                   0.000000
9                     0.000000                   0.000000
10                    0.000000                   0.000000
11                    0.035714                   0.000000
12                    0.642857                   0.217391


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['temperature'] = (9/5)*(df['DATA_VALUE']/10) + 32


 #Part 2
 
 To begin to explore the seasonal to sub-seasonal prediction of freeze events at this site, using code you adapt from Module 4, obtain the monthly ENSO and NAO index for each freeze event identified in part 1.  Compute the number of freeze events for ENSO index values < -0.5, -0.5 to 0.5, and > 0.5, and print or display these values to a table.  Repeat for NAO index values in the same ranges. Do you see any relationship of freezes at this location with these two climate indicators? (25 points)


In [4]:
#Load in ENSO and NAO Data

enso = pd.read_csv('https://www.esrl.noaa.gov/psd/data/correlation/censo.data',delim_whitespace=True,header=None,skiprows=1,skipfooter=2, engine='python')
nao = pd.read_csv('https://www.esrl.noaa.gov/psd/data/correlation/nao.data',delim_whitespace=True,header=None,skiprows=1,skipfooter=3, engine='python')

enso.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1948,0.26,0.43,0.72,0.21,0.24,0.41,0.08,0.26,0.45,-0.64,-0.21,0.58
1,1949,0.18,0.13,-0.55,0.15,0.43,0.0,-0.06,0.08,-0.42,-0.59,-0.29,-0.91
2,1950,-0.74,-1.7,-1.43,-1.29,-1.22,-1.69,-1.69,-0.96,-0.95,-1.27,-1.12,-1.61
3,1951,-1.13,-0.69,-0.24,0.33,0.71,0.19,1.32,1.01,1.1,1.15,0.92,0.83
4,1952,0.64,0.56,0.07,0.71,-0.5,-0.74,-0.54,-0.12,-0.03,-0.06,-0.02,0.41


In [5]:
#Format ENSO Data
enso_new=pd.DataFrame()
enso_new['Date']=pd.date_range(start=pd.to_datetime('1948-01-01'),end=pd.to_datetime('2023-12-31'),freq="MS")
enso_new = enso_new.set_index('Date')
len(enso.loc[:,1:].stack().values)
enso_new['ENSO']=enso.loc[:,1:].stack().values


In [6]:
#Do the number of freeze events based on ENSO criteria! 

enso_df = enso_new['1991-01-01':'2020-12-31']

cold_days_28 = df[df['temperature'] <= 28]

cold_days_28.index = cold_days_28.index.to_period('M')
enso_df.index = enso_df.index.to_period('M')

enso_df
cold_days_28.index.name = 'Date'

enso_freeze = pd.merge(enso_df,cold_days_28,left_index=True, right_index=True, how='inner')

enso_freeze.head(10)

# Count rows where ENSO < -0.5
count_less_than_minus_half = (enso_freeze['ENSO'] < -0.5).sum()

# Count rows where ENSO is between -0.5 and 0.5 (inclusive)
count_between_minus_half_and_half = ((enso_freeze['ENSO'] >= -0.5) & (enso_freeze['ENSO'] <= 0.5)).sum()

# Count rows where ENSO > 0.5
count_greater_than_half = (enso_freeze['ENSO'] > 0.5).sum()

# Print results
print(f"Count of freeze events when ENSO < -0.5: {count_less_than_minus_half}")
print(f"Count of freeze events when ENSO between -0.5 and 0.5: {count_between_minus_half_and_half}")
print(f"Count of freeze events when ENSO > 0.5: {count_greater_than_half}")

print('Analysis: It appears Freeze Events are more likely during La-Nina Events! ')


Count of freeze events when ENSO < -0.5: 11
Count of freeze events when ENSO between -0.5 and 0.5: 4
Count of freeze events when ENSO > 0.5: 5
Analysis: It appears Freeze Events are more likely during La-Nina Events! 


In [7]:
# REPEAT EVERYTHING NOW FOR NAO 

#Format NAO Data
nao_new=pd.DataFrame()
nao_new['Date']=pd.date_range(start=pd.to_datetime('1948-01-01'),end=pd.to_datetime('2024-12-31'),freq="MS")
nao_new = nao_new.set_index('Date')
len(nao.loc[:,1:].stack().values)
nao_new['NAO']=nao.loc[:,1:].stack().values



In [8]:
#Do the number of freeze events based on NAO criteria! 

nao_df = nao_new['1991-01-01':'2020-12-31']

cold_days_28 = df[df['temperature'] <= 28]

cold_days_28.index = cold_days_28.index.to_period('M')
nao_df.index = nao_df.index.to_period('M')

nao_df
cold_days_28.index.name = 'Date'

nao_freeze = pd.merge(nao_df,cold_days_28,left_index=True, right_index=True, how='inner')

nao_freeze.head(10)

# Count rows where NAO < -0.5
count_less_than_minus_half = (nao_freeze['NAO'] < -0.5).sum()

# Count rows where NAO is between -0.5 and 0.5 (inclusive)
count_between_minus_half_and_half = ((nao_freeze['NAO'] >= -0.5) & (nao_freeze['NAO'] <= 0.5)).sum()

# Count rows where NAO> 0.5
count_greater_than_half = (nao_freeze['NAO'] > 0.5).sum()

# Print results
print(f"Count of freeze events when NAO < -0.5: {count_less_than_minus_half}")
print(f"Count of freeze events when NAO between -0.5 and 0.5: {count_between_minus_half_and_half}")
print(f"Count of freeze events when NAO > 0.5: {count_greater_than_half}")

print('Analysis: It appears Freeze Events are more likely when NAO is more negative (below -0.5)! ')

Count of freeze events when NAO < -0.5: 11
Count of freeze events when NAO between -0.5 and 0.5: 6
Count of freeze events when NAO > 0.5: 3
Analysis: It appears Freeze Events are more likely when NAO is more negative (below -0.5)! 
