# [Plant Hardiness Zones](https://kcaylor.github.io/EDS-217-Essential-Python.github.io/course-materials/eod-practice/eod-day7.html)

End-of-day practice, 09/11/2024

In [None]:
import matplotlib.pyplot as plt

In [None]:
import pandas as pd
# Bind `plt` to pyplot, not the top-level matplotlib package: the plotting
# cells call plt.figure(...), plt.title(...), plt.show(), which are pyplot
# functions. `import matplotlib as plt` made plt.figure a module, not a callable.
import matplotlib.pyplot as plt
import seaborn as sns

# Plant hardiness zones by zip code: the 2023 release and the 2012 release.
zipcode_data = pd.read_csv('https://prism.oregonstate.edu/projects/phm_data/phzm_us_zipcode_2023.csv')
survey_data = pd.read_csv('https://prism.oregonstate.edu/projects/public/phm/2012/phm_us_zipcode_2012.csv')
# Zip code reference table; later cells use its zip, latitude and longitude columns.
zipcodes = pd.read_csv('http://uszipcodelist.com/zip_code_database.csv')


# Quick look at the first rows of each table.
print(zipcode_data.head())
print(survey_data.head())
print(zipcodes.head())

In [None]:
# Make the zipcode column of the 2023 table a 5-character string.
# Zip codes that were parsed as numbers lose their leading zeros, so convert
# every value to text and left-pad with '0' up to width 5 (str.zfill).
zip_text_2023 = pd.Series(
    zipcode_data['zipcode'].values.astype('str'),
    index=zipcode_data.index,
)
zipcode_data['zipcode'] = zip_text_2023.str.zfill(5)

# Show the padded column as the cell output.
zipcode_data['zipcode']

In [None]:
# Same normalization for the 2012 table: text values, zero-padded to width 5.
zip_text_2012 = survey_data['zipcode'].values.astype('str')
survey_data['zipcode'] = pd.Series(zip_text_2012, index=survey_data.index).str.zfill(5)

# Preview the result.
survey_data.head()

In [None]:
# First step of combining the 2012 and 2023 data: tag every row with the
# year of the release it came from, so the two tables can be stacked later.

# Print both column lists before tagging.
for frame in (zipcode_data, survey_data):
    print(frame.columns)

for frame, release_year in ((zipcode_data, 2023), (survey_data, 2012)):
    frame['year'] = release_year

In [None]:
# Preview the first five rows of the 2012 table.
survey_data.head(n=5)

In [None]:
# Stack the 2023 and 2012 tables into one long table (one row per zip code
# per year). A merge on 'year' is not what we want here: the two tables share
# no year values, so rows are appended rather than joined.
#
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each
# input keeps its own row labels, so every label would appear twice in
# `stacked`, which makes label-based selection and alignment ambiguous.
stacked = pd.concat([zipcode_data, survey_data], ignore_index=True)
stacked

In [None]:
# In the combined plant hardiness dataframe, derive trange_min and trange_max
# from the text column trange (the original column is removed in the next cell).
#
# The split is on whitespace and must yield exactly three pieces per row:
# the first is kept as the minimum, the last as the maximum, and the middle
# piece is parked in a throwaway 'trash' column.
# NOTE(review): presumably trange reads like "<min> to <max>" - confirm.
trange_parts = stacked['trange'].str.split(expand=True)
stacked[['trange_min', 'trash', 'trange_max']] = trange_parts



In [None]:
# Remove the original range text and the throwaway middle piece of the split.
stacked = stacked.drop(columns=['trange', 'trash'])
stacked

In [None]:
# On average, how much has the minimum temperature in a zip code changed
# from 2012 to 2023? Plan: group by year and take the mean of trange_min.
#
# The bounds are still text after the split, so convert both columns to
# integers before any arithmetic is done on them.
temp_bounds = ['trange_min', 'trange_max']
stacked[temp_bounds] = stacked[temp_bounds].values.astype('int')

# Show the column dtypes to confirm the conversion.
stacked.dtypes

# The per-year mean itself is left disabled:
#min_temp_change = stacked.groupby('year')['trange_min'].agg('mean')
#min_temp_change


In [None]:
#min_temp_change = stacked.groupby('year')['trange_min'].mean()
#min_temp_change

In [None]:
# Bring the zip code reference table in line with the hardiness tables:
# 5-character, zero-padded text values in a column named 'zipcode'.
zip_as_text = zipcodes['zip'].values.astype('str')
zipcodes['zip'] = pd.Series(zip_as_text, index=zipcodes.index).str.zfill(5)
zipcodes.rename(columns={'zip': 'zipcode'}, inplace=True)

# Preview the result.
zipcodes.head()

In [None]:
# Join the stacked hardiness table to the zip code reference table on zipcode.
# how='outer' keeps zip codes that appear in only one of the two tables;
# their columns from the other table are filled with missing values.
hardiness_zips = stacked.merge(zipcodes, on='zipcode', how='outer')
hardiness_zips.head()

In [None]:

# Keep only rows with longitude below -60 for mapping (rows whose longitude
# is missing fail the comparison and are excluded as well).
west_of_minus_60 = hardiness_zips['longitude'] < -60
map_data = hardiness_zips.loc[west_of_minus_60]
map_data

In [None]:
# Create two scatter plots, one for 2012 and one for 2023, where the x axis is
# longitude, the y axis is latitude, and the color encodes that year's minimum
# temperature. map_data is already restricted to longitude < -60.
#
# Each year gets its own figure; a single faceted figure via
# sns.relplot(..., col="year", hue="trange_min") would be an alternative.
for survey_year in (2012, 2023):
    rows_for_year = map_data[map_data['year'] == survey_year]

    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        data=rows_for_year,
        x='longitude',
        y='latitude',
        hue='trange_min',
        palette='coolwarm',
        s=10,
    )
    plt.title(f'Minimum Temperature in {survey_year} by Latitude and Longitude')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.legend(title=f'Min Temp ({survey_year})')
    plt.show()

In [None]:
# Single scatter plot of the change in minimum temperature, 2023 minus 2012,
# for longitude < -60 (map_data is already filtered). Zip codes without
# information from both years are drawn in grey.

# Wide table: one row per zip code location, one column per year.
location_keys = ['zipcode', 'latitude', 'longitude']
min_temp_by_year = map_data.pivot_table(index=location_keys,
                                        columns='year', values='trange_min').reset_index()

# pivot_table leaves out locations that have no hardiness record in any year,
# so start from every mapped location and left-join the per-year values.
df_diff = (map_data[location_keys]
           .drop_duplicates()
           .merge(min_temp_by_year, on=location_keys, how='left'))
df_diff['temp_diff'] = df_diff[2023] - df_diff[2012]

# temp_diff is NaN whenever either year is missing. seaborn drops rows whose
# hue is NaN, so those points have to be drawn in a separate grey layer;
# an edgecolor on the colored layer only outlines points that do have data.
has_both_years = df_diff['temp_diff'].notna()

plt.figure(figsize=(10, 6))
ax = sns.scatterplot(data=df_diff[~has_both_years], x='longitude', y='latitude',
                     color='grey', s=10, label='Missing 2012 or 2023')
sns.scatterplot(data=df_diff[has_both_years], x='longitude', y='latitude', hue='temp_diff',
                palette='coolwarm', s=10, legend='full', ax=ax)
plt.title('Temperature Difference (2023 - 2012) by Latitude and Longitude')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend(title='Temp Difference')
plt.show()

In [None]:
#stacked_pivot = hardiness_zips.pivot(index= ['zipcode', 'latitude', 'longitude'], columns = 'year', values = 'trange_min')

In [None]:
#stacked_pivot.reset_index()

In [None]:
# Build the wide table this cell works on; without this the name
# stacked_pivot is undefined. One row per zip code location, one column per
# year, holding that year's minimum temperature. pivot_table is used, as in
# the difference-plot cell, because it aggregates repeated
# (location, year) pairs instead of raising on them.
stacked_pivot = hardiness_zips.pivot_table(
    index=['zipcode', 'latitude', 'longitude'],
    columns='year',
    values='trange_min',
).reset_index()

# Change in minimum temperature from 2012 to 2023; NaN where a year is missing.
stacked_pivot['diff'] = stacked_pivot[2023.0] - stacked_pivot[2012.0]
stacked_pivot

In [None]:
# Scatter plot of the 2023 minimum temperature by location.
# This plot filters on 'year' and colors by 'trange_min', which exist only in
# the long-format table (one row per zip code per year). A pivoted table has
# one column per year instead, so the long-format hardiness_zips is used here.
rows_2023 = hardiness_zips[hardiness_zips['year'] == 2023]

plt.figure(figsize=(10, 6))
sns.scatterplot(data=rows_2023,
                x='longitude', y='latitude', hue='trange_min', palette='coolwarm', s=10)
plt.title('Minimum Temperature in 2023 by Latitude and Longitude')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend(title='Min Temp (2023)')
plt.show()