In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import re
import plotly.express as px

In [26]:
# !pip install -U kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-macosx_10_11_x86_64.whl (85.2 MB)
[K     |████████████████████████████████| 85.2 MB 3.3 MB/s eta 0:00:011.4 MB/s eta 0:00:06��██████████████▉  | 79.4 MB 12.1 MB/s eta 0:00:01
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


### Planting Zone:

In [2]:
url = 'https://raw.githubusercontent.com/waldoj/frostline/050a887ca34459b9aed986f244a2f92835a21cde/hardiness_zones.csv'

df = pd.read_csv(url, dtype={'zip_code': 'string'})

In [3]:
df

Unnamed: 0,zip_code,zone,city,state,latitude,longitude
0,00705,12b,AIBONITO,PR,18.14,-66.26
1,00610,12b,ANASCO,PR,18.28,-67.14
2,00612,12b,ARECIBO,PR,18.45,-66.73
3,00601,11b,ADJUNTAS,PR,18.16,-66.72
4,00602,12b,AGUADA,PR,18.38,-67.18
...,...,...,...,...,...,...
29749,30293,8a,WOODBURY,GA,32.98,-84.58
29750,30188,7b,WOODSTOCK,GA,34.10,-84.51
29751,30189,7b,WOODSTOCK,GA,34.10,-84.51
29752,30582,7a,YOUNG HARRIS,GA,34.93,-83.84


In [86]:
df.zip_code.unique()

<StringArray>
['00705', '00610', '00612', '00601', '00602', '00603', '00703', '00704',
 '07675', '07677',
 ...
 '30185', '30292', '30680', '30187', '30683', '30293', '30188', '30189',
 '30582', '30295']
Length: 29754, dtype: string

In [4]:
df['subzone'] = df.zone.str[-1]
df['zone']=df.zone.str[:-1].astype('int')

In [26]:
fig = px.scatter_geo(df, lat="latitude", lon="longitude", color="zone",
                     height=700, width=1000)  
#                      projection="natural earth" hover_name="zone", size="zone",
fig.update_layout(
        title_text = 'Planting Zone',
        showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = '#0A6007',
        )
    )
fig.update_geos(
    visible=True,
    resolution=50
)

#,
#     showlakes=True, lakecolor="Blue",
#     showrivers=True, rivercolor="Blue",
#     showocean = True,oceancolor="#2695A0",
fig.show()


In [27]:
fig.write_image('us_planting_zone.png')

### Temperature, precipitation

In [10]:
df_temp = pd.read_csv('temp_city_mavg_geo.csv', index_col=0)
sub = df_temp[df_temp['month']==1]
sub

Unnamed: 0,month,tmax,tmin,prcp,city_name,ID,Name,Lat,Lon
0,1,25.180000,6.267742,0.016194,Aberdeen,USW00014929,Aberdeen,45.4558,-98.4131
12,1,58.469677,34.203871,0.037548,Abilene,USW00013962,Abilene,32.4106,-99.6822
13,1,58.469677,34.203871,0.037548,Abilene,USW00013962,Abilene,32.4106,-99.6822
36,1,57.228739,35.792137,0.184689,Aiken,USC00380072,Aiken,33.5500,-81.6969
37,1,57.228739,35.792137,0.184689,Aiken,USC00380072,Aiken,33.5500,-81.6969
...,...,...,...,...,...,...,...,...,...
5352,1,70.149032,47.836774,0.014065,Yuma,USW00003145,Yuma,32.6500,-114.6167
5353,1,70.149032,47.836774,0.014065,Yuma,USW00003145,Yuma,32.6500,-114.6167
5354,1,70.149032,47.836774,0.014065,Yuma,USW00003145,Yuma,32.6500,-114.6167
5388,1,39.943441,24.869247,0.088080,Zaneville,USW00093824,Zaneville,39.9444,-81.8922


In [13]:
sub = sub.drop_duplicates()
sub

Unnamed: 0,month,tmax,tmin,prcp,city_name,ID,Name,Lat,Lon
0,1,25.180000,6.267742,0.016194,Aberdeen,USW00014929,Aberdeen,45.4558,-98.4131
12,1,58.469677,34.203871,0.037548,Abilene,USW00013962,Abilene,32.4106,-99.6822
36,1,57.228739,35.792137,0.184689,Aiken,USC00380072,Aiken,33.5500,-81.6969
60,1,37.357419,23.829032,0.080194,Akron,USW00014895,Akron,40.9167,-81.4333
84,1,34.049677,19.036774,0.088194,Albany,USW00014735,Albany,42.7431,-73.8092
...,...,...,...,...,...,...,...,...,...
5268,1,43.236129,27.128387,0.095806,Wilmington,USW00013781,Wilmington,39.6728,-75.6008
5304,1,45.519355,23.430968,0.033226,Winnemucca,USW00024128,Winnemucca,40.9017,-117.8081
5328,1,33.915973,20.381112,0.110558,Worcester,USW00094746,Worcester,42.2706,-71.8731
5352,1,70.149032,47.836774,0.014065,Yuma,USW00003145,Yuma,32.6500,-114.6167


In [17]:
sub.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 5388
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   month      210 non-null    int64  
 1   tmax       210 non-null    float64
 2   tmin       210 non-null    float64
 3   prcp       210 non-null    float64
 4   city_name  199 non-null    object 
 5   ID         199 non-null    object 
 6   Name       199 non-null    object 
 7   Lat        199 non-null    float64
 8   Lon        199 non-null    float64
dtypes: float64(5), int64(1), object(3)
memory usage: 16.4+ KB


In [35]:
fig_temp = px.scatter_geo(sub, lat="Lat", lon="Lon", color="tmax", size="tmax",
                     height=700, width=1000)
#                      projection="natural earth" hover_name="zone", 
fig_temp.update_layout(
        title_text = 'Max temperature by city',
        showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = '#0A6007',
        )
    )
fig_temp.update_geos(
    visible=True,
    resolution=50
)

#,
#     showlakes=True, lakecolor="Blue",
#     showrivers=True, rivercolor="Blue",
#     showocean = True,oceancolor="#2695A0",
fig_temp.show()


In [36]:
fig_temp.write_image('max_temp.png')

In [37]:
fig_prec = px.scatter_geo(sub, lat="Lat", lon="Lon", color="prcp", size="prcp",
                     height=700, width=1000)
#                      projection="natural earth" hover_name="zone", 
fig_prec.update_layout(
        title_text = 'Precipitation by city',
        showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = '#0A6007',
        )
    )
fig_prec.update_geos(
    visible=True,
    resolution=50
)

#,
#     showlakes=True, lakecolor="Blue",
#     showrivers=True, rivercolor="Blue",
#     showocean = True,oceancolor="#2695A0",
fig_prec.show()


In [38]:
fig_prec.write_image('prec.png')

In [39]:
sub['tmin_justed'] = sub['tmin'] + abs(sub.tmin.min())
sub.tmin_justed.min()

0.0

In [40]:
fig_mintemp = px.scatter_geo(sub, lat="Lat", lon="Lon", color="tmin_justed", size="tmin_justed",
                     height=700, width=1000)
#                      projection="natural earth" hover_name="zone", 
fig_mintemp.update_layout(
        title_text = 'Min Temperature by City',
        showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = '#0A6007',
        )
    )
fig_mintemp.update_geos(
    visible=True,
    resolution=50
)

#,
#     showlakes=True, lakecolor="Blue",
#     showrivers=True, rivercolor="Blue",
#     showocean = True,oceancolor="#2695A0",
fig_mintemp.show()


In [41]:
fig_mintemp.write_image('min_temp.png')

In [48]:
station_id = pd.read_csv('city_temp_prec.csv', index_col=0).drop(columns=['year', 'month', 'tmax', 'tmin', 'prcp'])
station_id 
station_id = station_id.replace({'name':{'EUGENE': 'Eugene'}})
station_id['name'] = station_id['name'].apply(lambda x: re.sub(r"(\w)([A-Z])", r"\1 \2", x))

station_id = station_id.drop_duplicates()
station_id

Unnamed: 0,ID,name
0,USW00024021,Lander
0,USW00024018,Cheyenne
0,USW00014897,Wausau
0,USW00094973,Hayward
0,USW00014991,Eau Claire
...,...,...
0,USW00013896,Muscle Shoals
0,USW00013895,Montgomery
0,USW00013838,Mobile
0,USW00003856,Huntsville


In [49]:
temp = temp.merge(station_id, how='left', left_on = 'city_name', right_on = 'name')
temp

Unnamed: 0,name_x,month,year,tmax,tmin,prcp,name_split,state_name,city_name,ID,name_y
0,Aberdeen,1,2019.0,25.180000,6.267742,0.016194,[Aberdeen],,Aberdeen,USW00014929,Aberdeen
1,Aberdeen,2,2019.0,24.024507,2.581773,0.017259,[Aberdeen],,Aberdeen,USW00014929,Aberdeen
2,Aberdeen,3,2019.0,41.161935,20.584516,0.028581,[Aberdeen],,Aberdeen,USW00014929,Aberdeen
3,Aberdeen,4,2019.0,55.457333,29.265333,0.048533,[Aberdeen],,Aberdeen,USW00014929,Aberdeen
4,Aberdeen,5,2019.0,70.983226,44.136774,0.062065,[Aberdeen],,Aberdeen,USW00014929,Aberdeen
...,...,...,...,...,...,...,...,...,...,...,...
2515,Zaneville,8,2019.0,83.398065,62.996129,0.126774,[Zaneville],,Zaneville,USW00093824,Zaneville
2516,Zaneville,9,2019.0,78.850000,56.886000,0.092467,[Zaneville],,Zaneville,USW00093824,Zaneville
2517,Zaneville,10,2019.0,67.499785,46.855140,0.113112,[Zaneville],,Zaneville,USW00093824,Zaneville
2518,Zaneville,11,2019.0,51.367333,32.136667,0.102267,[Zaneville],,Zaneville,USW00093824,Zaneville


In [53]:
# temp = temp.drop(columns=['state_name'])# 'name_y', 'year', 'name_split', 'name_x'
# temp.to_csv('temp_city_stationID.csv')
temp

Unnamed: 0,month,tmax,tmin,prcp,city_name,ID
0,1,25.180000,6.267742,0.016194,Aberdeen,USW00014929
1,2,24.024507,2.581773,0.017259,Aberdeen,USW00014929
2,3,41.161935,20.584516,0.028581,Aberdeen,USW00014929
3,4,55.457333,29.265333,0.048533,Aberdeen,USW00014929
4,5,70.983226,44.136774,0.062065,Aberdeen,USW00014929
...,...,...,...,...,...,...
2515,8,83.398065,62.996129,0.126774,Zaneville,USW00093824
2516,9,78.850000,56.886000,0.092467,Zaneville,USW00093824
2517,10,67.499785,46.855140,0.113112,Zaneville,USW00093824
2518,11,51.367333,32.136667,0.102267,Zaneville,USW00093824


In [54]:
# zone['city'] = zone.city.str.title()
zone

Unnamed: 0,zip_code,zone,city,state,latitude,longitude
0,00705,12b,AIBONITO,PR,18.14,-66.26
1,00610,12b,ANASCO,PR,18.28,-67.14
2,00612,12b,ARECIBO,PR,18.45,-66.73
3,00601,11b,ADJUNTAS,PR,18.16,-66.72
4,00602,12b,AGUADA,PR,18.38,-67.18
...,...,...,...,...,...,...
29749,30293,8a,WOODBURY,GA,32.98,-84.58
29750,30188,7b,WOODSTOCK,GA,34.10,-84.51
29751,30189,7b,WOODSTOCK,GA,34.10,-84.51
29752,30582,7a,YOUNG HARRIS,GA,34.93,-83.84


In [7]:
# !ls

city_temp_prec.csv     state_ab.csv           temp_mon_avg.csv
city_temp_precip.ipynb station_state.csv      [34mtemp_prec[m[m


In [18]:
# df = pd.read_csv('station_state.csv')
# df

Unnamed: 0,CITY,STATE,ID,START_DATE
0,Birmingham,Alabama,USW00013876,193001
1,Huntsville,Alabama,USW00003856,195809
2,Mobile,Alabama,USW00013894,194801
3,Montgomery,Alabama,USW00013895,194801
4,Anchorage,Alaska,USW00026451,195204
...,...,...,...,...
210,Wausau,Wisconsin,USW00014897,194112
211,Casper,Wyoming,USW00024089,194808
212,Cheyenne,Wyoming,USH00481675,189501
213,Lander,Wyoming,USW00024021,194801


In [55]:
# state = pd.read_csv('state_ab.csv')
# state = state.drop(columns=['Abbrev'])
# state

In [19]:
# df = df.merge(state, how='left', left_on = ['STATE'], right_on = ['State'])

Unnamed: 0,CITY,STATE,ID,START_DATE,State,Code
0,Birmingham,Alabama,USW00013876,193001,Alabama,AL
1,Huntsville,Alabama,USW00003856,195809,Alabama,AL
2,Mobile,Alabama,USW00013894,194801,Alabama,AL
3,Montgomery,Alabama,USW00013895,194801,Alabama,AL
4,Anchorage,Alaska,USW00026451,195204,Alaska,AK
...,...,...,...,...,...,...
210,Wausau,Wisconsin,USW00014897,194112,Wisconsin,WI
211,Casper,Wyoming,USW00024089,194808,Wyoming,WY
212,Cheyenne,Wyoming,USH00481675,189501,Wyoming,WY
213,Lander,Wyoming,USW00024021,194801,Wyoming,WY


In [23]:
# df = df.drop(columns=['State', 'START_DATE', 'STATE'])
df_station = df

In [67]:
df_station.to_csv('city_state_code.csv')

In [57]:
# df = temp.merge(df_station, how='left', left_on='ID', right_on='ID')
# df

Unnamed: 0,month,tmax,tmin,prcp,city_name,ID,CITY,Code
0,1,25.180000,6.267742,0.016194,Aberdeen,USW00014929,,
1,2,24.024507,2.581773,0.017259,Aberdeen,USW00014929,,
2,3,41.161935,20.584516,0.028581,Aberdeen,USW00014929,,
3,4,55.457333,29.265333,0.048533,Aberdeen,USW00014929,,
4,5,70.983226,44.136774,0.062065,Aberdeen,USW00014929,,
...,...,...,...,...,...,...,...,...
2515,8,83.398065,62.996129,0.126774,Zaneville,USW00093824,,
2516,9,78.850000,56.886000,0.092467,Zaneville,USW00093824,,
2517,10,67.499785,46.855140,0.113112,Zaneville,USW00093824,,
2518,11,51.367333,32.136667,0.102267,Zaneville,USW00093824,,


In [63]:
len(temp.ID.unique())

200

In [66]:
# dft = df_temp.replace({'Name':{'EUGENE': 'Eugene'}})
# dft.Name.unique()
dft['Name'] = dft['Name'].apply(lambda x: re.sub(r"(\w)([A-Z])", r"\1 \2", str(x)))
# dft

In [71]:
# dft = dft.drop(columns=['city_name'])
dft.to_csv('good_temp.csv')

In [72]:
dft

Unnamed: 0,month,tmax,tmin,prcp,ID,Name,Lat,Lon
0,1,25.180000,6.267742,0.016194,USW00014929,Aberdeen,45.4558,-98.4131
1,2,24.024507,2.581773,0.017259,USW00014929,Aberdeen,45.4558,-98.4131
2,3,41.161935,20.584516,0.028581,USW00014929,Aberdeen,45.4558,-98.4131
3,4,55.457333,29.265333,0.048533,USW00014929,Aberdeen,45.4558,-98.4131
4,5,70.983226,44.136774,0.062065,USW00014929,Aberdeen,45.4558,-98.4131
...,...,...,...,...,...,...,...,...
5407,10,67.499785,46.855140,0.113112,USW00093824,Zaneville,39.9444,-81.8922
5408,11,51.367333,32.136667,0.102267,USW00093824,Zaneville,39.9444,-81.8922
5409,11,51.367333,32.136667,0.102267,USW00093824,Zaneville,39.9444,-81.8922
5410,12,44.502581,27.864516,0.089871,USW00093824,Zaneville,39.9444,-81.8922


In [73]:
df

Unnamed: 0,zip_code,zone,city,state,latitude,longitude,subzone
0,00705,12,Aibonito,PR,18.14,-66.26,b
1,00610,12,Anasco,PR,18.28,-67.14,b
2,00612,12,Arecibo,PR,18.45,-66.73,b
3,00601,11,Adjuntas,PR,18.16,-66.72,b
4,00602,12,Aguada,PR,18.38,-67.18,b
...,...,...,...,...,...,...,...
29749,30293,8,Woodbury,GA,32.98,-84.58,a
29750,30188,7,Woodstock,GA,34.10,-84.51,b
29751,30189,7,Woodstock,GA,34.10,-84.51,b
29752,30582,7,Young Harris,GA,34.93,-83.84,a


In [76]:
joined = df.merge(dft, how='inner', left_on = 'city', right_on = 'Name')
joined.drop_duplicates()

Unnamed: 0,zip_code,zone,city,state,latitude,longitude,subzone,month,tmax,tmin,prcp,ID,Name,Lat,Lon
0,10001,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
1,10001,7,New York,NY,40.71,-73.99,b,2,45.293547,32.461256,0.135946,USW00094728,New York,40.7789,-73.9692
2,10001,7,New York,NY,40.71,-73.99,b,3,50.217419,35.764516,0.138581,USW00094728,New York,40.7789,-73.9692
3,10001,7,New York,NY,40.71,-73.99,b,4,61.341333,45.450667,0.142333,USW00094728,New York,40.7789,-73.9692
4,10001,7,New York,NY,40.71,-73.99,b,5,70.906452,54.409032,0.146710,USW00094728,New York,40.7789,-73.9692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91689,28810,7,Asheville,NC,35.57,-82.54,a,8,83.272903,64.612903,0.262710,USW00003812,Asheville,35.4319,-82.5375
91692,28810,7,Asheville,NC,35.57,-82.54,a,9,80.076667,59.964667,0.132267,USW00003812,Asheville,35.4319,-82.5375
91695,28810,7,Asheville,NC,35.57,-82.54,a,10,71.212903,50.203871,0.226516,USW00003812,Asheville,35.4319,-82.5375
91698,28810,7,Asheville,NC,35.57,-82.54,a,11,58.743333,36.028667,0.105600,USW00003812,Asheville,35.4319,-82.5375


In [77]:
sub_joined = joined[joined['month']==1]
sub_joined

Unnamed: 0,zip_code,zone,city,state,latitude,longitude,subzone,month,tmax,tmin,prcp,ID,Name,Lat,Lon
0,10001,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
12,10002,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
24,10003,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
36,10004,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
48,10005,7,New York,NY,40.71,-73.99,b,1,41.082581,29.467742,0.095677,USW00094728,New York,40.7789,-73.9692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91633,28806,7,Asheville,NC,35.57,-82.54,a,1,48.823871,30.751613,0.135355,USW00003812,Asheville,35.4319,-82.5375
91634,28806,7,Asheville,NC,35.57,-82.54,a,1,48.823871,30.751613,0.135355,USW00003812,Asheville,35.4319,-82.5375
91668,28810,7,Asheville,NC,35.57,-82.54,a,1,48.823871,30.751613,0.135355,USW00003812,Asheville,35.4319,-82.5375
91669,28810,7,Asheville,NC,35.57,-82.54,a,1,48.823871,30.751613,0.135355,USW00003812,Asheville,35.4319,-82.5375


In [84]:
fig_joined = px.scatter_geo(sub_joined, lat="Lat", lon="Lon", color="tmax", size="tmax", 
                     height=700, width=1000)
#                      projection="natural earth" hover_name="zone", 
fig_joined.update_layout(
        title_text = 'joined zone and weather',
        showlegend = True,
        geo = dict(
            scope = 'usa',
            landcolor = '#0A6007',
        )
    )
fig_joined.update_geos(
    visible=True,
    resolution=50,
)

fig_joined.show()
