In [1]:
!pip install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl.metadata (14 kB)
Collecting ratelim (from geocoder)
  Downloading ratelim-0.1.6-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [2]:
import pandas as pd
from geocoder import arcgis

WEATHER = '/kaggle/input/weather-data/weather_data.csv'

# The geocoder needs a little extra information to get San Jose right, so the
# query text for that city is overridden here instead of rewriting the
# Location column of the weather data itself.
GEOCODER_QUERIES = {'San Jose': 'San Jose CA'}


def geocode_location(location):
    """Look up the coordinates of a location name with the ArcGIS geocoder.

    Parameters
    ----------
    location : str
        Location name exactly as it appears in the ``Location`` column.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the geocoder's ``latlng`` result.

    Raises
    ------
    ValueError
        If the geocoder returns no coordinates (a falsy ``latlng``), so the
        failure names the offending location instead of surfacing later as an
        opaque subscript error.
    """
    query = GEOCODER_QUERIES.get(location, location)
    latlng = arcgis(location=query).latlng
    if not latlng:
        raise ValueError(f'could not geocode location {location!r} (query {query!r})')
    return latlng[0], latlng[1]


df = pd.read_csv(filepath_or_buffer=WEATHER, parse_dates=['Date_Time'])

# Geocode every distinct location once (one geocoder request per city, not per
# row). Missing locations are skipped here and therefore dropped by the inner
# merge below.
location_df = (
    df[['Location']]
    .dropna()
    .drop_duplicates()
    .sort_values(by='Location')
    .reset_index(drop=True)
)
coordinates = [geocode_location(name) for name in location_df['Location']]
location_df['latitude'] = [latitude for latitude, _ in coordinates]
location_df['longitude'] = [longitude for _, longitude in coordinates]

# Attach the coordinates to every weather observation of the matching city.
df = df.merge(right=location_df, on='Location', how='inner')
df.head()

Unnamed: 0,Location,Date_Time,Temperature_C,Humidity_pct,Precipitation_mm,Wind_Speed_kmh,latitude,longitude
0,San Diego,2024-01-14 21:12:46,10.683001,41.195754,4.020119,8.23354,32.71576,-117.163817
1,San Diego,2024-05-17 15:22:10,8.73414,58.319107,9.111623,27.715161,32.71576,-117.163817
2,San Diego,2024-05-11 09:30:59,11.632436,38.820175,4.607511,28.732951,32.71576,-117.163817
3,Philadelphia,2024-02-26 17:32:39,-8.628976,54.074474,3.18372,26.367303,39.95106,-75.16562
4,San Antonio,2024-04-29 13:23:51,39.808213,72.899908,9.598282,29.898622,29.425171,-98.494614


In [3]:
# Display the dtype of every column of the merged frame (Date_Time was
# requested as a parsed date in read_csv; latitude/longitude come from the geocoder).
df.dtypes

Location                    object
Date_Time           datetime64[ns]
Temperature_C              float64
Humidity_pct               float64
Precipitation_mm           float64
Wind_Speed_kmh             float64
latitude                   float64
longitude                  float64
dtype: object

In [4]:
from plotly import express

# Per-city averages. Date_Time is dropped first so that only the measurement
# and coordinate columns are averaged; each city was geocoded once, so its
# latitude/longitude are constant within a group.
mean_df = (
    df
    .drop(columns=['Date_Time'])
    .groupby(by='Location', as_index=False)
    .mean()
)
mean_df

Unnamed: 0,Location,Temperature_C,Humidity_pct,Precipitation_mm,Wind_Speed_kmh,latitude,longitude
0,Chicago,15.009208,60.088798,5.003164,14.999501,41.883229,-87.632398
1,Dallas,15.009097,59.9599,4.997508,15.022001,32.777977,-96.796215
2,Houston,14.942985,59.967598,4.997757,14.989935,29.760803,-95.369506
3,Los Angeles,15.081063,60.072151,5.004491,15.03512,34.052238,-118.243344
4,New York,15.013389,60.044688,4.993057,14.989398,40.713047,-74.00723
5,Philadelphia,15.035892,60.044902,5.008561,14.990663,39.95106,-75.16562
6,Phoenix,12.792479,60.048643,6.087485,14.983904,33.448204,-112.072585
7,San Antonio,15.027761,59.932798,5.008091,14.956579,29.425171,-98.494614
8,San Diego,14.933539,60.072074,5.006593,15.037633,32.71576,-117.163817
9,San Jose,14.956108,59.98663,4.987398,14.971369,37.334794,-121.888145


In [5]:
# Display the column names of the merged frame.
df.columns

Index(['Location', 'Date_Time', 'Temperature_C', 'Humidity_pct',
       'Precipitation_mm', 'Wind_Speed_kmh', 'latitude', 'longitude'],
      dtype='object')

In [6]:
from plotly import express
# Draw one map per weather metric: every city is a marker placed at its
# coordinates and coloured by that metric's per-city mean.
for metric in ['Temperature_C', 'Humidity_pct', 'Precipitation_mm', 'Wind_Speed_kmh']:
    fig = express.scatter_mapbox(
        data_frame=mean_df,
        lat='latitude',
        lon='longitude',
        color=metric,
        hover_name='Location',
        mapbox_style='open-street-map',
        zoom=3.8,
        title='mean ' + metric,
        height=800,
    )
    # Use a fixed marker size for every city.
    fig.update_traces(marker={'size': 15})
    fig.show()