In [32]:
import requests
import re
import pandas as pd
import os
from datetime import date
import datetime
import json

# Settings

In [33]:
read_weather_from_file = False

# Read the data

In [34]:
df_sc = pd.read_csv(os.path.join("data", "sc_export.csv"))

# Parse timestamps

In [35]:
#date,hour = zip(*df_sc['timestamp'].map(lambda x: x.split('T')))
df_sc['timestamp'] = df_sc['timestamp'].map(lambda d:datetime.datetime.strptime(d,'%Y-%m-%dT%H:%M:%S'))

In [36]:
df_sc['day'] = df_sc['timestamp'].map(date.weekday)
df_sc['hour'] = pd.DatetimeIndex(df_sc['timestamp']).hour
df_sc['date'] = pd.DatetimeIndex(df_sc['timestamp']).date
df_sc['time'] = pd.DatetimeIndex(df_sc['timestamp']).hour + pd.DatetimeIndex(df_sc['timestamp']).minute/60 + pd.DatetimeIndex(df_sc['timestamp']).second/3600
df_sc['weekend'] = [1 if d == 5 or d==6 else 0 for d in df_sc['day']]
df_sc['yday'] = df_sc['date'].map(lambda x: x.timetuple().tm_yday)

In [37]:
# get the current day of the year
#doy = datetime.today().timetuple().tm_yday

# "day of year" ranges for the northern hemisphere
spring = range(80, 172)
summer = range(172, 264)
fall = range(264, 355)
# winter = everything else

def get_season(doy):
    if doy in spring:
        season = 'spring'
    elif doy in summer:
        season = 'summer'
    elif doy in fall:
        season = 'fall'
    else:
        season = 'winter'
    return season

df_sc['season'] = [get_season(x) for x in df_sc['yday']]

In [38]:
#df_sc['name'] = [str(lat)+","+str(lon) for lat, lon in zip(df_sc['lat'], df_sc['lon'])]

# Select relevant columns

In [39]:
selected = ["sensor_id", 
            "sensor_type", 
            "location", 
            "lat", "lon", 
            "timestamp", 
            "date", 
            "time", "hour", 
            "day", "weekend", 
            "season",
            "P1", "P2"]

df = df_sc[selected]

# Get weather data

In [9]:
weather_groups = {'Clear': 'dry',
                  'Partially cloudy': 'dry',
                  'Rain, Partially cloudy': 'wet',
                  'Rain': 'wet',
                  'Rain, Overcast': 'wet',
                  'Overcast': 'wet',
                  'Snow, Rain, Partially cloudy': 'wet'}

In [10]:
if not read_weather_from_file:
    with open('api_keys.json') as f:
        API_KEY = json.loads(f.read())['weather']

    lat_lon_s_id = set(zip(df_sc.lat, df_sc.lon, df_sc.sensor_id))
    days = df_sc.date.unique()

    df_weather = -1

    for lat,lon,sensor_id in lat_lon_s_id:
        min_day = min(days)
        max_day = max(days)
        print(min_day, max_day, lat, lon)
        try:
            url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{lat}%2C{lon}/{min_day}/{max_day}?unitGroup=metric&include=days&key={API_KEY}&contentType=csv"

            df2 = pd.read_csv(url)
            df2['sensor_id'] = sensor_id
            if type(df_weather) == int:
                df_weather = df2
            else:
                df_weather = pd.concat([df_weather, df2],ignore_index=True)
        except:
            print("Something is wrong...")

    df_weather.to_csv(os.path.join("data", "weather.csv"), index=False)
else:
    df_weather = pd.read_csv(os.path.join("data", "weather.csv"))


2021-11-29 2022-09-02 46.042 14.592
Something is wrong...
2021-11-29 2022-09-02 46.096 14.47
Something is wrong...


AttributeError: 'int' object has no attribute 'to_csv'

In [11]:
df_weather['weather'] = [weather_groups[x] for x in df_weather["conditions"]]

TypeError: 'int' object is not subscriptable

In [12]:
df_weather.rename(columns={"datetime": "date"}, inplace=True)

AttributeError: 'int' object has no attribute 'rename'

In [13]:
df_weather['date'] = pd.DatetimeIndex(df_weather['date']).date

TypeError: 'int' object is not subscriptable

In [14]:
selected = ["sensor_id","date", "temp", "tempmax", "tempmin", "conditions", "weather"]

In [15]:
df_weather = df_weather[selected]

TypeError: 'int' object is not subscriptable

In [40]:
if type(df_weather) == int:
    df_weather = pd.DataFrame(columns=selected)

# Add weather to sensor data

In [41]:
df = pd.merge(df, df_weather, how='left', on=['sensor_id', 'date'])

In [42]:
df.to_csv(os.path.join("data", "sc_weather.csv"), index=False)

In [43]:
df

Unnamed: 0,sensor_id,sensor_type,location,lat,lon,timestamp,date,time,hour,day,weekend,season,P1,P2,temp,tempmax,tempmin,conditions,weather
0,39982,SDS011,25670,44.782,10.356,2021-09-01 00:01:25,2021-09-01,0.023611,0,2,0,summer,4.88,4.13,,,,,
1,39982,SDS011,25670,44.782,10.356,2021-09-01 00:03:51,2021-09-01,0.064167,0,2,0,summer,5.35,4.90,,,,,
2,39982,SDS011,25670,44.782,10.356,2021-09-01 00:06:19,2021-09-01,0.105278,0,2,0,summer,5.05,4.75,,,,,
3,39982,SDS011,25670,44.782,10.356,2021-09-01 00:08:47,2021-09-01,0.146389,0,2,0,summer,5.28,4.85,,,,,
4,39982,SDS011,25670,44.782,10.356,2021-09-01 00:11:17,2021-09-01,0.188056,0,2,0,summer,4.97,4.45,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543142,39982,SDS011,25670,44.782,10.356,2022-09-02 23:43:33,2022-09-02,23.725833,23,4,0,summer,5.45,4.95,,,,,
543143,39982,SDS011,25670,44.782,10.356,2022-09-02 23:46:03,2022-09-02,23.767500,23,4,0,summer,8.68,5.95,,,,,
543144,39982,SDS011,25670,44.782,10.356,2022-09-02 23:48:41,2022-09-02,23.811389,23,4,0,summer,6.50,5.85,,,,,
543145,39982,SDS011,25670,44.782,10.356,2022-09-02 23:51:49,2022-09-02,23.863611,23,4,0,summer,7.63,6.28,,,,,
