In [1]:
import requests
import pandas as pd
import datetime
from datetime import timezone
from dateutil import parser as parser
from dateutil.relativedelta import *

In [2]:
# Identification headers passed along with the api.met.no requests made in
# this notebook. The project URL doubles as the User-Agent and site name.
_project_url = 'https://github.com/saraalms/weather'
_contact_note = 'This is just a student project, nothing commercial! Please contact me if you have any doubts or questions'

yr_headers = {
    'User-Agent': _project_url,
    'sitename': _project_url,
    'email': 'saraalms@kth.se',
    'comment': _contact_note,
}

# File names

In [None]:
# CSV file names for stored forecasts. Only `yr_csv_name` is read/written in
# the cells visible below; `smhi_csv_name` is defined for the SMHI counterpart.
yr_csv_name, smhi_csv_name = 'yr.csv', 'smhi.csv'

URLS

In [3]:
class Urls:
    """Builds request URLs for the SMHI and MET Norway (yr) forecast APIs.

    Attributes:
        smhi_base: Scheme and host of the SMHI forecast service.
        smhi_params: Path of the SMHI parameter listing.
        smhi_valid_times: Path of the SMHI valid-time listing.
        yr_headers: HTTP headers sent with requests to api.met.no.
    """

    # Base of the yr locationforecast endpoint; the variant ("complete" or
    # "compact") is appended per request.
    YR_BASE = 'https://api.met.no/weatherapi/locationforecast/2.0'

    # Point queried by get_yr_params(). Only the response's units metadata is
    # read there, the forecast itself is ignored.
    YR_REFERENCE_LAT = 59.3417
    YR_REFERENCE_LON = 18.0549

    def __init__(self, yr_headers):
        """Store the SMHI endpoint pieces and the headers used for yr requests."""
        self.smhi_base = 'https://opendata-download-metfcst.smhi.se'
        self.smhi_params = '/api/category/pmp3g/version/2/parameter.json'
        self.smhi_valid_times = '/api/category/pmp3g/version/2/validtime.json'
        self.yr_headers = yr_headers

    def get_smhi_point_url(self, lon, lat):
        """Return the SMHI point-forecast URL for the given longitude/latitude."""
        return self.smhi_base + f'/api/category/pmp3g/version/2/geotype/point/lon/{lon}/lat/{lat}/data.json'

    def get_yr_point_url(self, lon, lat, compact=False):
        """Return the yr locationforecast URL for the given longitude/latitude.

        Args:
            lon: Longitude of the point.
            lat: Latitude of the point.
            compact: If True use the ``compact`` endpoint, otherwise the
                ``complete`` one (the default, as before).
        """
        variant = 'compact' if compact else 'complete'
        return f'{self.YR_BASE}/{variant}?lat={lat}&lon={lon}'

    def get_smhi_params(self):
        """Return the URL of the SMHI parameter listing."""
        return self.smhi_base + self.smhi_params

    def get_smhi_valid_times(self):
        """Return the URL of the SMHI valid-time listing."""
        return self.smhi_base + self.smhi_valid_times

    def get_yr_params(self, compact=False):
        """Fetch the parameter names yr reports in its units metadata.

        Performs an HTTP GET against the reference point and returns the keys
        of ``properties.meta.units`` from the JSON response as a list.

        Args:
            compact: Query the ``compact`` endpoint instead of ``complete``.
        """
        url = self.get_yr_point_url(
            self.YR_REFERENCE_LON, self.YR_REFERENCE_LAT, compact=compact
        )
        response = requests.get(url, headers=self.yr_headers)
        return list(response.json()['properties']['meta']['units'].keys())

In [4]:
class TimeStamps:
    """Conversions between timestamp strings and datetimes, plus differences."""

    # Output format; the trailing "Z" is a literal, so values must be UTC.
    _STR_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self):
        pass

    def str_to_datetime(self, str_time):
        """Parse a timestamp string with dateutil and return the datetime."""
        return parser.parse(str_time)

    def datetime_to_str(self, datetime_time):
        """Format a datetime as ``YYYY-MM-DDTHH:MM:SSZ``.

        Timezone-aware values are converted to UTC first so the literal "Z"
        suffix is truthful. Naive values are formatted as-is (they are taken
        to already be UTC).
        """
        if datetime_time.utcoffset() is not None:
            datetime_time = datetime_time.astimezone(timezone.utc)
        return datetime_time.strftime(self._STR_FORMAT)

    def datetime_diff(self, datetime_0, datetime_1):
        """Return ``relativedelta(datetime_1, datetime_0)``, i.e. datetime_1 minus datetime_0."""
        return relativedelta(datetime_1, datetime_0)

    def str_time_diff(self, time_0, time_1):
        """Return the relativedelta between two timestamp strings.

        The arguments are handed to ``datetime_diff`` in swapped order, so the
        result is ``time_0 - time_1``, the opposite sign convention from
        ``datetime_diff``.
        NOTE(review): this looks unintentional (a future ``time_1`` yields a
        negative difference), but the sign is kept because stored data may
        already rely on it. Confirm before changing.
        """
        return self.datetime_diff(self.str_to_datetime(time_1), self.str_to_datetime(time_0))

    def get_current_datetime(self):
        """Return the current UTC time, truncated to whole seconds.

        The value is round-tripped through the string format, so it comes
        back as whatever ``str_to_datetime`` produces for a "Z"-suffixed string.
        """
        return self.str_to_datetime(self.get_current_str_time())

    def get_current_str_time(self):
        """Return the current UTC time formatted by ``datetime_to_str``."""
        # datetime.utcnow() is deprecated; an aware "now" formats identically.
        return self.datetime_to_str(datetime.datetime.now(timezone.utc))

In [5]:
class DataParser:
    """Turns forecast API responses into row dictionaries and DataFrames."""

    def __init__(self) -> None:
        # Attribute name (including its spelling) is kept for compatibility.
        self.time_staps = TimeStamps()

    def parse_yr_json(self, json_object):
        """Flatten a yr locationforecast response into one dict per time step.

        Args:
            json_object: Decoded JSON response. Must provide
                ``geometry.coordinates`` and ``properties.timeseries``, where
                each timeseries entry has ``time`` and
                ``data.instant.details``.

        Returns:
            A dict mapping a running index (0, 1, ...) to a row dict holding
            the company name, coordinates, retrieval time, forecast time, the
            time difference split into days/hours/minutes, and every key of
            the entry's instant details.
        """
        # GeoJSON positions are ordered [longitude, latitude, (altitude)].
        coordinates = json_object['geometry']['coordinates']
        lon, lat = coordinates[0], coordinates[1]

        # One retrieval timestamp is shared by all rows of this response.
        time_retrieved = self.time_staps.get_current_str_time()

        dict_of_forecast_dict = {}
        for idx, forecast in enumerate(json_object['properties']['timeseries']):
            time_of_forecast = forecast['time']
            time_diff = self.time_staps.str_time_diff(time_retrieved, time_of_forecast)

            forecast_dict = {
                'company': 'yr',
                'latitude': lat,
                'longitude': lon,
                'time_retrieved': time_retrieved,
                'time_of_forecast': time_of_forecast,
                'time_diff_days': time_diff.days,
                'time_diff_hours': time_diff.hours,
                'time_diff_minutes': time_diff.minutes,
            }
            # Instant details are copied in as additional columns.
            forecast_dict.update(forecast['data']['instant']['details'])

            dict_of_forecast_dict[idx] = forecast_dict

        return dict_of_forecast_dict

    def dict_to_df(self, dict_of_dicts, df_to_add_to=False):
        """Append the row dicts to a DataFrame and return the result.

        Args:
            dict_of_dicts: Mapping whose values are row dictionaries.
            df_to_add_to: Existing DataFrame to extend. Any non-DataFrame
                value (the default ``False``) means "start from empty".

        Returns:
            A DataFrame with the existing rows followed by the new ones,
            re-indexed from 0. The input frame is not modified.
        """
        if isinstance(df_to_add_to, pd.DataFrame):
            base = df_to_add_to
        else:
            base = pd.DataFrame()

        records = list(dict_of_dicts.values())
        if not records:
            return base

        # Build all new rows at once; DataFrame.append was removed in
        # pandas 2.0 and appending row by row was quadratic anyway.
        new_rows = pd.DataFrame(records)
        if base.shape == (0, 0):
            return new_rows
        return pd.concat([base, new_rows], ignore_index=True)

Get parameters

In [6]:
# URL helper shared by the request cells below.
my_urls = Urls(yr_headers)

# SMHI: valid forecast times, then the parameter catalogue.
valid_times = requests.get(my_urls.get_smhi_valid_times()).json()
parameter_response = requests.get(my_urls.get_smhi_params()).json()['parameter']

# Parameter names in response order, plus a name -> full entry lookup.
smhi_parameter_list = [entry['name'] for entry in parameter_response]
smhi_parameter_dict = {entry['name']: entry for entry in parameter_response}

# yr: parameter names taken from the units metadata of a forecast response.
yr_parameter_list = my_urls.get_yr_params()

# Mapping from SMHI parameter names to yr parameter names, read from CSV.
param_df = pd.read_csv('smhi_yr_dict.csv', delimiter=';')
param_dict = dict(zip(param_df['smhi_name'], param_df['yr_name']))


Get locations with Name, Latitud, Longitud and Höjd (m)

In [7]:
# Load the station list and keep only rows flagged active ('Ja').
# where() blanks the non-matching rows and dropna() then removes every row
# that contains any missing value; the bookkeeping columns are dropped last.
locations_df = (
    pd.read_csv('smhi_locations.csv', sep=';')
    .pipe(lambda stations: stations.where(stations['Aktiv'] == 'Ja'))
    .dropna()
    .drop(columns=['Id', 'Aktiv'])
)

# Station names (column 'Namn') to fetch forecasts for.
selected_locations = ['Stockholm']

In [8]:
# Fetch the SMHI and yr point forecasts for each selected station.
# The response variables are overwritten on every iteration, so after the
# loop they hold the data of the last station in `selected_locations`.
for location in selected_locations:
    station_row = locations_df.where(locations_df['Namn'] == location).dropna()
    lat = station_row['Latitud'].values[0]
    lon = station_row['Longitud'].values[0]

    smhi_response = requests.get(my_urls.get_smhi_point_url(lon, lat)).json()
    yr_response = requests.get(
        my_urls.get_yr_point_url(lon, lat), headers=yr_headers
    ).json()

In [9]:
my_parser = DataParser()

# Load previously stored forecasts; start from an empty frame on the first
# run, when the CSV does not exist yet (or exists but is empty).
try:
    yr_df = pd.read_csv(yr_csv_name, sep=';')
except (FileNotFoundError, pd.errors.EmptyDataError):
    yr_df = pd.DataFrame()

# Earlier runs wrote the row index into the file, which read_csv brings back
# as 'Unnamed: ...' columns. Discard those so they stop accumulating.
stale_index_columns = [col for col in yr_df.columns if str(col).startswith('Unnamed:')]
yr_df = yr_df.drop(columns=stale_index_columns)

# Append the rows parsed from the latest yr response.
dict_of_yr_dicts = my_parser.parse_yr_json(yr_response)
yr_df = my_parser.dict_to_df(dict_of_yr_dicts, df_to_add_to=yr_df)

# index=False keeps the row index out of the file, so the CSV round-trips
# cleanly on the next run.
yr_df.to_csv(yr_csv_name, sep=';', index=False)