In [1]:
import requests
from dotenv import  load_dotenv
import os

load_dotenv()

def call_api_to_get_data(date, enddate, city = "Ho+Chi+Minh+City"):
    """Fetch hourly historical weather from the World Weather Online past-weather API.

    Parameters
    ----------
    date : str
        Start of the requested range; sent as the ``date`` query parameter.
    enddate : str
        End of the requested range; sent as the ``enddate`` query parameter.
    city : str, optional
        Location sent as the ``q`` query parameter. It is inserted into the
        URL as given, which is why the default is already ``+``-separated.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    RuntimeError
        If the ``api_key`` environment variable is missing or empty.
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If connecting or reading takes longer than the configured timeout.
    """
    api_key = os.getenv("api_key")
    if not api_key:
        # Without this check the literal text "None" would be sent as the key
        # and the failure would only surface later while parsing the response.
        raise RuntimeError("Environment variable 'api_key' is not set (check your .env file)")

    # HTTPS so the API key in the query string is not sent in clear text.
    link = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx?q={city}&date={date}&enddate={enddate}&key={api_key}&format=json&tp=1".format(
        date = date,
        enddate = enddate,
        city = city,
        api_key = api_key
    )
    # (connect, read) timeout: a stalled connection must not hang the notebook forever.
    response = requests.get(link, timeout = (10, 60))
    # Fail loudly on HTTP errors instead of handing an error payload to the caller.
    response.raise_for_status()
    return response.json()

In [19]:
import pandas as pd

# Load the six per-variable tables of the historical hourly weather dataset.
# The files are read in the order listed; each name is rebound by later cells.
(
    df_humidity,
    df_pressure,
    df_temperature,
    df_weather_desc,
    df_wind_dir,
    df_wind_speed,
) = (
    pd.read_csv("historical-hourly-weather-dataset/humidity.csv"),
    pd.read_csv("historical-hourly-weather-dataset/pressure.csv"),
    pd.read_csv("historical-hourly-weather-dataset/temperature.csv"),
    pd.read_csv("historical-hourly-weather-dataset/weather_description.csv"),
    pd.read_csv("historical-hourly-weather-dataset/wind_direction.csv"),
    pd.read_csv("historical-hourly-weather-dataset/wind_speed.csv"),
)

In [6]:
from datetime import datetime, timedelta
import calendar

def get_lst_first_day_last_day(start_date = datetime(2012, 10, 1), end_date = datetime(2017, 11, 30)):
    """List the (first day, last day) of every calendar month touched by a range.

    Walks month by month from ``start_date`` while the walking date is not
    after ``end_date``. Each visited month always contributes its full span,
    even when ``start_date`` or ``end_date`` falls in the middle of it.

    Parameters
    ----------
    start_date : datetime
        Date inside the first month to include.
    end_date : datetime
        Date inside the last month to include.

    Returns
    -------
    list of (str, str)
        One ``("YYYY-MM-DD", "YYYY-MM-DD")`` tuple per month, in chronological
        order; empty when ``start_date`` is after ``end_date``.
    """
    day_pattern = "%Y-%m-%d"
    one_day = timedelta(days=1)

    month_spans = []
    cursor = start_date
    while cursor <= end_date:
        # monthrange returns (weekday of the 1st, number of days in the month).
        _, days_in_month = calendar.monthrange(cursor.year, cursor.month)

        month_start = cursor.replace(day=1)
        month_end = cursor.replace(day=days_in_month)
        month_spans.append((month_start.strftime(day_pattern), month_end.strftime(day_pattern)))

        # Stepping one day past the month's end lands on the 1st of the next month.
        cursor = month_end + one_day

    return month_spans


In [10]:
import numpy as np 

def handle_append_data(json, humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr , temp_arr):
    """Append every hourly reading found in an API response to the running arrays.

    Parameters
    ----------
    json : dict
        Decoded API response. ``json["data"]["weather"]`` must be a list of
        day records, each holding a ``"date"`` string and an ``"hourly"`` list.
    humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr, temp_arr : numpy.ndarray
        Arrays collected so far. They are not modified; extended copies are returned.

    Returns
    -------
    tuple of numpy.ndarray
        ``(humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr,
        weather_desc_arr, temp_arr)`` with the new readings appended, in the
        same order as the parameters.

    Raises
    ------
    KeyError
        If the response lacks any of the expected keys.
    ValueError
        If a numeric field cannot be parsed.

    Notes
    -----
    * Timestamps are formatted ``"YYYY-MM-DD HH:00:00"`` with a zero-padded
      hour so they compare equal to zero-padded datetime strings when used as
      a merge key.
    * Wind speed is converted from km/h to m/s and rounded to two decimals;
      temperature is converted from degrees Celsius to Kelvin.
    """
    # Collect into plain lists and extend each array once at the end:
    # calling np.append per reading copies the whole array every time.
    times = []
    humidities = []
    wind_speeds = []
    wind_dirs = []
    pressures = []
    temps = []
    weather_descs = []

    for day_data in json["data"]["weather"]:

        date = day_data["date"]
        for hour_data in day_data['hourly']:
            # "time" is parsed as an integer and divided by 100 to obtain the hour.
            hour = int(hour_data["time"]) // 100
            times.append("{date} {hour:02d}:00:00".format(date = date, hour = hour))
            wind_speeds.append(round(float(hour_data["windspeedKmph"]) * (10/36), 2))
            wind_dirs.append(int(hour_data["winddirDegree"]))
            humidities.append(int(hour_data["humidity"]))
            pressures.append(int(hour_data["pressure"]))
            temps.append(int(hour_data["tempC"]) + 273.15)
            weather_descs.append(hour_data["weatherDesc"][0]["value"])

    def _extend(arr, new_values):
        # Leave the array untouched (same dtype) when there is nothing to add.
        if not new_values:
            return arr
        return np.append(arr, new_values)

    return (
        _extend(humidity_arr, humidities),
        _extend(time_arr, times),
        _extend(wind_dir_arr, wind_dirs),
        _extend(wind_speed_arr, wind_speeds),
        _extend(pressure_arr, pressures),
        _extend(weather_desc_arr, weather_descs),
        _extend(temp_arr, temps),
    )


In [70]:
from IPython.display import clear_output, display

# Accumulators for the Ho Chi Minh City download; each starts as its own empty array.
humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, temp_arr = (
    np.array([]) for _ in range(6)
)
# Weather descriptions are text, so they are kept in an object array.
weather_desc_arr = np.array([], dtype = object)

dates_list = get_lst_first_day_last_day()
total_months = len(dates_list)
count = 0
for first_day, last_day in dates_list:
    # Replace the previous progress line instead of stacking a new one.
    clear_output(wait = True)
    print(count / total_months * 100)
    count += 1

    # One request per calendar month, using the default city of the API helper.
    payload = call_api_to_get_data(first_day, last_day)
    (
        humidity_arr,
        time_arr,
        wind_dir_arr,
        wind_speed_arr,
        pressure_arr,
        weather_desc_arr,
        temp_arr,
    ) = handle_append_data(
        payload,
        humidity_arr,
        time_arr,
        wind_dir_arr,
        wind_speed_arr,
        pressure_arr,
        weather_desc_arr,
        temp_arr,
    )



98.38709677419355


In [71]:
# One two-column frame per weather variable: the timestamp key plus the
# downloaded values, stored under the column name "Ho Chi Minh City".
df_hcm_humidity = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : humidity_arr})
df_hcm_pressure = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : pressure_arr})
df_hcm_wind_dir = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : wind_dir_arr})
df_hcm_wind_speed = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : wind_speed_arr})
df_hcm_temp= pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : temp_arr})
df_hcm_weather_desc = pd.DataFrame({"datetime" : time_arr, "Ho Chi Minh City" : weather_desc_arr})


def _add_city_column(df_all, df_city):
    """Attach the value column(s) of ``df_city`` to ``df_all``, matching on "datetime".

    A left join keeps every row already present in ``df_all``; timestamps with
    no match in ``df_city`` get NaN instead of being silently dropped, which is
    what the default inner join would do. Any column of the same name already
    in ``df_all`` is removed first, so re-running the cell replaces the column
    rather than producing ``_x`` / ``_y`` duplicates.
    """
    value_cols = [col for col in df_city.columns if col != "datetime"]
    df_all = df_all.drop(columns=value_cols, errors="ignore")
    return pd.merge(df_all, df_city, on="datetime", how="left")


df_humidity = _add_city_column(df_humidity, df_hcm_humidity)
df_pressure = _add_city_column(df_pressure, df_hcm_pressure)
df_wind_dir = _add_city_column(df_wind_dir, df_hcm_wind_dir)
df_wind_speed = _add_city_column(df_wind_speed, df_hcm_wind_speed)
df_temperature = _add_city_column(df_temperature, df_hcm_temp)
df_weather_desc = _add_city_column(df_weather_desc, df_hcm_weather_desc)

In [33]:
# Write each table back to the CSV it was loaded from, without the index column.
# NOTE(review): this overwrites the dataset files in place - keep a backup of the originals.
for frame, csv_path in (
    (df_humidity, "historical-hourly-weather-dataset/humidity.csv"),
    (df_pressure, "historical-hourly-weather-dataset/pressure.csv"),
    (df_wind_dir, "historical-hourly-weather-dataset/wind_direction.csv"),
    (df_wind_speed, "historical-hourly-weather-dataset/wind_speed.csv"),
    (df_temperature, "historical-hourly-weather-dataset/temperature.csv"),
    (df_weather_desc, "historical-hourly-weather-dataset/weather_description.csv"),
):
    frame.to_csv(csv_path, index= False)

In [82]:
# Compare the distinct description labels of the downloaded city with those of
# a city from the original dataset.
for city_name in ("Ho Chi Minh City", "Vancouver"):
    print(df_weather_desc[city_name].unique())

['Patchy rain possible' 'Moderate or heavy rain shower' 'Cloudy'
 'Moderate rain at times' 'Heavy rain at times' 'Partly cloudy' 'Clear'
 'Sunny' 'Overcast']
[nan 'mist' 'broken clouds' 'sky is clear' 'light rain' 'few clouds' 'fog'
 'overcast clouds' 'light intensity shower rain' 'light intensity drizzle'
 'scattered clouds' 'proximity shower rain' 'moderate rain'
 'heavy intensity rain' 'heavy snow' 'shower rain' 'snow'
 'heavy shower snow' 'light intensity drizzle rain' 'light snow'
 'very heavy rain' 'smoke' 'thunderstorm with heavy rain'
 'light shower snow' 'haze' 'thunderstorm with light rain' 'dust'
 'thunderstorm' 'heavy intensity shower rain' 'thunderstorm with rain'
 'sleet' 'drizzle' 'shower snow' 'light shower sleet'
 'ragged thunderstorm' 'proximity thunderstorm']


In [15]:
import pandas as pd 

# City metadata table of the dataset; its "City" column supplies the city names.
df_country = pd.read_csv("historical-hourly-weather-dataset/city_attributes.csv")

# Every distinct city from the metadata, followed by the extra city added by this notebook.
lst_city = [*df_country["City"].unique(), "Ho Chi Minh City"]


In [17]:
import numpy as np

# Monthly (first_day, last_day) request windows for the 2023 download. The list
# is identical for every city, so it is built once rather than once per city.
lst_dates = get_lst_first_day_last_day(datetime(2023, 1,1), datetime(2023, 10 , 1))

# Wide result frames (one column per city). Created empty up front so the names
# exist even if lst_city turns out to be empty.
df_new_humidity = pd.DataFrame()
df_new_pressure = pd.DataFrame()
df_new_temperature = pd.DataFrame()
df_new_weather_desc = pd.DataFrame()
df_new_wind_dir = pd.DataFrame()
df_new_wind_speed = pd.DataFrame()

# True until the first city has been stored; that city seeds the wide frames.
init = True

for city in lst_city:

    # Fresh accumulators for this city.
    humidity_arr = np.array([])
    time_arr = np.array([])
    wind_dir_arr = np.array([])
    wind_speed_arr = np.array([])
    pressure_arr = np.array([])
    temp_arr = np.array([])
    weather_desc_arr = np.array([], dtype = object)

    for count, date in enumerate(lst_dates):
        # Progress: city name and percentage of monthly requests already made.
        print(city, count/ len(lst_dates) * 100)
        json = call_api_to_get_data(date[0], date[1], city)
        humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr , temp_arr = handle_append_data(
            json, humidity_arr, time_arr, wind_dir_arr, wind_speed_arr, pressure_arr, weather_desc_arr , temp_arr)

    # Two-column frames for this city: the timestamp key plus one value column named after the city.
    df_city_humidity = pd.DataFrame({"datetime" : time_arr, city : humidity_arr})
    df_city_pressure = pd.DataFrame({"datetime" : time_arr, city : pressure_arr})
    df_city_wind_dir = pd.DataFrame({"datetime" : time_arr, city : wind_dir_arr})
    df_city_wind_speed = pd.DataFrame({"datetime" : time_arr, city : wind_speed_arr})
    df_city_temp= pd.DataFrame({"datetime" : time_arr, city : temp_arr})
    df_city_weather_desc = pd.DataFrame({"datetime" : time_arr , city : weather_desc_arr})

    if init:
        # First city: nothing to merge with yet.
        df_new_humidity = df_city_humidity.copy()
        df_new_pressure = df_city_pressure.copy()
        df_new_wind_dir = df_city_wind_dir.copy()
        df_new_wind_speed = df_city_wind_speed.copy()
        df_new_temperature = df_city_temp.copy()
        df_new_weather_desc = df_city_weather_desc.copy()
        init = False
    else :
        # Default (inner) merge: only timestamps present for every city so far are kept.
        df_new_humidity = pd.merge(df_new_humidity, df_city_humidity, on="datetime")
        df_new_pressure = pd.merge(df_new_pressure, df_city_pressure, on="datetime")
        df_new_wind_dir = pd.merge(df_new_wind_dir, df_city_wind_dir, on="datetime")
        df_new_wind_speed = pd.merge(df_new_wind_speed, df_city_wind_speed, on="datetime")
        df_new_temperature = pd.merge(df_new_temperature, df_city_temp, on="datetime")
        df_new_weather_desc = pd.merge(df_new_weather_desc, df_city_weather_desc, on="datetime")
    
        

Vancouver 0.0
Vancouver 10.0
Vancouver 20.0
Vancouver 30.0
Vancouver 40.0
Vancouver 50.0
Vancouver 60.0
Vancouver 70.0
Vancouver 80.0
Vancouver 90.0
Portland 0.0
Portland 10.0
Portland 20.0
Portland 30.0
Portland 40.0
Portland 50.0
Portland 60.0
Portland 70.0
Portland 80.0
Portland 90.0
San Francisco 0.0
San Francisco 10.0
San Francisco 20.0
San Francisco 30.0
San Francisco 40.0
San Francisco 50.0
San Francisco 60.0
San Francisco 70.0
San Francisco 80.0
San Francisco 90.0
Seattle 0.0
Seattle 10.0
Seattle 20.0
Seattle 30.0
Seattle 40.0
Seattle 50.0
Seattle 60.0
Seattle 70.0
Seattle 80.0
Seattle 90.0
Los Angeles 0.0
Los Angeles 10.0
Los Angeles 20.0
Los Angeles 30.0
Los Angeles 40.0
Los Angeles 50.0
Los Angeles 60.0
Los Angeles 70.0
Los Angeles 80.0
Los Angeles 90.0
San Diego 0.0
San Diego 10.0
San Diego 20.0
San Diego 30.0
San Diego 40.0
San Diego 50.0
San Diego 60.0
San Diego 70.0
San Diego 80.0
San Diego 90.0
Las Vegas 0.0
Las Vegas 10.0
Las Vegas 20.0
Las Vegas 30.0
Las Vegas 40.0
La

In [32]:
# Stack the newly downloaded rows underneath the existing tables.
# ignore_index=True renumbers the rows 0..n-1; without it the appended rows keep
# their own 0-based labels and the result has duplicate index values.
df_humidity = pd.concat([df_humidity, df_new_humidity], axis= 0, ignore_index= True)
df_pressure = pd.concat([df_pressure, df_new_pressure], axis= 0, ignore_index= True)
df_wind_dir = pd.concat([df_wind_dir, df_new_wind_dir], axis= 0, ignore_index= True)
df_wind_speed = pd.concat([df_wind_speed, df_new_wind_speed], axis= 0, ignore_index= True)
df_temperature = pd.concat([df_temperature, df_new_temperature], axis= 0, ignore_index= True)
df_weather_desc = pd.concat([df_weather_desc, df_new_weather_desc], axis= 0, ignore_index= True)


In [15]:
import dill 

# Save the current interpreter session to the file "nb.db" so it can be
# restored later without re-running the download cells.
dill.dump_session("nb.db")

In [1]:
import dill

# Restore the session previously saved to "nb.db".
# NOTE(review): loading a dill/pickle file can execute arbitrary code - only
# load a file that you created yourself.
dill.load_session("nb.db")