In [19]:
import os
from datetime import datetime

import pandas as pd
import pytz
import requests

# OpenWeatherMap current-weather endpoint queried by get_weather_data.
WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"

# The API key is a secret: read it from the environment instead of committing
# it to the notebook. Any key that was previously hard-coded in this file
# should be treated as exposed and revoked.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
if not API_KEY:
    print("OPENWEATHER_API_KEY is not set; set it before fetching weather data.")


# Coordinates looked up by get_weather_data, keyed by province name.
# Each entry maps to {"lat": <latitude>, "lon": <longitude>}.
provinces = {
    name: {"lat": lat, "lon": lon}
    for name, lat, lon in (
        ("Chiang Mai", 18.79038, 98.98468),
        ("Lamphun", 18.5745, 99.0087),
        ("Lampang", 18.29232, 99.49277),
        ("Uttaradit", 17.6255, 100.0942),
        ("Phrae", 18.1459, 100.1410),
        ("Nan", 18.793543, 100.743077),
        ("Phayao", 19.1920, 99.8788),
        ("Chiang Rai", 19.9086, 99.8325),
        ("Mae Hong Son", 19.3006, 97.9686),
    )
}
# Function to fetch and process weather data
def get_weather_data(province='Pathum Thani'):
    """Fetch the current weather for one province and flatten it into a dict.

    The province's coordinates are looked up in the module-level ``provinces``
    table and sent to ``WEATHER_ENDPOINT`` with ``units=metric``.

    Args:
        province: A key of ``provinces``.
            NOTE(review): the default 'Pathum Thani' is not a key of the
            ``provinces`` table in this notebook, so a province has to be
            passed explicitly until the table or the default is updated.

    Returns:
        A flat dict holding the observation time (``timestamp`` and its
        year/month/day/hour/minute parts), the fetch time (``created_at``),
        the requested province, the location name reported by the API and the
        weather / main / visibility / wind fields. ``None`` if the HTTP
        request fails or the response lacks an expected key (the error is
        printed).

    Raises:
        KeyError: if ``province`` is not a key of ``provinces``.
    """
    if province not in provinces:
        raise KeyError(
            f"Unknown province {province!r}; expected one of {list(provinces)}"
        )
    coords = provinces[province]

    params = {
        "lat": coords['lat'],
        "lon": coords['lon'],
        "appid": API_KEY,
        "units": "metric"
    }
    try:
        # A timeout keeps one unresponsive request from hanging the whole run;
        # a timeout surfaces as a RequestException and is handled below.
        response = requests.get(WEATHER_ENDPOINT, params=params, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        # Pass the zone to now()/fromtimestamp() rather than using
        # replace(tzinfo=...): a pytz zone attached via replace() carries the
        # zone's local-mean-time offset instead of +07:00, and a naive now()
        # is only Bangkok time when the host clock is set to that zone.
        thai_tz = pytz.timezone('Asia/Bangkok')
        created_at = datetime.now(thai_tz)

        # Observation time as Bangkok wall-clock time, independent of the
        # host's timezone. The tzinfo is dropped so the value stays naive,
        # as before, and the derived partition fields keep the same meaning.
        timestamp = datetime.fromtimestamp(data['dt'], tz=thai_tz).replace(tzinfo=None)

        # Create dictionary with required fields
        weather_dict = {
            'timestamp': timestamp,
            'year': timestamp.year,
            'month': timestamp.month,
            'day': timestamp.day,
            'hour': timestamp.hour,
            'minute': timestamp.minute,
            'created_at': created_at,
            'requested_province': province,
            'location': data['name'],
            'weather_main': data['weather'][0]['main'],
            'weather_description': data['weather'][0]['description'],
            'main.temp': data['main']['temp'],
            'main.temp_min': data['main']['temp_min'],
            'main.temp_max': data['main']['temp_max'],
            'main.feels_like': data['main']['feels_like'],
            'main.pressure': data['main']['pressure'],
            'main.humidity': data['main']['humidity'],
            'main.sea_level': data['main']['sea_level'],
            'main.grnd_level': data['main']['grnd_level'],
            'visibility': data['visibility'],
            'wind.speed': data['wind']['speed'],
            'wind.deg': data['wind']['deg']
        }

        return weather_dict

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    except KeyError as e:
        print(f"Error processing data: Missing key {e}")
        return None

In [20]:
# Fetch one record per province. get_weather_data returns None when a fetch
# fails, and a None mixed into a list of dicts breaks the DataFrame
# constructor, so failed fetches are dropped before building the frame.
weather_records = [get_weather_data(p) for p in provinces]
df = pd.DataFrame([rec for rec in weather_records if rec is not None])
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 22 columns):
 #   Column               Non-Null Count  Dtype                       
---  ------               --------------  -----                       
 0   timestamp            9 non-null      datetime64[ns]              
 1   year                 9 non-null      int64                       
 2   month                9 non-null      int64                       
 3   day                  9 non-null      int64                       
 4   hour                 9 non-null      int64                       
 5   minute               9 non-null      int64                       
 6   created_at           9 non-null      datetime64[ns, Asia/Bangkok]
 7   requested_province   9 non-null      object                      
 8   location             9 non-null      object                      
 9   weather_main         9 non-null      object                      
 10  weather_description  9 non-null      objec

Unnamed: 0,timestamp,year,month,day,hour,minute,created_at,requested_province,location,weather_main,...,main.temp_min,main.temp_max,main.feels_like,main.pressure,main.humidity,main.sea_level,main.grnd_level,visibility,wind.speed,wind.deg
0,2025-04-30 16:09:32,2025,4,30,16,9,2025-04-30 16:31:55.022578+07:00,Chiang Mai,Chiang Mai,Clouds,...,31.27,32.97,35.71,1007,51,1007,961,10000,4.12,160
1,2025-04-30 16:13:54,2025,4,30,16,13,2025-04-30 16:31:55.118271+07:00,Lamphun,Lamphun,Clouds,...,32.32,33.1,39.78,1007,74,1007,969,10000,3.3,261
2,2025-04-30 16:12:42,2025,4,30,16,12,2025-04-30 16:31:55.222451+07:00,Lampang,Lampang,Clouds,...,28.57,29.45,27.81,1008,5,1008,973,10000,3.14,321
3,2025-04-30 16:04:23,2025,4,30,16,4,2025-04-30 16:31:55.328733+07:00,Uttaradit,Uttaradit,Clouds,...,35.3,35.3,40.21,1006,47,1006,984,10000,3.69,229
4,2025-04-30 16:09:28,2025,4,30,16,9,2025-04-30 16:31:55.434688+07:00,Phrae,Phrae,Clouds,...,32.25,32.25,32.64,1006,40,1006,969,10000,2.85,159


In [21]:
import pandas as pd

# lakeFS credentials: taken from the environment when set, otherwise the
# defaults from your docker-compose.yml
ACCESS_KEY = os.environ.get("LAKEFS_ACCESS_KEY", "access_key")
SECRET_KEY = os.environ.get("LAKEFS_SECRET_KEY", "secret_key")

# lakeFS endpoint (running locally). This is the server root, matching the
# endpoint used when the DataFrame is written; the repository is addressed
# through the path below, not through the endpoint URL.
lakefs_endpoint = "http://localhost:8001"
# lakeFS repository, branch, and file path
repo = "weather"
branch = "main"
path = "weather.parquet"

# Construct the full lakeFS S3-compatible path
lakefs_s3_path = f"s3a://{repo}/{branch}/{path}"

# Configure storage_options for lakeFS (S3-compatible)
storage_options = {
    "key": ACCESS_KEY,
    "secret": SECRET_KEY,
    "client_kwargs": {
        "endpoint_url": lakefs_endpoint
    }
}

In [6]:
pip install pyarrow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
pip install s3fs

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
pip install fastparquet

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
pip show fsspec

Name: fsspec
Version: 2025.3.2
Summary: File-system specification
Home-page: 
Author: 
Author-email: 
License: BSD 3-Clause License
        
        Copyright (c) 2018, Martin DurantNote: you may need to restart the kernel to use updated packages.
        All rights reserved.
        
        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are met:
        
        * Redistributions of source code must retain the above copyright notice, this
          list of conditions and the following disclaimer.

        
        * Redistributions in binary form must reproduce the above copyright notice,
          this list of conditions and the following disclaimer in the documentation
          and/or other materials provided with the distribution.
        
        * Neither the name of the copyright holder nor the names of its
          contributors may be used to endorse or promote products derived fro

In [22]:
# Root of the partitioned dataset inside the lakeFS repository, built from the
# repo / branch / path settings of the configuration cell instead of repeating
# them as literals (note the trailing slash).
lakefs_s3_path = f"s3://{repo}/{branch}/{path}/"

# Reuse the credentials from the configuration cell so they are defined once.
storage_options = {
    "key": ACCESS_KEY,
    "secret": SECRET_KEY,
    "client_kwargs": {
        "endpoint_url": "http://localhost:8001"
    }
}

# Write one directory level per partition column under lakefs_s3_path.
df.to_parquet(
    lakefs_s3_path,
    storage_options=storage_options,
    partition_cols=['year', 'month', 'day', 'hour']
)

In [18]:
from datetime import datetime
import pytz

# Current time in Thailand. Pass the zone to now() instead of calling
# replace(tzinfo=...): a pytz zone attached via replace() carries the zone's
# local-mean-time offset rather than +07:00, and a naive now() is only
# Bangkok time when the host clock is set to that zone.
thai_tz = pytz.timezone('Asia/Bangkok')
dt = datetime.now(thai_tz)