### Read Thai City

In [None]:
#select ชื่อเมืองในไทย
import json

with open('city.list.json', 'r') as f:
    d_test = json.load(f)

th_cities = [city for city in d_test if city['country'] == 'TH']
th_cities[:5]

#count = len(th_cities)
#count

[{'id': 1116760,
  'name': 'Don Sak',
  'state': '',
  'country': 'TH',
  'coord': {'lon': 99.691841, 'lat': 9.31676}}]

In [10]:
provinces = {
    city['name']: {
        'lat': city['coord']['lat'],
        'lon': city['coord']['lon']
    } for city in th_cities
}
# provinces
dict(list(provinces.items())[:5])

{'Don Sak': {'lat': 9.31676, 'lon': 99.691841},
 'Ban Thung That': {'lat': 8.26667, 'lon': 99.583328},
 'Thung Sai': {'lat': 16.31489, 'lon': 99.832672},
 'Tham Phannara': {'lat': 8.42044, 'lon': 99.395172},
 'Ban Talat Yai': {'lat': 7.88481, 'lon': 98.400078}}

### Fetch weather Data

In [None]:
# Test Find Lat Lon
province = "Pathum Thani"
print(
    provinces[province]['lat'],
    provinces[province]['lon']
)

14.01346 100.530487


In [None]:
#Endpoint API
WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"
API_KEY = "68c16da5e675ad8635df84629765b118"

In [None]:
import requests
import pandas as pd
from datetime import datetime
import time
import pytz

# # Endpoint API
# WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"
# API_KEY = "68c16da5e675ad8635df84629765b118"


# Function to fetch and process weather data

def get_weather_data(province='Pathum Thani'):
    #Sleep
    # time.sleep(2)
    #ENDPOINT API Parameters
    WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"
    API_KEY = "68c16da5e675ad8635df84629765b118"
    params = {
        "lat": provinces[province]['lat'],
        "lon": provinces[province]['lon'],
        "appid": API_KEY,
        "units": "metric"
    }

    try:
        # Make API request
        response = requests.get(WEATHER_ENDPOINT, params=params)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()
        # return data # Fetch Weather Data
        
        # Convert timestamp to datetime
        dt = datetime.now()
        thai_tz = pytz.timezone('Asia/Bangkok')
        created_at = dt.replace(tzinfo=thai_tz)
        
        timestamp = datetime.now()
        
        # Create dictionary with required fields
        weather_dict = {
            'timestamp': timestamp,
            'year': timestamp.year,
            'month': timestamp.month,
            'day': timestamp.day,
            'hour': timestamp.hour,
            'minute': timestamp.minute,
            'created_at': created_at,
            'requested_province' : province,
            'location': data['name'],
            'weather_main': data['weather'][0]['main'],
            'weather_description': data['weather'][0]['description'],
            'main.temp': data['main']['temp']
        }
        
        # Create DataFrame
        # df = pd.DataFrame([weather_dict])
        
        # return df
        return weather_dict
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    except KeyError as e:
        print(f"Error processing data: Missing key {e}")
        return None
    

In [15]:
get_weather_data()

{'timestamp': datetime.datetime(2025, 4, 26, 2, 8, 43, 669855),
 'year': 2025,
 'month': 4,
 'day': 26,
 'hour': 2,
 'minute': 8,
 'created_at': datetime.datetime(2025, 4, 26, 2, 8, 43, 628633, tzinfo=<DstTzInfo 'Asia/Bangkok' LMT+6:42:00 STD>),
 'requested_province': 'Pathum Thani',
 'location': 'Pathum Thani',
 'weather_main': 'Clouds',
 'weather_description': 'few clouds',
 'main.temp': 29.99}

### FLOW

In [23]:
weatherdata_df = pd.DataFrame([get_weather_data(p) for p in list(provinces.keys())[:2]])
weatherdata_df

Unnamed: 0,timestamp,year,month,day,hour,minute,created_at,requested_province,location,weather_main,weather_description,main.temp
0,2025-04-26 02:17:25.524864,2025,4,26,2,17,2025-04-26 02:35:25.524837+07:00,Don Sak,Don Sak,Clouds,overcast clouds,27.17
1,2025-04-26 02:17:25.641887,2025,4,26,2,17,2025-04-26 02:35:25.641859+07:00,Ban Thung That,Ban Thung That,Clear,clear sky,24.42


In [None]:
import nest_asyncio
nest_asyncio.apply()

import asyncio
from prefect import flow, task
from prefect.context import get_run_context
from prefect.client import get_client

@task
async def log_schedule_time():
    # Retrieve the current run context
    context = get_run_context()
    # Get the flow run ID from the context
    flow_run_id = context.flow_run_id
    print(f"Flow run ID: {flow_run_id}")
    
    # Use the Prefect client to retrieve full flow run details
    async with get_client() as client:
        flow_run = await client.read_flow_run(flow_run_id)
        # scheduled_start_time may be None if no schedule was applied;
        # otherwise, it should be set.
        scheduled_time = flow_run.scheduled_start_time
        print(f"This flow run was scheduled to start at: {scheduled_time}")
    return scheduled_time

@flow
def my_flow():
    # Because log_schedule_time is async, await it in an async context.
    # In Jupyter you can use top-level await. Otherwise, you can run the async function via asyncio.run().
    result = asyncio.run(log_schedule_time())
    return result

if __name__ == '__main__':
    my_flow()

In [None]:
from prefect import flow, task
from prefect.deployments import Deployment
from prefect.server.schemas.schedules import IntervalSchedule
from datetime import timedelta
import pandas as pd
import asyncio
import aiohttp
import nest_asyncio

nest_asyncio.apply()

# ---------- Async API Fetch Task ----------
@task
async def fetch_pollution_data(df):
    API_KEY = "your_api_key"
    endpoint = "http://api.openweathermap.org/data/2.5/air_pollution"

    async def fetch_row(session, row):
        try:
            params = {"lat": row['lat'], "lon": row['lon'], "appid": API_KEY}
            async with session.get(endpoint, params=params) as response:
                data = await response.json()
                components = data['list'][0]['components']
                return {
                    'province': row['province'],
                    'amphoe': row['amphoe'],
                    'aqi': data['list'][0]['aqi'],
                    'pm2_5': components['pm2_5'],
                    'pm10': components['pm10']
                }
        except Exception as e:
            return {'province': row['province'], 'amphoe': row['amphoe'], 'error': str(e)}

    async with aiohttp.ClientSession() as session:
        tasks = [fetch_row(session, row) for _, row in df.iterrows()]
        results = await asyncio.gather(*tasks)
    return results

# ---------- Main Flow ----------
@flow(name="pollution-flow")
def main_pollution_flow():
    df = pd.read_csv("dsi321/save/amphoe_coordinates.csv")  # ไฟล์ CSV ที่เก็บ lat/lon
    df = df.head(10)  # ชั่วคราว: จำกัดจำนวนเพื่อทดสอบ
    results = asyncio.run(fetch_pollution_data.submit(df))
    pollution_df = pd.DataFrame(results.result())
    pollution_df.to_csv("dsi321/save/pollution_output.csv", index=False)


In [6]:
import requests
import pandas as pd
from datetime import datetime
import time
import pytz
from datetime import timedelta

import nest_asyncio
import asyncio
import aiohttp
from prefect import flow, task # Prefect flow and task decorators


nest_asyncio.apply()


# @task
async def fetch_pollution_data(context):
    API_KEY = "68c16da5e675ad8635df84629765b118"
    # WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"
    POLLUTION_ENDPOINT = "http://api.openweathermap.org/data/2.5/air_pollution"
    async def fetch_row(session, row):
        try:
            params = {
                # "lat": province_context['lat'],
                # "lon": province_context['lon'],
                "lat" : lat,
                "lon" : lon,
                "appid": API_KEY,
                "units": "metric"
            }
            async with session.get(POLLUTION_ENDPOINT, params=params) as response:
                data = await response.json()
                dt = datetime.utcnow()
                thai_tz = pytz.timezone('Asia/Bangkok')
                localtime = dt.astimezone(thai_tz)
                components = data['list'][0]['components']
                pollution_dict = {
                    'timestamp': dt,
                    'year': dt.year,
                    'month': dt.month,
                    'day': dt.day,
                    'hour': dt.hour,
                    'minute': dt.minute,
                    'localtime': localtime,
                    'province' : province,
                    'amphoe' : amphoe,
                    'lat' : data['coord']['lat'],
                    'lon' : data['coord']['lon'],
                    # 'location': data['name'],
                    'aqi' : data['list'][0]['main']['aqi'],
                    'co' : components['co'],
                    'no' : components['no'],
                    'no2' : components['no2'],
                    'o3' : components['o3'],
                    'so2' : components['so2'],
                    'pm25' : components['pm2_5'],
                    'pm10' : components['pm10'],
                    'nh3' : components['nh3']
                }
                return pollution_dict
                
        except Exception as e:
            return {
                'province': row.get('province', 'unknown'),
                'amphoe': row.get('amphoe', 'unknown'),
                'lat': row.get('lat', None),
                'lon': row.get('lon', None),
                'error': str(e)
        }
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            return None
        except KeyError as e:
            print(f"Error processing data: Missing key {e}")
            return None
        except Exception as e:
            print(f"Error: {e} at {province} - {amphoe}")
            return None


    async with aiohttp.ClientSession() as session:
        tasks = [fetch_row(session, row) for _, row in df.iterrows()]
        results = await asyncio.gather(*tasks)
    return results
            

        # Make API request
        # response = requests.get(POLLUTION_ENDPOINT, params=params)
        # response.raise_for_status()  # Raise an exception for bad status codes
        # data = response.json()
        # return data

# get_pollution_data()

In [None]:
# @flow(name="pollution-flow")
def main_pollution_flow():
    df = pd.read_csv("dsi321/save/amphoe_coordinates.csv")  # ไฟล์ CSV ที่เก็บ lat/lon
    df = df.head(10)  # ชั่วคราว: จำกัดจำนวนเพื่อทดสอบ
    results = asyncio.run(fetch_pollution_data.submit(df))
    pollution_df = pd.DataFrame(results.result())
    pollution_df.to_csv("dsi321/save/pollution_output.csv", index=False)

In [None]:
# def create_df():
# สมมติคุณมี coord_df อยู่แล้ว

import time

start_time = time.time()

pollution_data = []
    
for _, row in df_sample.iterrows():
        context = {
            'province': row['provinceEN'],
            'amphoe': row['amphoeEN'],
            'lat': row['lat'],
            'lon': row['lon']
        }
        result = get_pollution_data(context)
        if result:
            pollution_data.append(result)
        
        # time.sleep(1)  # พัก API เพื่อไม่ให้โดน block
    
    #สร้าง dataframe
pollution_df = pd.DataFrame(pollution_data)
# pollution_df(head)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"ใช้เวลาในการรันทั้งหมด: {elapsed_time:.2f} วินาที")
# create_df()
pollution_df

In [1]:
#### TEST
import requests
import pandas as pd
from datetime import datetime
import time
import pytz
from datetime import timedelta

import nest_asyncio
import asyncio
import aiohttp
from prefect import flow, task # Prefect flow and task decorators


nest_asyncio.apply()


# @task
async def fetch_pollution_data(coord_df):
    API_KEY = "68c16da5e675ad8635df84629765b118"
    # WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"
    POLLUTION_ENDPOINT = "http://api.openweathermap.org/data/2.5/air_pollution"
    async def fetch_row(session, row):
        try:
            params = {
                # "lat": province_context['lat'],
                # "lon": province_context['lon'],
                "lat" : lat,
                "lon" : lon,
                "appid": API_KEY,
                "units": "metric"
            }
            async with session.get(POLLUTION_ENDPOINT, params=params) as response:
                data = await response.json()
                dt = datetime.utcnow()
                thai_tz = pytz.timezone('Asia/Bangkok')
                localtime = dt.astimezone(thai_tz)
                components = data['list'][0]['components']
                pollution_dict = {
                    'timestamp': dt,
                    'year': dt.year,
                    'month': dt.month,
                    'day': dt.day,
                    'hour': dt.hour,
                    'minute': dt.minute,
                    'localtime': localtime,
                    'province' : province,
                    'amphoe' : amphoe,
                    'lat' : data['coord']['lat'],
                    'lon' : data['coord']['lon'],
                    # 'location': data['name'],
                    'aqi' : data['list'][0]['main']['aqi'],
                    'co' : components['co'],
                    'no' : components['no'],
                    'no2' : components['no2'],
                    'o3' : components['o3'],
                    'so2' : components['so2'],
                    'pm25' : components['pm2_5'],
                    'pm10' : components['pm10'],
                    'nh3' : components['nh3']
                }
                return pollution_dict

        except Exception as e:
            return {
                'province': row.get('province', 'unknown'),
                'amphoe': row.get('amphoe', 'unknown'),
                'lat': row.get('lat', None),
                'lon': row.get('lon', None),
                'error': str(e)
        }
        except requests.exceptions.RequestException as e:
            print(f"Error fetching data: {e}")
            return None
        except KeyError as e:
            print(f"Error processing data: Missing key {e}")
            return None
        except Exception as e:
            print(f"Error: {e} at {province} - {amphoe}")
            return None


    async with aiohttp.ClientSession() as session:
        tasks = [fetch_row(session, row) for _, row in df.iterrows()]
        results = await asyncio.gather(*tasks)
    return results
            

        # Make API request
        # response = requests.get(POLLUTION_ENDPOINT, params=params)
        # response.raise_for_status()  # Raise an exception for bad status codes
        # data = response.json()
        # return data

# get_pollution_data()

In [2]:
from prefect import flow

# @flow(name="pollution-flow")
async def pollution_flow():
    coord_df = pd.read_csv('./save/amphoe_coord.csv')
    results = await fetch_all_pollution(coord_df)
    pollution_data = pd.DataFrame(results)



In [3]:
pollution_data

NameError: name 'pollution_data' is not defined