In [39]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def ROC_year(year):
    if year < 1912:
        raise ValueError("ROC year start from 1912 !")
    else:
        return int(year - 1911)

crops= {
    'Maize': '002',
    'Wheat': '003',
    'Soybean': '101',
    'Tomato': '425',
    'Sugarcane': '306',
    'Taro': '440',
    'Cabbage': '419'
}

items = {
    'winter': '00',
    'first_season': '01',
    'second_seanson': '02',
    'whole_year': '03'
}

def afa_scratch(year: int, item: str, crop: str, url: str = 'https://agr.afa.gov.tw/afa/pgcropcity.jsp'):
    if not 86 <= year <= 110:
        raise ValueError("Invalid year data")

    # Request data
    payload = {
        'accountingyear': str(year),
        'item': item,
        'corn001': '',
        'input803': '',
        'crop': crop,
        'city': '00',
        'btnSend': '送　出'
    }

    try:
        response = requests.post(url, data = payload)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4XX, 5XX)
        print('Success')  # Outputs the response text directly
        return response
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Oops: Something Else", err)

In [40]:
response = afa_scratch(ROC_year(2018), items['winter'], crops['Maize'])

Success


In [44]:
# Parse the HTML
soup = BeautifulSoup(response, 'lxml')

# Find the table
div = soup.find('div', class_='DivRestTbl')

rows = div.find('table', class_= "TDFont")

# Extract rows
rows = rows.find_all('tr')
# Process each row to extract column data
data = []
for row in rows:
    cols = [ele.text.strip() for ele in row.find_all('td')]
    if cols:  # Avoid empty lists from empty rows
        data.append(cols)

# Create a DataFrame with the extracted data
# First row of data contains the headers
df = pd.DataFrame(data[1:], columns=data[0])

print(df)

    縣市名稱      種植面積      收穫面積  每公頃收量          收量
0     公頃        公頃        公斤     公斤        None
1    新北市      3.44      3.44  3,773      12,978
2    宜蘭縣      5.40      5.40  5,496      29,679
3    桃園市      4.09      4.09  4,080      16,687
4    新竹縣      5.28      5.28  7,061      37,280
5    苗栗縣     37.67     37.67  7,368     277,566
6    台中市     80.52     80.52  6,265     504,494
7    彰化縣    197.07    196.87  7,094   1,396,658
8    南投縣    108.31    108.31  6,493     703,285
9    雲林縣  1,862.40  1,862.40  9,239  17,206,866
10   嘉義縣    856.57    856.57  5,948   5,094,559
11   台南市  1,152.30  1,152.30  7,573   8,726,458
12   高雄市    375.59    375.41  6,963   2,613,795
13   屏東縣    207.09    206.59  7,572   1,564,219
14   台東縣     89.51     89.51  5,664     506,983
15   花蓮縣    222.76    222.76  5,576   1,242,068
16   澎湖縣      0.09      0.09  7,000         630
17   新竹市      1.72      1.72  4,465       7,679
18   嘉義市      4.73      4.73  7,011      33,160
19   台北市      1.05      1.05  5,819     

In [59]:
import itertools
ROC_years = [ROC_year(y) for y in range(1997, 2021)]
# Use zip to iterate over matched elements
for year, item, crop in itertools.product(ROC_years, items.values(), crops.values()):
    print(year, item, crop)

86 00 002
86 00 003
86 00 101
86 00 425
86 00 306
86 00 440
86 00 419
86 01 002
86 01 003
86 01 101
86 01 425
86 01 306
86 01 440
86 01 419
86 02 002
86 02 003
86 02 101
86 02 425
86 02 306
86 02 440
86 02 419
86 03 002
86 03 003
86 03 101
86 03 425
86 03 306
86 03 440
86 03 419
87 00 002
87 00 003
87 00 101
87 00 425
87 00 306
87 00 440
87 00 419
87 01 002
87 01 003
87 01 101
87 01 425
87 01 306
87 01 440
87 01 419
87 02 002
87 02 003
87 02 101
87 02 425
87 02 306
87 02 440
87 02 419
87 03 002
87 03 003
87 03 101
87 03 425
87 03 306
87 03 440
87 03 419
88 00 002
88 00 003
88 00 101
88 00 425
88 00 306
88 00 440
88 00 419
88 01 002
88 01 003
88 01 101
88 01 425
88 01 306
88 01 440
88 01 419
88 02 002
88 02 003
88 02 101
88 02 425
88 02 306
88 02 440
88 02 419
88 03 002
88 03 003
88 03 101
88 03 425
88 03 306
88 03 440
88 03 419
89 00 002
89 00 003
89 00 101
89 00 425
89 00 306
89 00 440
89 00 419
89 01 002
89 01 003
89 01 101
89 01 425
89 01 306
89 01 440
89 01 419
89 02 002
89 02 003


In [25]:
from good_objects import ROC_year, afa_scratch
from bs4 import BeautifulSoup
import pandas as pd
import itertools


# Assuming you have lists of parameters that correspond by index
ROC_years = [ROC_year(y) for y in range(2000, 2001)]

crops= {
    'Maize': '002',
}

items = {
    'winter': '00',
}


In [40]:
from good_objects import ROC_year, afa_scratch, crops, items
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests


url = 'https://agr.afa.gov.tw/afa/pgricecity.jsp'
# Request data
payload = {
    'accountingyear': '108',
    'item': '01',
    'crop': 'C01',
    'city': '00',
    'btnSend': '送　出'
}


# POST request
response = requests.post(url, data = payload)

# Check the responses
response.raise_for_status()  # Raises an HTTPError for bad responses (4XX, 5XX)

soup = BeautifulSoup(response.text, 'html')
div = soup.find('div', class_= "DivRestTbl")

rows = div.find('table', "TDFont")
rows = rows.find_all("tr")
# Process each row to extract column data
data = []
for row in rows:
    cols = [ele.text.strip() for ele in row.find_all('td')]
    if cols:  # Avoid empty lists from empty rows
        cols = [col.replace(",", "") for col in cols]
        data.append(cols)


[['縣市名稱', '初步種植面積', '實際種植面積', '收穫面積', '無收穫面積', '稻穀總產量', '稻穀單位產量', '糙米總產量', '糙米單位產量'], ['公頃', '公頃', '公頃', '公頃', '公斤', '公斤', '公斤', '公斤'], ['新北市', '150.26', '150.91', '150.91', '0.00', '786843', '5214', '634353', '4204'], ['宜蘭縣', '9895.65', '9960.64', '9960.64', '0.00', '59799354', '6004', '46599382', '4678'], ['桃園市', '13157.44', '12989.08', '12989.08', '0.00', '69085683', '5319', '56178447', '4325'], ['新竹縣', '4260.00', '4270.00', '4270.00', '0.00', '23549335', '5515', '18971984', '4443'], ['苗栗縣', '6099.00', '6099.00', '6099.00', '0.00', '38848097', '6370', '31735682', '5203'], ['台中市', '12959.25', '12960.00', '12960.00', '0.00', '88444735', '6824', '69148837', '5336'], ['彰化縣', '22592.99', '22512.89', '22512.89', '0.00', '165962028', '7372', '129098125', '5734'], ['南投縣', '2252.79', '2262.79', '2262.79', '0.00', '15746700', '6959', '12301110', '5436'], ['雲林縣', '27830.38', '28328.37', '28328.37', '0.00', '217209782', '7668', '176302904', '6224'], ['嘉義縣', '17726.77', '17386.96', '17386.96', '

In [2]:
import numpy as np

In [39]:
import re
re.sub("[^\d\.]", "", "$1,000,000.01")
["sad", "saf,sa"].replace(",", "")

AttributeError: 'list' object has no attribute 'replace'