## Daftar Ibukota Provinsi di Indonesia

In [1]:
import requests

url = "https://id.wikipedia.org/wiki/Daftar_ibu_kota_provinsi_di_Indonesia"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops right here on an HTTP error instead of letting an
# error page be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
wikisite = response.text
wikisite

'<!DOCTYPE html>\n<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-enabled vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-disabled skin-theme-clientpref-day vector-toc-available" lang="id" dir="ltr">\n<head>\n<meta charset="UTF-8">\n<title>Daftar ibu kota provinsi di Indonesia - Wikipedia bahasa Indonesia, ensiklopedia bebas</title>\n<script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clie

In [2]:
from bs4 import BeautifulSoup
import re

# Bracketed footnote markers such as "[1]" or "[a]" that Wikipedia appends to cell text.
FOOTNOTE_RE = re.compile(r'\[.*?\]')


def strip_footnotes(text):
    """Return `text` with every bracketed footnote marker removed and outer whitespace trimmed."""
    return FOOTNOTE_RE.sub('', text).strip()


soup = BeautifulSoup(wikisite, 'html.parser')

# Look the table up step by step: chaining .find('tbody') onto a missing table
# would raise AttributeError before the "No tbody found" branch could ever run.
table = soup.find('table')
tbody = table.find('tbody') if table else None

# One dict per province row: id, nama, ibukota, luas_wilayah, ipm (all strings).
indProv = []

if tbody:
    for tr in tbody.find_all('tr'):
        td = tr.find_all('td')

        # Header rows contain no <td>. Rows with fewer than 8 cells cannot
        # provide columns 6 and 7, so skip them rather than raise IndexError.
        if len(td) < 8:
            continue

        # Rows whose first cell is '-' are not numbered province rows.
        if td[0].get_text(strip=True) == '-':
            continue

        indProv.append({
            'id': td[0].get_text(strip=True),
            'nama': td[1].get_text(strip=True),
            'ibukota': strip_footnotes(td[2].get_text(strip=True)),
            'luas_wilayah': strip_footnotes(td[6].get_text(strip=True)),
            'ipm': td[7].get_text(strip=True)
        })
else:
    print("No tbody found")

In [3]:
# Show the scraped province records together with how many were collected.
indProv, len(indProv)

([{'id': '1',
   'nama': 'Aceh',
   'ibukota': 'Banda Aceh',
   'luas_wilayah': '61,36',
   'ipm': '85,71'},
  {'id': '2',
   'nama': 'Sumatera Utara',
   'ibukota': 'Medan',
   'luas_wilayah': '265,10',
   'ipm': '81,21'},
  {'id': '3',
   'nama': 'Sumatera Barat',
   'ibukota': 'Padang',
   'luas_wilayah': '694,93',
   'ipm': '82,90'},
  {'id': '4',
   'nama': 'Riau',
   'ibukota': 'Pekanbaru',
   'luas_wilayah': '632,26',
   'ipm': '81,58'},
  {'id': '5',
   'nama': 'Jambi',
   'ibukota': 'Jambi',
   'luas_wilayah': '205,58',
   'ipm': '79,12'},
  {'id': '6',
   'nama': 'Sumatera Selatan',
   'ibukota': 'Palembang',
   'luas_wilayah': '400,61',
   'ipm': '78,72'},
  {'id': '7',
   'nama': 'Bengkulu',
   'ibukota': 'Bengkulu',
   'luas_wilayah': '151,70',
   'ipm': '80,54'},
  {'id': '8',
   'nama': 'Lampung',
   'ibukota': 'Bandar Lampung',
   'luas_wilayah': '197,22',
   'ipm': '77,58'},
  {'id': '9',
   'nama': 'Kepulauan Bangka Belitung',
   'ibukota': 'Pangkalpinang',
   'luas_w

In [5]:
# The scraped table has no capital value for Jakarta, so fill it in by hand.
# The record is located by province name instead of a fixed list position, so
# the fix keeps hitting the right entry even if the table order changes.
jakarta = next((province for province in indProv if 'Jakarta' in province['nama']), None)
if jakarta is None:
    raise LookupError("No province containing 'Jakarta' found in indProv")
jakarta['ibukota'] = 'Jakarta'
print(jakarta)

{'id': '11', 'nama': 'Daerah Khusus Ibukota Jakarta', 'ibukota': 'Jakarta', 'luas_wilayah': '662,33', 'ipm': '81,56'}


In [11]:
# Wikipedia spells the capital of Kalimantan Tengah with a space; remove the
# space (giving 'PalangkaRaya'), the form this notebook uses for geocoding.
# The record is located by province name instead of a fixed list position, so
# the fix keeps hitting the right entry even if the table order changes.
kalteng = next((province for province in indProv if 'Kalimantan Tengah' in province['nama']), None)
if kalteng is None:
    raise LookupError("Province 'Kalimantan Tengah' not found in indProv")
kalteng['ibukota'] = kalteng['ibukota'].replace(" ", "")
print(kalteng)

{'id': '21', 'nama': 'Kalimantan Tengah', 'ibukota': 'PalangkaRaya', 'luas_wilayah': '2.678,51', 'ipm': '80,82'}


In [13]:
# Inspect the record at position 36 (Papua Pegunungan / Walesi in the output below).
# Walesi has no geocoding information, so this record is removed in the next cell.
indProv[36]


{'id': '37',
 'nama': 'Papua Pegunungan',
 'ibukota': 'Walesi',
 'luas_wilayah': '',
 'ipm': ''}

In [14]:
# Drop the province whose capital is Walesi (no geocoding data available).
# Filtering by value instead of `indProv.remove(indProv[36])` makes the cell
# idempotent: running it again no longer deletes a further, unrelated province.
# Slice assignment keeps the same list object, like the original in-place removal.
indProv[:] = [province for province in indProv if province['ibukota'] != 'Walesi']
len(indProv)

37

In [None]:
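# Notes on the manual fixes above:
# - Wikipedia writes "Palangka Raya", but the capital has to be written without the space ("Palangkaraya").
# - Walesi, the capital of Papua Pegunungan, has no entry in the geocoding (longitude/latitude) data.
# - The capital of Jakarta is also missing from the scraped table.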

In [15]:
import numpy as np

import os

# np.save does not create missing directories, so make sure 'temp/' exists first.
os.makedirs('temp', exist_ok=True)

# A list of dicts becomes a 1-D object array, which np.save stores via pickle;
# reading it back therefore needs np.load(..., allow_pickle=True).
indProv_array = np.array(indProv)
np.save('temp/ind_prov.npy', indProv_array)

## Longitude and Latitude Based On Capital City

In [16]:
import requests
def get_json_from_capital(value):
    """Search the Open-Meteo geocoding API for a place name.

    Parameters
    ----------
    value : str
        Place name to look up (in this notebook: a provincial capital).

    Returns
    -------
    dict
        The decoded JSON response. Only the single best match is requested
        (count=1); when there is a match it is found under the 'results' key.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    requests.RequestException
        On connection problems or when the 30-second timeout is exceeded.
    """
    url = "https://geocoding-api.open-meteo.com/v1/search"
    # Let requests build the query string so that names containing spaces,
    # '&', '#' or non-ASCII characters are percent-encoded correctly.
    params = {"name": value, "count": 1, "language": "en", "format": "json"}
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


In [17]:
# Query the geocoding API once per province, in list order, so that
# result_json[k] is the response for indProv[k].
result_json = [get_json_from_capital(province['ibukota']) for province in indProv]

In [18]:
# Print the raw geocoding responses (one dict per entry of indProv) for a visual check.
print(result_json)

[{'results': [{'id': 1215502, 'name': 'Banda Aceh', 'latitude': 5.54167, 'longitude': 95.33333, 'elevation': 9.0, 'feature_code': 'PPLA', 'country_code': 'ID', 'admin1_id': 1215638, 'admin2_id': 1215501, 'timezone': 'Asia/Jakarta', 'population': 250757, 'country_id': 1643084, 'country': 'Indonesia', 'admin1': 'Aceh', 'admin2': 'Kota Banda Aceh'}], 'generationtime_ms': 1.0859966}, {'results': [{'id': 1214520, 'name': 'Medan', 'latitude': 3.58333, 'longitude': 98.66667, 'elevation': 26.0, 'feature_code': 'PPLA', 'country_code': 'ID', 'admin1_id': 1213642, 'timezone': 'Asia/Jakarta', 'population': 1750971, 'country_id': 1643084, 'country': 'Indonesia', 'admin1': 'North Sumatra'}], 'generationtime_ms': 1.7809868}, {'results': [{'id': 1633419, 'name': 'Padang', 'latitude': -0.94924, 'longitude': 100.35427, 'elevation': 6.0, 'feature_code': 'PPLA', 'country_code': 'ID', 'admin1_id': 1626197, 'timezone': 'Asia/Jakarta', 'population': 840352, 'country_id': 1643084, 'country': 'Indonesia', 'adm

In [None]:
# Disabled debugging snippet, kept for reference:
# result_json[36], len(result_json), indProv[36]
# for i in range(len(result_json)):
#     if 'generationtime_ms' in result_json[i]:
#         print(result_json[i], i)
#     # print(result_json[i], i)

In [19]:
# save data request to numpy array
import numpy as np

import os

# np.save does not create missing directories, so make sure 'temp/' exists first.
os.makedirs('temp', exist_ok=True)

# A list of dicts becomes a 1-D object array, which np.save stores via pickle;
# reading it back therefore needs np.load(..., allow_pickle=True).
result_json_array = np.array(result_json)
np.save('temp/requests_json_array.npy', result_json_array)


In [20]:
import json

# Attach country / latitude / longitude from each geocoding response to the
# province at the same position. The responses were collected by walking
# indProv in order, so both lists must have the same length; fail loudly if
# they have drifted apart instead of attaching coordinates to the wrong province.
assert len(result_json) == len(indProv), "result_json and indProv are out of sync"

for index, (province, data) in enumerate(zip(indProv, result_json)):
    # The responses are already plain dicts, so no json.dumps/json.loads
    # round trip is needed before reading them.
    results = data.get('results')
    if results:
        res = results[0]
        # The API may omit empty fields, so a missing country becomes None
        # rather than a KeyError; the coordinates are required.
        province['country'] = res.get('country')
        province['latitude'] = res['latitude']
        province['longitude'] = res['longitude']
    else:
        # No match for this capital: report its position in indProv.
        print(index)

        # if next((index for index, province in enumerate(indProv) if province.get('ibukota').lower() in res['name'].lower()), None) == None:
        #     print(i)




In [21]:
import os

# Save the province records, now enriched with country/latitude/longitude.
# np.save does not create missing directories, so make sure 'temp/' exists first.
os.makedirs('temp', exist_ok=True)
np.save('temp/ind_prov_longlat.npy', indProv)

## Get weather information from 1 January 2024 until 17 August 2024 (the last day requested is 16 August)

In [22]:
import requests
def get_weather_from_longlat(longitude, latitude, start_date="2024-01-01", end_date="2024-08-16"):
    """Fetch daily weather values for one location from the Open-Meteo forecast API.

    Parameters
    ----------
    longitude, latitude : float
        Coordinates of the location. Note the argument order: longitude first.
    start_date, end_date : str, optional
        First and last day of the requested period as 'YYYY-MM-DD'. The
        defaults reproduce the period originally hard-coded in the URL.

    Returns
    -------
    dict
        The decoded JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code (for example when the
        request limit is exceeded), so an error payload is never mistaken
        for weather data.
    requests.RequestException
        On connection problems or when the 30-second timeout is exceeded.
    """
    daily_variables = (
        "weather_code",
        "temperature_2m_max",
        "temperature_2m_min",
        "sunrise",
        "sunset",
        "daylight_duration",
        "sunshine_duration",
        "uv_index_max",
        "uv_index_clear_sky_max",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    )
    url = "https://api.open-meteo.com/v1/forecast"
    # requests percent-encodes the values ('/' in the timezone, ',' in the
    # variable list), so nothing has to be escaped by hand.
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "daily": ",".join(daily_variables),
        "timezone": "Asia/Bangkok",
        "start_date": start_date,
        "end_date": end_date,
    }
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()



In [29]:
import os

# open() cannot create the target folder, so make sure it exists up front.
os.makedirs('temp/json_d', exist_ok=True)

for index, province in enumerate(indProv):
    # Provinces whose geocoding lookup returned no match never received
    # coordinates; skip them instead of failing with a KeyError.
    if 'latitude' not in province or 'longitude' not in province:
        print(f"{index}: no coordinates for {province['ibukota']}, skipped")
        continue

    weather_data = get_weather_from_longlat(province['longitude'], province['latitude'])

    # Open-Meteo reports failures (such as an exceeded request limit) as a JSON
    # object with "error": true. Do not store that as weather data; stop so the
    # remaining provinces can be fetched in a later run.
    if weather_data.get('error'):
        print(f"{index}: API error for {province['ibukota']}: {weather_data.get('reason')}")
        break

    capital = province['ibukota']
    # Files are numbered from 1, following the order of indProv.
    with open(f'temp/json_d/{index+1}_{capital}.json', 'w') as json_file:
        json.dump(weather_data, json_file, indent=4)

# weather_data = get_weather_from_longlat(indProv[0]['longitude'], indProv[0]['latitude'])
# weather_data

In [28]:
# Requests beyond the 25th province (starting from Sorong) hit the API call limit.