Link: https://power.larc.nasa.gov/data-access-viewer/

# 1. Get daily agroclimatology data for 2018-2022 for 27 districts/cities

# for 20 parameters

In [1]:
import requests
import pandas as pd
from datetime import datetime

# List of 27 districts/cities in West Java Province with coordinates (latitude, longitude)
# NOTE(review): several entries share coordinates, so their downloaded series will be identical:
#   - "Kabupaten Majalengka" and "Kota Tasikmalaya" have exactly the same lat/lon
#   - "Kabupaten Bandung Barat" and "Kota Banjar" have exactly the same lat/lon
#   - "Kabupaten Ciamis" / "Kota Cimahi", "Kabupaten Garut" / "Kota Depok" and
#     "Kabupaten Tasikmalaya" / "Kota Bandung" differ by less than 0.01 degree
# The name-to-coordinate mapping looks shuffled — TODO verify every pair against an
# authoritative source before relying on per-location results.
locations = [
    {"name": "Kabupaten Bogor", "lat": -6.917464, "lon": 107.619123},
    {"name": "Kabupaten Sukabumi", "lat": -6.238270, "lon": 106.975572},
    {"name": "Kabupaten Cianjur", "lat": -6.595038, "lon": 106.816635},
    {"name": "Kabupaten Bandung", "lat": -7.326221, "lon": 108.353737},
    {"name": "Kabupaten Garut", "lat": -6.816579, "lon": 107.142367},
    {"name": "Kabupaten Tasikmalaya", "lat": -6.872209, "lon": 107.542401},
    {"name": "Kabupaten Ciamis", "lat": -6.732023, "lon": 108.552316},
    {"name": "Kabupaten Kuningan", "lat": -6.402484, "lon": 106.794241},
    {"name": "Kabupaten Cirebon", "lat": -7.202967, "lon": 107.905548},
    {"name": "Kabupaten Majalengka", "lat": -6.327583, "lon": 108.324849},
    {"name": "Kabupaten Sumedang", "lat": -6.323338, "lon": 107.337579},
    {"name": "Kabupaten Indramayu", "lat": -6.979735, "lon": 108.483091},
    {"name": "Kabupaten Subang", "lat": -6.837654, "lon": 108.227136},
    {"name": "Kabupaten Purwakarta", "lat": -7.613702, "lon": 108.496964},
    {"name": "Kabupaten Karawang", "lat": -6.540802, "lon": 107.446556},
    {"name": "Kabupaten Bekasi", "lat": -6.575448, "lon": 107.761460},
    {"name": "Kabupaten Bandung Barat", "lat": -6.919320, "lon": 106.927506},
    {"name": "Kabupaten Pangandaran", "lat": -6.850743, "lon": 107.922792},
    {"name": "Kota Bogor", "lat": -7.350594, "lon": 108.217163},
    {"name": "Kota Sukabumi", "lat": -7.374949, "lon": 108.534549},
    {"name": "Kota Bandung", "lat": -6.872094, "lon": 107.548441},
    {"name": "Kota Cirebon", "lat": -6.248360, "lon": 107.132744},
    {"name": "Kota Bekasi", "lat": -6.472470, "lon": 106.819965},
    {"name": "Kota Depok", "lat": -6.813627, "lon": 107.142372},
    {"name": "Kota Cimahi", "lat": -6.731856, "lon": 108.552315},
    {"name": "Kota Tasikmalaya", "lat": -6.327583, "lon": 108.324849},
    {"name": "Kota Banjar", "lat": -6.919320, "lon": 106.927506}
]

# First batch: 20 of the 33 NASA POWER parameters are requested by this cell.
# The remaining 13 are kept below as comments and are requested by a second,
# otherwise identical cell further down.
# NOTE(review): presumably split because the API limits how many parameters one
# request may carry — confirm against the NASA POWER API documentation.
parameters = [
    "QV2M", "RH2M", "PRECTOTCORR", "PS", "WS2M", "WS2M_MAX", "WS2M_MIN", "WS2M_RANGE",
    "WD2M", "WS10M", "WS10M_MAX", "WS10M_MIN", "WS10M_RANGE", "WD10M", "GWETTOP", "GWETROOT",
    "GWETPROF", "ALLSKY_SFC_SW_DWN", "CLRSKY_SFC_SW_DWN", "TOA_SW_DWN"
    # "ALLSKY_SFC_PAR_TOT", "CLRSKY_SFC_PAR_TOT", "ALLSKY_SFC_UVA", "ALLSKY_SFC_UVB", "ALLSKY_SFC_UV_INDEX",
    # "T2M", "T2MDEW", "T2MWET", "TS", "T2M_RANGE", "T2M_MAX", "T2M_MIN", "ALLSKY_SFC_LW_DWN"
]

# NASA POWER API endpoint for daily values at a single point
url = "https://power.larc.nasa.gov/api/temporal/daily/point"

# Function to retrieve data from NASA POWER for one location
def fetch_nasa_power_data(lat, lon, start_date, end_date):
    """Download daily NASA POWER values for a single point.

    Sends one GET request to the module-level ``url`` asking for every name in
    the module-level ``parameters`` list (community "AG", JSON format, UTC).

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        start_date: First day to retrieve, as a "YYYYMMDD" string.
        end_date: Last day to retrieve, as a "YYYYMMDD" string.

    Returns:
        A DataFrame with one row per day, one column per returned parameter and
        a ``date`` column. An empty DataFrame is returned (after printing a
        message) when the request fails, the body is not valid JSON, or the
        payload has no ``properties.parameter`` section.
    """
    params = {
        "start": start_date,
        "end": end_date,
        "latitude": lat,
        "longitude": lon,
        "community": "AG",
        "parameters": ",".join(parameters),
        "format": "JSON",
        "header": "true",
        "time-standard": "UTC"
    }

    try:
        # Without a timeout a single stalled connection would hang the whole download loop.
        response = requests.get(url, params=params, timeout=120)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP errors and undecodable bodies are reported per location
        # so the caller can carry on with the remaining locations.
        print(f"Request failed for location: {lat}, {lon} ({exc})")
        return pd.DataFrame()

    # Check if any data is received
    if "properties" in data and "parameter" in data["properties"]:
        df = pd.DataFrame(data["properties"]["parameter"])
        # The daily endpoint keys each value by a "YYYYMMDD" string; parse it explicitly.
        df["date"] = pd.to_datetime(df.index, format="%Y%m%d")
        # NOTE(review): the API reports missing values with a fill value (presumably -999,
        # see the response header) — they are kept as-is here; confirm whether they
        # should become NaN before any averaging.
        return df

    print(f"Data not found for location: {lat}, {lon}")
    return pd.DataFrame()

# Start and end dates for the data to be retrieved (2018-2022), as "YYYYMMDD" strings
start_date = "20180101"
end_date = "20221231"

# Retrieve data for all locations; remember which ones returned nothing so the
# gap is reported instead of silently producing an incomplete file.
all_data = []
failed_locations = []
for location in locations:
    print(f"Fetching data for {location['name']}...")
    data = fetch_nasa_power_data(location["lat"], location["lon"], start_date, end_date)
    if data.empty:
        # fetch_nasa_power_data hands back an empty frame when nothing usable was received
        failed_locations.append(location["name"])
        continue
    data["location"] = location["name"]
    all_data.append(data)

# Refuse to write a CSV that would contain no rows at all
if not all_data:
    raise RuntimeError(
        "No data retrieved for any location; nasa_power_data_jawa_barat.csv was not written"
    )

# Merge data from all locations into one DataFrame
combined_data = pd.concat(all_data, ignore_index=True)

# Save dataframe to file CSV
combined_data.to_csv("nasa_power_data_jawa_barat.csv", index=False)

if failed_locations:
    print(f"Warning: no data for {len(failed_locations)} location(s): {', '.join(failed_locations)}")
print("Data retrieval complete and saved to nasa_power_data_jawa_barat.csv")

Fetching data for Kabupaten Bogor...
Fetching data for Kabupaten Sukabumi...
Fetching data for Kabupaten Cianjur...
Fetching data for Kabupaten Bandung...
Fetching data for Kabupaten Garut...
Fetching data for Kabupaten Tasikmalaya...
Fetching data for Kabupaten Ciamis...
Fetching data for Kabupaten Kuningan...
Fetching data for Kabupaten Cirebon...
Fetching data for Kabupaten Majalengka...
Fetching data for Kabupaten Sumedang...
Fetching data for Kabupaten Indramayu...
Fetching data for Kabupaten Subang...
Fetching data for Kabupaten Purwakarta...
Fetching data for Kabupaten Karawang...
Fetching data for Kabupaten Bekasi...
Fetching data for Kabupaten Bandung Barat...
Fetching data for Kabupaten Pangandaran...
Fetching data for Kota Bogor...
Fetching data for Kota Sukabumi...
Fetching data for Kota Bandung...
Fetching data for Kota Cirebon...
Fetching data for Kota Bekasi...
Fetching data for Kota Depok...
Fetching data for Kota Cimahi...
Fetching data for Kota Tasikmalaya...
Fetching

In [2]:
# Reload the first parameter batch from disk (comma is read_csv's default separator)
# and preview its first rows.
df1 = pd.read_csv("nasa_power_data_jawa_barat.csv")
df1.head()

Unnamed: 0,QV2M,RH2M,PRECTOTCORR,PS,WS2M,WS2M_MAX,WS2M_MIN,WS2M_RANGE,WD2M,WS10M,...,WS10M_RANGE,WD10M,GWETTOP,GWETROOT,GWETPROF,ALLSKY_SFC_SW_DWN,CLRSKY_SFC_SW_DWN,TOA_SW_DWN,date,location
0,14.95,83.88,2.04,90.69,1.28,1.98,0.91,1.07,195.31,1.98,...,1.59,195.31,0.89,0.92,0.95,22.32,26.36,38.13,2018-01-01,Kabupaten Bogor
1,16.36,88.88,11.46,90.71,1.2,2.31,0.52,1.8,196.81,1.75,...,2.02,197.31,0.9,0.92,0.95,20.59,25.91,38.14,2018-01-02,Kabupaten Bogor
2,16.17,91.12,12.68,90.77,1.04,1.55,0.78,0.77,232.44,1.61,...,1.13,232.56,0.9,0.92,0.95,19.6,25.28,38.16,2018-01-03,Kabupaten Bogor
3,15.75,93.31,15.63,90.86,2.08,2.97,1.02,1.95,233.81,3.05,...,2.05,233.75,0.91,0.95,0.96,14.55,25.47,38.17,2018-01-04,Kabupaten Bogor
4,15.81,89.88,39.75,90.86,1.97,2.52,0.9,1.62,244.12,2.83,...,1.95,243.75,0.93,0.96,0.97,12.47,25.62,38.18,2018-01-05,Kabupaten Bogor


In [3]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49302 entries, 0 to 49301
Data columns (total 22 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   QV2M               49302 non-null  float64
 1   RH2M               49302 non-null  float64
 2   PRECTOTCORR        49302 non-null  float64
 3   PS                 49302 non-null  float64
 4   WS2M               49302 non-null  float64
 5   WS2M_MAX           49302 non-null  float64
 6   WS2M_MIN           49302 non-null  float64
 7   WS2M_RANGE         49302 non-null  float64
 8   WD2M               49302 non-null  float64
 9   WS10M              49302 non-null  float64
 10  WS10M_MAX          49302 non-null  float64
 11  WS10M_MIN          49302 non-null  float64
 12  WS10M_RANGE        49302 non-null  float64
 13  WD10M              49302 non-null  float64
 14  GWETTOP            49302 non-null  float64
 15  GWETROOT           49302 non-null  float64
 16  GWETPROF           493

# for another 13 parameters

In [32]:
import requests
import pandas as pd
from datetime import datetime

# List of 27 districts/cities in West Java Province with coordinates (latitude, longitude)
# NOTE(review): several entries share coordinates, so their downloaded series will be identical:
#   - "Kabupaten Majalengka" and "Kota Tasikmalaya" have exactly the same lat/lon
#   - "Kabupaten Bandung Barat" and "Kota Banjar" have exactly the same lat/lon
#   - "Kabupaten Ciamis" / "Kota Cimahi", "Kabupaten Garut" / "Kota Depok" and
#     "Kabupaten Tasikmalaya" / "Kota Bandung" differ by less than 0.01 degree
# The name-to-coordinate mapping looks shuffled — TODO verify every pair against an
# authoritative source before relying on per-location results.
locations = [
    {"name": "Kabupaten Bogor", "lat": -6.917464, "lon": 107.619123},
    {"name": "Kabupaten Sukabumi", "lat": -6.238270, "lon": 106.975572},
    {"name": "Kabupaten Cianjur", "lat": -6.595038, "lon": 106.816635},
    {"name": "Kabupaten Bandung", "lat": -7.326221, "lon": 108.353737},
    {"name": "Kabupaten Garut", "lat": -6.816579, "lon": 107.142367},
    {"name": "Kabupaten Tasikmalaya", "lat": -6.872209, "lon": 107.542401},
    {"name": "Kabupaten Ciamis", "lat": -6.732023, "lon": 108.552316},
    {"name": "Kabupaten Kuningan", "lat": -6.402484, "lon": 106.794241},
    {"name": "Kabupaten Cirebon", "lat": -7.202967, "lon": 107.905548},
    {"name": "Kabupaten Majalengka", "lat": -6.327583, "lon": 108.324849},
    {"name": "Kabupaten Sumedang", "lat": -6.323338, "lon": 107.337579},
    {"name": "Kabupaten Indramayu", "lat": -6.979735, "lon": 108.483091},
    {"name": "Kabupaten Subang", "lat": -6.837654, "lon": 108.227136},
    {"name": "Kabupaten Purwakarta", "lat": -7.613702, "lon": 108.496964},
    {"name": "Kabupaten Karawang", "lat": -6.540802, "lon": 107.446556},
    {"name": "Kabupaten Bekasi", "lat": -6.575448, "lon": 107.761460},
    {"name": "Kabupaten Bandung Barat", "lat": -6.919320, "lon": 106.927506},
    {"name": "Kabupaten Pangandaran", "lat": -6.850743, "lon": 107.922792},
    {"name": "Kota Bogor", "lat": -7.350594, "lon": 108.217163},
    {"name": "Kota Sukabumi", "lat": -7.374949, "lon": 108.534549},
    {"name": "Kota Bandung", "lat": -6.872094, "lon": 107.548441},
    {"name": "Kota Cirebon", "lat": -6.248360, "lon": 107.132744},
    {"name": "Kota Bekasi", "lat": -6.472470, "lon": 106.819965},
    {"name": "Kota Depok", "lat": -6.813627, "lon": 107.142372},
    {"name": "Kota Cimahi", "lat": -6.731856, "lon": 108.552315},
    {"name": "Kota Tasikmalaya", "lat": -6.327583, "lon": 108.324849},
    {"name": "Kota Banjar", "lat": -6.919320, "lon": 106.927506}
]

# Second batch: 13 of the 33 NASA POWER parameters are requested by this cell.
# The other 20 are kept below as comments; they are requested by the earlier,
# otherwise identical cell.
parameters = [
    # "QV2M", "RH2M", "PRECTOTCORR", "PS", "WS2M", "WS2M_MAX", "WS2M_MIN", "WS2M_RANGE",
    # "WD2M", "WS10M", "WS10M_MAX", "WS10M_MIN", "WS10M_RANGE", "WD10M", "GWETTOP", "GWETROOT",
    # "GWETPROF", "ALLSKY_SFC_SW_DWN", "CLRSKY_SFC_SW_DWN", "TOA_SW_DWN",
    "ALLSKY_SFC_PAR_TOT", "CLRSKY_SFC_PAR_TOT", "ALLSKY_SFC_UVA", "ALLSKY_SFC_UVB", "ALLSKY_SFC_UV_INDEX",
    "T2M", "T2MDEW", "T2MWET", "TS", "T2M_RANGE", "T2M_MAX", "T2M_MIN", "ALLSKY_SFC_LW_DWN"
]

# NASA POWER API endpoint for daily values at a single point
url = "https://power.larc.nasa.gov/api/temporal/daily/point"

# Function to retrieve data from NASA POWER for one location
def fetch_nasa_power_data(lat, lon, start_date, end_date):
    """Download daily NASA POWER values for a single point.

    Sends one GET request to the module-level ``url`` asking for every name in
    the module-level ``parameters`` list (community "AG", JSON format, UTC).

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        start_date: First day to retrieve, as a "YYYYMMDD" string.
        end_date: Last day to retrieve, as a "YYYYMMDD" string.

    Returns:
        A DataFrame with one row per day, one column per returned parameter and
        a ``date`` column. An empty DataFrame is returned (after printing a
        message) when the request fails, the body is not valid JSON, or the
        payload has no ``properties.parameter`` section.
    """
    params = {
        "start": start_date,
        "end": end_date,
        "latitude": lat,
        "longitude": lon,
        "community": "AG",
        "parameters": ",".join(parameters),
        "format": "JSON",
        "header": "true",
        "time-standard": "UTC"
    }

    try:
        # Without a timeout a single stalled connection would hang the whole download loop.
        response = requests.get(url, params=params, timeout=120)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP errors and undecodable bodies are reported per location
        # so the caller can carry on with the remaining locations.
        print(f"Request failed for location: {lat}, {lon} ({exc})")
        return pd.DataFrame()

    # Check if any data is received
    if "properties" in data and "parameter" in data["properties"]:
        df = pd.DataFrame(data["properties"]["parameter"])
        # The daily endpoint keys each value by a "YYYYMMDD" string; parse it explicitly.
        df["date"] = pd.to_datetime(df.index, format="%Y%m%d")
        # NOTE(review): the API reports missing values with a fill value (presumably -999,
        # see the response header) — they are kept as-is here; confirm whether they
        # should become NaN before any averaging.
        return df

    print(f"Data not found for location: {lat}, {lon}")
    return pd.DataFrame()

# Start and end dates for the data to be retrieved (2018-2022), as "YYYYMMDD" strings
start_date = "20180101"
end_date = "20221231"

# Retrieve data for all locations; remember which ones returned nothing so the
# gap is reported instead of silently producing an incomplete file.
all_data = []
failed_locations = []
for location in locations:
    print(f"Fetching data for {location['name']}...")
    data = fetch_nasa_power_data(location["lat"], location["lon"], start_date, end_date)
    if data.empty:
        # fetch_nasa_power_data hands back an empty frame when nothing usable was received
        failed_locations.append(location["name"])
        continue
    data["location"] = location["name"]
    all_data.append(data)

# Refuse to write a CSV that would contain no rows at all
if not all_data:
    raise RuntimeError(
        "No data retrieved for any location; nasa_power_data_jawa_barat2.csv was not written"
    )

# Merge data from all locations into one DataFrame
combined_data = pd.concat(all_data, ignore_index=True)

# Save dataframe to file CSV
combined_data.to_csv("nasa_power_data_jawa_barat2.csv", index=False)

if failed_locations:
    print(f"Warning: no data for {len(failed_locations)} location(s): {', '.join(failed_locations)}")
print("Data retrieval complete and saved to nasa_power_data_jawa_barat2.csv")

Fetching data for Kabupaten Bogor...
Fetching data for Kabupaten Sukabumi...
Fetching data for Kabupaten Cianjur...
Fetching data for Kabupaten Bandung...
Fetching data for Kabupaten Garut...
Fetching data for Kabupaten Tasikmalaya...
Fetching data for Kabupaten Ciamis...
Fetching data for Kabupaten Kuningan...
Fetching data for Kabupaten Cirebon...
Fetching data for Kabupaten Majalengka...
Fetching data for Kabupaten Sumedang...
Fetching data for Kabupaten Indramayu...
Fetching data for Kabupaten Subang...
Fetching data for Kabupaten Purwakarta...
Fetching data for Kabupaten Karawang...
Fetching data for Kabupaten Bekasi...
Fetching data for Kabupaten Bandung Barat...
Fetching data for Kabupaten Pangandaran...
Fetching data for Kota Bogor...
Fetching data for Kota Sukabumi...
Fetching data for Kota Bandung...
Fetching data for Kota Cirebon...
Fetching data for Kota Bekasi...
Fetching data for Kota Depok...
Fetching data for Kota Cimahi...
Fetching data for Kota Tasikmalaya...
Fetching

In [51]:
# Reload the second parameter batch from disk (comma is read_csv's default separator)
# and preview its first rows.
df2 = pd.read_csv("nasa_power_data_jawa_barat2.csv")
df2.head()

Unnamed: 0,ALLSKY_SFC_PAR_TOT,CLRSKY_SFC_PAR_TOT,ALLSKY_SFC_UVA,ALLSKY_SFC_UVB,ALLSKY_SFC_UV_INDEX,T2M,T2MDEW,T2MWET,TS,T2M_RANGE,T2M_MAX,T2M_MIN,ALLSKY_SFC_LW_DWN,date,location
0,120.8,141.33,16.17,0.51,2.65,21.89,18.73,20.3,22.43,7.93,26.51,18.58,417.02,2018-01-01,Kabupaten Bogor
1,112.98,139.89,15.27,0.48,2.56,22.19,20.14,21.16,22.69,5.78,25.73,19.95,421.32,2018-01-02,Kabupaten Bogor
2,106.56,135.64,14.32,0.45,2.33,21.59,20.0,20.8,21.92,5.43,24.53,19.1,427.34,2018-01-03,Kabupaten Bogor
3,80.54,137.13,11.15,0.36,1.88,20.69,19.53,20.11,20.72,4.78,23.11,18.33,425.75,2018-01-04,Kabupaten Bogor
4,69.28,137.76,9.98,0.32,1.66,21.49,19.65,20.57,21.91,6.27,25.26,19.0,419.06,2018-01-05,Kabupaten Bogor


In [53]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49302 entries, 0 to 49301
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ALLSKY_SFC_PAR_TOT   49302 non-null  float64
 1   CLRSKY_SFC_PAR_TOT   49302 non-null  float64
 2   ALLSKY_SFC_UVA       49302 non-null  float64
 3   ALLSKY_SFC_UVB       49302 non-null  float64
 4   ALLSKY_SFC_UV_INDEX  49302 non-null  float64
 5   T2M                  49302 non-null  float64
 6   T2MDEW               49302 non-null  float64
 7   T2MWET               49302 non-null  float64
 8   TS                   49302 non-null  float64
 9   T2M_RANGE            49302 non-null  float64
 10  T2M_MAX              49302 non-null  float64
 11  T2M_MIN              49302 non-null  float64
 12  ALLSKY_SFC_LW_DWN    49302 non-null  float64
 13  date                 49302 non-null  object 
 14  location             49302 non-null  object 
dtypes: float64(13), object(2)
memory usa

# 2. Merge the two csv files

In [56]:
import pandas as pd

# Join the two parameter batches on their shared keys 'date' and 'location'.
# validate='one_to_one' makes pandas raise if a (date, location) pair occurs more
# than once on either side, which would otherwise silently multiply rows.
merged = pd.merge(df1, df2, on=['date', 'location'], how='inner', validate='one_to_one')

# An inner join drops keys that exist in only one frame; report that instead of
# losing rows without notice.
if len(merged) != len(df1) or len(merged) != len(df2):
    print(f"Warning: merge kept {len(merged)} rows (df1 has {len(df1)}, df2 has {len(df2)})")

# Save the merged DataFrame to a new CSV file
merged.to_csv('agroclimatology_data_of_Jawa_Barat.csv', index=False)

In [57]:
# Reload the merged daily data from disk (comma is read_csv's default separator)
# and preview its first rows.
df3 = pd.read_csv("agroclimatology_data_of_Jawa_Barat.csv")
df3.head()

Unnamed: 0,QV2M,RH2M,PRECTOTCORR,PS,WS2M,WS2M_MAX,WS2M_MIN,WS2M_RANGE,WD2M,WS10M,...,ALLSKY_SFC_UVB,ALLSKY_SFC_UV_INDEX,T2M,T2MDEW,T2MWET,TS,T2M_RANGE,T2M_MAX,T2M_MIN,ALLSKY_SFC_LW_DWN
0,14.95,83.88,2.04,90.69,1.28,1.98,0.91,1.07,195.31,1.98,...,0.51,2.65,21.89,18.73,20.3,22.43,7.93,26.51,18.58,417.02
1,16.36,88.88,11.46,90.71,1.2,2.31,0.52,1.8,196.81,1.75,...,0.48,2.56,22.19,20.14,21.16,22.69,5.78,25.73,19.95,421.32
2,16.17,91.12,12.68,90.77,1.04,1.55,0.78,0.77,232.44,1.61,...,0.45,2.33,21.59,20.0,20.8,21.92,5.43,24.53,19.1,427.34
3,15.75,93.31,15.63,90.86,2.08,2.97,1.02,1.95,233.81,3.05,...,0.36,1.88,20.69,19.53,20.11,20.72,4.78,23.11,18.33,425.75
4,15.81,89.88,39.75,90.86,1.97,2.52,0.9,1.62,244.12,2.83,...,0.32,1.66,21.49,19.65,20.57,21.91,6.27,25.26,19.0,419.06


# 3. Move 'date' and 'location' to the first two columns

In [61]:
# Columns that must come first, in this order
leading = ['date', 'location']

# Start from the current column order and take the leading columns out of it
cols = df3.columns.tolist()
for key in leading:
    cols.remove(key)

# Put the leading columns in front of everything that is left
cols = leading + cols

# Re-select the DataFrame with the new column order
df3 = df3[cols]

In [63]:
df3.head()

Unnamed: 0,date,location,QV2M,RH2M,PRECTOTCORR,PS,WS2M,WS2M_MAX,WS2M_MIN,WS2M_RANGE,...,ALLSKY_SFC_UVB,ALLSKY_SFC_UV_INDEX,T2M,T2MDEW,T2MWET,TS,T2M_RANGE,T2M_MAX,T2M_MIN,ALLSKY_SFC_LW_DWN
0,2018-01-01,Kabupaten Bogor,14.95,83.88,2.04,90.69,1.28,1.98,0.91,1.07,...,0.51,2.65,21.89,18.73,20.3,22.43,7.93,26.51,18.58,417.02
1,2018-01-02,Kabupaten Bogor,16.36,88.88,11.46,90.71,1.2,2.31,0.52,1.8,...,0.48,2.56,22.19,20.14,21.16,22.69,5.78,25.73,19.95,421.32
2,2018-01-03,Kabupaten Bogor,16.17,91.12,12.68,90.77,1.04,1.55,0.78,0.77,...,0.45,2.33,21.59,20.0,20.8,21.92,5.43,24.53,19.1,427.34
3,2018-01-04,Kabupaten Bogor,15.75,93.31,15.63,90.86,2.08,2.97,1.02,1.95,...,0.36,1.88,20.69,19.53,20.11,20.72,4.78,23.11,18.33,425.75
4,2018-01-05,Kabupaten Bogor,15.81,89.88,39.75,90.86,1.97,2.52,0.9,1.62,...,0.32,1.66,21.49,19.65,20.57,21.91,6.27,25.26,19.0,419.06


In [65]:
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49302 entries, 0 to 49301
Data columns (total 35 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   date                 49302 non-null  object 
 1   location             49302 non-null  object 
 2   QV2M                 49302 non-null  float64
 3   RH2M                 49302 non-null  float64
 4   PRECTOTCORR          49302 non-null  float64
 5   PS                   49302 non-null  float64
 6   WS2M                 49302 non-null  float64
 7   WS2M_MAX             49302 non-null  float64
 8   WS2M_MIN             49302 non-null  float64
 9   WS2M_RANGE           49302 non-null  float64
 10  WD2M                 49302 non-null  float64
 11  WS10M                49302 non-null  float64
 12  WS10M_MAX            49302 non-null  float64
 13  WS10M_MIN            49302 non-null  float64
 14  WS10M_RANGE          49302 non-null  float64
 15  WD10M                49302 non-null 

# 4. Convert the daily data to monthly averages

In [80]:
# Work on a derived frame so df3 itself is never modified and this cell can be
# re-run: the previous in-place set_index removed the 'date' column from df3,
# which made a second run fail. If df3 is already indexed by 'date' (left over
# from an earlier run), bring the index back as a column first.
daily = df3 if 'date' in df3.columns else df3.reset_index()

# Convert the 'date' column to datetime
daily = daily.assign(date=pd.to_datetime(daily['date'], format='%Y-%m-%d'))

# Only float64/int64 columns are averaged
value_cols = daily.select_dtypes(include=['float64', 'int64']).columns.tolist()

# Label every row with the last calendar day of its month; MonthEnd(0) leaves
# dates that already fall on a month end unchanged.
month_end = (daily['date'] + pd.offsets.MonthEnd(0)).rename('date')

# One mean per (location, month) in a single pass instead of growing a frame with
# concat inside a loop. sort=False keeps groups in order of first appearance,
# i.e. location by location and, for chronologically ordered input, month by month.
# NOTE: unlike resample, months with no rows at all are not emitted as NaN rows.
df_monthly = (
    daily.groupby(['location', month_end], sort=False)[value_cols]
    .mean()
    .reset_index()
)

# Column layout: 'date' first, the averaged values, then 'location' last
df_monthly = df_monthly[['date'] + value_cols + ['location']]

In [None]:
df3.info()

In [None]:
df_monthly.head()

In [None]:
# Order the monthly rows chronologically by their 'date' value
df_monthly = df_monthly.sort_values('date')

In [None]:
df_monthly.info()

In [None]:
# Current column order as a plain list
cols = list(df_monthly.columns)

# Take 'location' out of its current position and re-insert it as the second column
location_pos = cols.index('location')
cols = cols[:location_pos] + cols[location_pos + 1:]
cols = cols[:1] + ['location'] + cols[1:]

# Re-select the DataFrame with the new column order
df_monthly = df_monthly[cols]

In [None]:
# Desired display order of the districts/cities
urutan_kota = [
    "Kabupaten Bogor",
    "Kabupaten Sukabumi",
    "Kabupaten Cianjur",
    "Kabupaten Bandung",
    "Kabupaten Garut",
    "Kabupaten Tasikmalaya",
    "Kabupaten Ciamis",
    "Kabupaten Kuningan",
    "Kabupaten Cirebon",
    "Kabupaten Majalengka",
    "Kabupaten Sumedang",
    "Kabupaten Indramayu",
    "Kabupaten Subang",
    "Kabupaten Purwakarta",
    "Kabupaten Karawang",
    "Kabupaten Bekasi",
    "Kabupaten Bandung Barat",
    "Kabupaten Pangandaran",
    "Kota Bogor",
    "Kota Sukabumi",
    "Kota Bandung",
    "Kota Cirebon",
    "Kota Bekasi",
    "Kota Depok",
    "Kota Cimahi",
    "Kota Tasikmalaya",
    "Kota Banjar"
]

# Sort by date, then by the city order above. The helper column 'urutan' is an
# ordered categorical built from 'location'; it exists only for the sort and is
# dropped again. Using assign() builds it on a new frame rather than writing
# into df_monthly, which may be a column-selected slice of an earlier frame.
# Locations missing from urutan_kota become NaN in 'urutan' and sort last within
# each date.
df_monthly = (
    df_monthly
    .assign(urutan=pd.Categorical(df_monthly['location'], categories=urutan_kota, ordered=True))
    .sort_values(by=['date', 'urutan'])
    .drop(columns=['urutan'])
)


In [None]:
df_monthly

Save dataframe to CSV file

In [None]:
# Save the re-ordered monthly DataFrame to a CSV file (without the index).
# NOTE(review): this is the same file name the merged daily data was written to
# earlier in the notebook, so the daily version on disk is overwritten here.
df_monthly.to_csv('agroclimatology_data_of_Jawa_Barat.csv', index=False)

# 5. Merge the agroclimatology data with the rice production dataset

In [78]:
import pandas as pd
# Read the CSV files (the rice production file is semicolon-separated)
df4 = pd.read_csv('agroclimatology_data_of_Jawa_Barat.csv', sep=',')
df5 = pd.read_csv('Padi Jawa Barat 2018 - 2022.csv', sep=';')

# The two tables are placed side by side purely by row position, so they must
# have the same number of rows; otherwise pandas pads the shorter one with NaN
# and unrelated records end up on the same line.
if len(df4) != len(df5):
    raise ValueError(
        f"Cannot combine by row position: climate data has {len(df4)} rows, "
        f"rice data has {len(df5)} rows"
    )

# When both files carry a 'location' column, warn if they disagree row by row,
# since that means the row order of the two files does not line up.
if 'location' in df4.columns and 'location' in df5.columns:
    left_loc = df4['location'].astype(str).str.strip().to_numpy()
    right_loc = df5['location'].astype(str).str.strip().to_numpy()
    mismatched = int((left_loc != right_loc).sum())
    if mismatched:
        print(f"Warning: 'location' differs between the two files on {mismatched} row(s); check the row order")

# Combine the DataFrames column-wise based on row order.
# NOTE(review): columns present in both files (e.g. 'location') appear twice in the
# result; a key-based pd.merge would be safer once the key columns of the rice
# dataset are confirmed.
merged = pd.concat([df4, df5], axis=1)

# Save the combined result to a new CSV file
merged.to_csv('merged_file.csv', index=False)

In [None]:
merged.head()

In [None]:
merged.info()

In [None]:
df5.info()

In [None]:
df4.info()

In [None]:
print(df4['location'].nunique())
print(df5['location'].nunique())

In [None]:
print(df4['location'].duplicated().sum())
print(df5['location'].duplicated().sum())