<a href="https://colab.research.google.com/github/satria-mitra/rr-fklim-awscenter/blob/main/rr_fklim_awscenter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import pandas as pd
import os
from datetime import datetime, timedelta

# Attach Google Drive so the station folders below are reachable
drive.mount('/content/drive')

# Input folders, one per station category (ARG, AWS, AAWS); each is scanned
# as <folder>/<year>/<month> by the collection loop
station_sources = dict(
    ARG='/content/drive/My Drive/Weather_Station_Data/ARG',
    AWS='/content/drive/My Drive/Weather_Station_Data/AWS',
    AAWS='/content/drive/My Drive/Weather_Station_Data/AAWS',
)

# Destination of the consolidated table
output_file = '/content/drive/My Drive/Weather_Station_Data/processed-ch.csv'

# Collect results: one row per (station file, month), one column per day.

# Stations that are read at 23:50 UTC instead of 00:00 UTC; the value found
# for day D is filed under column D + 1. Names are compared lower-cased and
# stripped.
LATE_READING_STATIONS = {"arg kuansing", "arg kuantan tengah"}

# Sentinel stored for a day when no usable 'rr' value was found.
MISSING_RR = 9999


def _station_name(df, file):
    """Return the station name for a monthly file.

    Uses the first non-null 'Nama Sta' value (converted to str and stripped).
    Falls back to the file name without extension when the column is absent
    or holds no value at all.
    """
    if 'Nama Sta' in df.columns:
        names = df['Nama Sta'].dropna()
        if not names.empty:
            return str(names.iloc[0]).strip()
    return os.path.splitext(file)[0]


def _rr_at(readings, target_dt):
    """Return the 'rr' value to use for target_dt, or None if there is none.

    readings must be a Series of non-null 'rr' values on an ascending,
    timezone-aware DatetimeIndex.

    An exact timestamp match wins; if several rows share that timestamp the
    last one (in file order) is used. Otherwise the most recent reading in the
    window [target_dt - 23 h, target_dt - 1 s] is returned.
    """
    # Boolean mask instead of .loc[label]: a duplicated timestamp would make
    # .loc return a Series and break the scalar null check.
    exact = readings[readings.index == target_dt]
    if not exact.empty:
        return exact.iloc[-1]

    window = readings.loc[target_dt - timedelta(hours=23):target_dt - timedelta(seconds=1)]
    if not window.empty:
        return window.iloc[-1]
    return None


all_rows = []
for station_type, base_dir in station_sources.items():
    for date in pd.date_range(start='2020-01-01', end='2024-12-01', freq='MS'):
        year = date.year
        month = date.month
        days_in_month = date.days_in_month
        month_dir = f"{base_dir}/{year}/{str(month).zfill(2)}"
        if not os.path.isdir(month_dir):
            print(f"Skipping missing: {month_dir}")
            continue

        # Sorted so the row order of the output does not depend on the
        # arbitrary order returned by os.listdir().
        files = sorted(f for f in os.listdir(month_dir) if f.endswith('.csv'))
        for file in files:
            file_path = os.path.join(month_dir, file)
            # Best-effort batch: a broken file is reported and skipped so the
            # remaining files are still processed.
            try:
                df = pd.read_csv(file_path, on_bad_lines='skip')
                df.columns = df.columns.str.strip()

                if 'Tanggal' not in df.columns or 'rr' not in df.columns:
                    print(f"Missing required columns in {file}")
                    continue

                df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', utc=True)
                # Stable sort keeps file order among equal timestamps;
                # unparseable dates (NaT) end up last.
                df = df.sort_values('Tanggal', kind='stable')

                station_name = _station_name(df, file)

                # Keep only rows with a valid timestamp and a non-null 'rr'.
                # Dropping NaT keeps the index monotonic, which label slicing
                # in _rr_at() requires.
                readings = df.dropna(subset=['Tanggal', 'rr']).set_index('Tanggal')['rr']

                is_late = station_name.lower().strip() in LATE_READING_STATIONS
                hour, minute = (23, 50) if is_late else (0, 0)
                row_data = {'name_arg': station_name, 'Tahun': year, 'Bulan': month}

                for day in range(1, days_in_month + 1):
                    store_day = day + 1 if is_late else day
                    if store_day > days_in_month:
                        # The last 23:50 reading of the month would land on a
                        # day that does not exist in this month; skip it.
                        continue

                    target_dt = pd.Timestamp(datetime(year, month, day, hour, minute), tz='UTC')
                    rr_value = _rr_at(readings, target_dt)
                    row_data[str(store_day)] = MISSING_RR if rr_value is None else rr_value

                # NOTE(review): for late-reading stations column '1' is never
                # filled here, because its value would come from the previous
                # month's file - confirm whether that should be looked up.
                all_rows.append(row_data)

            except Exception as e:
                print(f"Error in file {file_path}: {e}")

# Final output
# Fixed column layout: station name, year, month, then one column per day.
ordered_cols = ['name_arg', 'Tahun', 'Bulan'] + [str(d) for d in range(1, 32)]

# reindex() orders the columns and creates any that are absent (left empty),
# so the export also works when no file produced a row or when some day
# column was never filled.
final_df = pd.DataFrame(all_rows).reindex(columns=ordered_cols)

# Save to CSV
final_df.to_csv(output_file, index=False)
print(f"✅ Saved to: {output_file}")


Mounted at /content/drive
Missing required columns in 2020_03_arg_rokan_iv_koto.csv
Missing required columns in 2020_04_arg_rokan_iv_koto.csv
Missing required columns in 2020_05_arg_rokan_iv_koto.csv
Missing required columns in 2020_06_arg_rokan_iv_koto.csv
Missing required columns in 2020_06_arg_pusako.csv
Missing required columns in 2020_07_arg_pusako.csv
Missing required columns in 2020_07_arg_rokan_iv_koto.csv
Missing required columns in 2020_09_arg_bantan.csv
Missing required columns in 2020_09_arg_tambusai.csv
Missing required columns in 2020_09_arg_teluk_meranti.csv
Missing required columns in 2020_10_arg_rokan_iv_koto.csv
Missing required columns in 2020_10_arg_tambusai.csv
Missing required columns in 2020_11_arg_rokan_iv_koto.csv
Missing required columns in 2020_11_arg_tambusai.csv
Missing required columns in 2020_11_arg_kuantan_tengah.csv
Missing required columns in 2020_12_arg_kuantan_tengah.csv
Missing required columns in 2020_12_arg_rokan_iv_koto.csv
Missing required colum