## **1. Impor Library**

In [None]:
import pandas as pd
import numpy as np
from statistics import mode
import os
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

## **2. Collecting Data**

### **2.1 Data Cuaca**

In [None]:
# Load the two monthly microclimate exports (April and May 2025) from /content/.
df1, df2 = map(
    pd.read_csv,
    (
        '/content/microclimate3_04_2025.csv',
        '/content/microclimate3_05_2025.csv',
    ),
)

# Stack both months into a single DataFrame with a fresh 0..n-1 index.
df = pd.concat((df1, df2), ignore_index=True)


In [None]:
# Preview the first five rows of the combined weather data.
df.head(n=5)

In [None]:
# Preview the last five rows of the combined weather data.
df.tail(n=5)

In [None]:
# Remove the wind-related columns, which are not used in the rest of this section.
# errors='ignore' keeps the cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
df = df.drop(columns=['direction', 'angle', 'wind_speed'], errors='ignore')

In [None]:
# Make sure the timestamp column is datetime (a no-op if it already is).
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Observation window; both calendar days are meant to be included in full.
start_date = '2025-04-24'
end_date = '2025-05-07'

# '2025-05-07' is interpreted as 2025-05-07 00:00:00, so comparing the raw
# timestamp with `<= end_date` would discard everything after midnight on the
# last day. Compare the calendar day (timestamp normalized to midnight) instead.
in_window = (df['timestamp'] >= start_date) & (df['timestamp'].dt.normalize() <= end_date)

# .copy() gives an independent frame so later column assignments do not
# write into a slice of `df`.
filtered_df = df[in_window].copy()

# Show the result.
filtered_df.head()


In [None]:
# Make sure timestamp is datetime. `assign` returns a new frame, so nothing is
# written into a filtered slice of `df` (avoids SettingWithCopyWarning).
filtered_df = filtered_df.assign(timestamp=pd.to_datetime(filtered_df['timestamp']))

# Complete one-row-per-minute grid for the whole window.
# 'min' is used instead of the 'T' alias, which is deprecated since pandas 2.2.
full_range = pd.date_range(start='2025-04-24 00:00:00', end='2025-05-07 23:59:00', freq='min')
full_df = pd.DataFrame({'timestamp': full_range})

# Left-join the measurements onto the grid so minutes without data remain as NaN rows.
# NOTE(review): the join matches on exact equality, so only readings stamped
# exactly on the minute will match — confirm the sensor timestamps are minute-aligned.
merged_df2 = pd.merge(full_df, filtered_df, on='timestamp', how='left')

# Show the result.
print(merged_df2.head())


In [None]:
# Rich preview of the first five rows of the per-minute weather grid.
merged_df2.head(n=5)

### **2.2 Data CO2**

In [None]:
import pandas as pd

# One gas-sensor export per day, 2025-04-24 through 2025-05-07, uploaded to /content/.
# The paths are generated from the date range instead of fourteen hand-written
# read_csv calls; this also stops the weather frames `df1`/`df2` from being overwritten.
gas_days = pd.date_range(start='2025-04-24', end='2025-05-07', freq='D')
gas_csv_paths = [f'/content/gas2_{day:%Y%m%d}.csv' for day in gas_days]

# Read every daily file and stack them into a single DataFrame, in date order.
combined_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in gas_csv_paths],
    ignore_index=True,
)

In [None]:
import pandas as pd
import glob
import os

# Google Drive folder holding the daily CO2 CSV exports (adjust if yours differs).
# The path only exists once Drive has been mounted at /content/drive.
folder_path = '/content/drive/MyDrive/CO2'

# Check the folder BEFORE listing it: os.listdir raises on a missing path,
# which previously made the "folder not found" branch unreachable.
if not os.path.isdir(folder_path):
    print("Folder tidak ditemukan:", folder_path)
else:
    # Show the folder contents in sorted order.
    file_list = sorted(os.listdir(folder_path))
    print("Isi folder:", file_list)

    # Collect every .csv file; glob returns them in arbitrary order, so sort
    # to make the concatenation order deterministic.
    all_csv_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))
    print("File CSV ditemukan:", all_csv_files)

    if all_csv_files:
        combined_df = pd.concat([pd.read_csv(f) for f in all_csv_files], ignore_index=True)

        # Save the combined table into the COLLECTING-DATA folder on Drive.
        output_path = '/content/drive/MyDrive/COLLECTING-DATA/hasil_gabungan.csv'
        os.makedirs(os.path.dirname(output_path), exist_ok=True)  # create the folder if needed
        combined_df.to_csv(output_path, index=False)
        print(f"File hasil gabungan csv telah tersimpan dengan nama: '{os.path.basename(output_path)}' di folder: '{os.path.dirname(output_path)}'")
    else:
        print("Tidak ada file CSV ditemukan di folder.")


Isi folder: ['gas2_20250501.csv', 'gas2_20250430.csv', 'gas2_20250503.csv', 'gas2_20250507.csv', 'gas2_20250429.csv', 'gas2_20250428.csv', 'gas2_20250504.csv', 'gas2_20250502.csv', 'gas2_20250425.csv', 'gas2_20250424.csv', 'gas2_20250506.csv', 'gas2_20250427.csv', 'gas2_20250426.csv', 'gas2_20250505.csv']
File CSV ditemukan: ['/content/drive/MyDrive/CO2/gas2_20250501.csv', '/content/drive/MyDrive/CO2/gas2_20250430.csv', '/content/drive/MyDrive/CO2/gas2_20250503.csv', '/content/drive/MyDrive/CO2/gas2_20250507.csv', '/content/drive/MyDrive/CO2/gas2_20250429.csv', '/content/drive/MyDrive/CO2/gas2_20250428.csv', '/content/drive/MyDrive/CO2/gas2_20250504.csv', '/content/drive/MyDrive/CO2/gas2_20250502.csv', '/content/drive/MyDrive/CO2/gas2_20250425.csv', '/content/drive/MyDrive/CO2/gas2_20250424.csv', '/content/drive/MyDrive/CO2/gas2_20250506.csv', '/content/drive/MyDrive/CO2/gas2_20250427.csv', '/content/drive/MyDrive/CO2/gas2_20250426.csv', '/content/drive/MyDrive/CO2/gas2_20250505.csv']


In [None]:
# Preview the first five rows of the combined gas-sensor data.
combined_df.head(n=5)

Unnamed: 0,timestamp,bmp388Pressure,bmp388Temp,bmp388ApprxAltitude,sht85Humi,sht85Temp,co2,ch4,H2OSHT85
0,2025-05-01T00:06:01.352,78.55744934,24.05245972,8.39351368,0,24.52734947,562,0,0
1,2025-05-01T00:06:01.431,101.2254639,34.62210083,8.124289513,0,24.51399803,562,0,0
2,2025-05-01T00:06:01.593,101.2283783,34.64136505,7.740341663,0,24.52734947,562,0,0
3,2025-05-01T00:06:01.755,101.2335129,34.6588707,7.355905056,0,24.51399803,562,0,0
4,2025-05-01T00:06:01.917,101.2375488,34.67176819,7.06406498,0,24.45792198,562,0,0


In [None]:
# Keep only the 'timestamp' and 'co2' columns.
# .copy() makes this an independent frame, so the column assignments done in
# later cells do not trigger SettingWithCopyWarning.
combined_df = combined_df[['timestamp', 'co2']].copy()

# Show the result (pandas truncates the rendering of long frames).
combined_df.head(627)

Unnamed: 0,timestamp,co2
0,2025-05-01T00:06:01.352,562
1,2025-05-01T00:06:01.431,562
2,2025-05-01T00:06:01.593,562
3,2025-05-01T00:06:01.755,562
4,2025-05-01T00:06:01.917,562
...,...,...
622,2025-05-01T00:07:42.231,561
623,2025-05-01T00:07:42.394,561
624,2025-05-01T00:07:42.556,561
625,2025-05-01T00:07:42.718,561


In [None]:
# Select rows where the timestamp OR the co2 value is missing.
# Previously only 'co2' was tested, although the message below reports both columns.
missing_mask = combined_df[['timestamp', 'co2']].isna().any(axis=1)
nan_rows = combined_df[missing_mask]

# Report the count and show only the two relevant columns.
print("Jumlah baris dengan timestamp atau co2 NaN:", len(nan_rows))
display(nan_rows[['timestamp', 'co2']])

Jumlah baris dengan timestamp atau co2 NaN: 5587


Unnamed: 0,timestamp,co2
777,61,
1305,2025-05-01T00:09:32.930,
2089,2025-05-01T00:11:39.990,
2870,2025-05-,
2871,"01T00:13:46.564,101.2220078,34.63520813,8.4730...",
...,...,...
2361565,2025-05-05T23:30:59.459,
2362353,2025-05-05T23:33:07.302,
2362354,6165,
2363142,2025-05-05T23:35:15.300,


In [None]:
from statistics import mode
import pandas as pd

# Parse the sensor timestamps. Values that do not match the expected format
# (e.g. truncated or corrupted fields) become NaT instead of raising.
combined_df['timestamp'] = pd.to_datetime(combined_df['timestamp'], format='%Y-%m-%dT%H:%M:%S.%f', errors='coerce')

# Bucket key: the timestamp floored to the start of its minute.
# 'min' replaces the 'T' alias, which is deprecated since pandas 2.2 and emits a FutureWarning.
combined_df['minute'] = combined_df['timestamp'].dt.floor('min')

# Most frequent CO2 reading within each minute (rows with a NaT minute are
# excluded by groupby's default dropna behaviour).
df_perminute = combined_df.groupby('minute')['co2'].agg(lambda x: mode(x)).reset_index()

# Show the result.
print(df_perminute.head(20000))

  combined_df['minute'] = combined_df['timestamp'].dt.floor('T')  # floor to nearest minute


                  minute  co2
0    2025-04-24 00:06:00  519
1    2025-04-24 00:07:00  519
2    2025-04-24 00:14:00  573
3    2025-04-24 00:15:00  582
4    2025-04-24 00:16:00  581
...                  ...  ...
7727 2025-05-07 22:32:00  576
7728 2025-05-07 22:33:00  580
7729 2025-05-07 22:34:00  583
7730 2025-05-07 22:35:00  581
7731 2025-05-07 22:36:00  580

[7732 rows x 2 columns]


In [None]:
import pandas as pd
import numpy as np

# Make sure timestamp is datetime and co2 is numeric. errors='coerce' turns
# unparseable values into NaT/NaN so corrupted rows cannot abort the cell.
combined_df['timestamp'] = pd.to_datetime(combined_df['timestamp'], errors='coerce')
combined_df['co2'] = pd.to_numeric(combined_df['co2'], errors='coerce')

# Bucket key: the timestamp floored to the start of its minute.
combined_df['minute'] = combined_df['timestamp'].dt.floor('min')

# Unique calendar dates present in the data (not used further in this cell).
all_dates = combined_df['minute'].dt.date.unique()

# Every minute from 00:00 of the first day through 23:59 of the last day.
# The last day is taken with floor('D') + 1 day: the previous ceil('D') - 1 min
# dropped the final minute when the latest reading fell exactly on midnight.
first_day = combined_df['minute'].min().floor('D')
last_day = combined_df['minute'].max().floor('D')
full_minutes = pd.date_range(
    start=first_day,
    end=last_day + pd.Timedelta(days=1) - pd.Timedelta(minutes=1),
    freq='1min'
)
full_minutes_df = pd.DataFrame({'minute': full_minutes})


def _minute_mode(values):
    """Return the first mode of `values`, or NaN when there is no non-null value."""
    modes = values.mode()  # computed once; Series.mode ignores NaN by default
    return modes.iloc[0] if not modes.empty else np.nan


# Most frequent CO2 reading per minute.
minute_co2 = combined_df.groupby('minute')['co2'].agg(_minute_mode).reset_index()

# Left-join onto the full grid so minutes without readings appear as NaN.
merged_df = full_minutes_df.merge(minute_co2, on='minute', how='left')

# Inspect both ends of the result.
print(merged_df.head(10))
print(merged_df.tail(10))

               minute    co2
0 2025-04-24 00:00:00    NaN
1 2025-04-24 00:01:00    NaN
2 2025-04-24 00:02:00    NaN
3 2025-04-24 00:03:00    NaN
4 2025-04-24 00:04:00    NaN
5 2025-04-24 00:05:00    NaN
6 2025-04-24 00:06:00  519.0
7 2025-04-24 00:07:00  519.0
8 2025-04-24 00:08:00    NaN
9 2025-04-24 00:09:00    NaN
                   minute  co2
20150 2025-05-07 23:50:00  NaN
20151 2025-05-07 23:51:00  NaN
20152 2025-05-07 23:52:00  NaN
20153 2025-05-07 23:53:00  NaN
20154 2025-05-07 23:54:00  NaN
20155 2025-05-07 23:55:00  NaN
20156 2025-05-07 23:56:00  NaN
20157 2025-05-07 23:57:00  NaN
20158 2025-05-07 23:58:00  NaN
20159 2025-05-07 23:59:00  NaN


In [None]:
# Report how many per-minute rows the grid contains.
total_rows = merged_df.shape[0]
print("Jumlah total data:", total_rows)

Jumlah total data: 20160


In [None]:
# Save the per-minute CO2 table into the current working directory.
output_path = 'co2_per_minute.csv'
# Write `merged_df` (the per-minute CO2 grid). The cell previously wrote `df`,
# which holds the raw weather data, so the CSV did not contain the CO2 series.
merged_df.to_csv(output_path, index=False)

# Confirm the save.
print(f"File hasil sampling per-menit telah tersimpan dengan nama: '{output_path}' di folder: '/content/'")

File hasil sampling per-menit telah tersimpan dengan nama: 'co2_per_minute.csv' di folder: '/content/'


### **2.3 Merge Data**

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')
# Change into the main project folder so the relative paths used in the
# following cells (e.g. 'COLLECTING-DATA/...') resolve against it.
%cd /content/drive/MyDrive/DBS/

Mounted at /content/drive
/content/drive/MyDrive/DBS


In [None]:
# Load the per-minute CO2 and weather tables from the COLLECTING-DATA folder
# (paths are relative to the current working directory).
co2_df, cuaca_df = map(
    pd.read_csv,
    (
        'COLLECTING-DATA/co2_per_minute.csv',
        'COLLECTING-DATA/cuaca_per_menit.csv',
    ),
)

In [None]:
# Both join keys come back from CSV as strings; parse them to datetime first.
cuaca_df['timestamp'] = pd.to_datetime(cuaca_df['timestamp'])
co2_df['minute'] = pd.to_datetime(co2_df['minute'])

# Left join: keep every CO2 minute and attach the weather row whose
# 'timestamp' equals that 'minute' (NaN where there is no match).
merged_df = co2_df.merge(
    cuaca_df,
    how='left',
    left_on='minute',
    right_on='timestamp',
)

# Check the merge result.
print(merged_df.head())

               minute  co2           timestamp  temperature  humidity  \
0 2025-04-24 00:00:00  NaN 2025-04-24 00:00:00       24.450     100.0   
1 2025-04-24 00:01:00  NaN 2025-04-24 00:01:00          NaN       NaN   
2 2025-04-24 00:02:00  NaN 2025-04-24 00:02:00          NaN       NaN   
3 2025-04-24 00:03:00  NaN 2025-04-24 00:03:00          NaN       NaN   
4 2025-04-24 00:04:00  NaN 2025-04-24 00:04:00       24.469     100.0   

   rainfall  pyrano  
0       0.0     0.0  
1       NaN     NaN  
2       NaN     NaN  
3       NaN     NaN  
4       0.0     0.0  


In [None]:
# Replace the weather-side 'timestamp' column with the CO2-side 'minute'
# column, renamed to 'timestamp'.
# The guard makes the cell safe to re-run: once 'minute' has been renamed, a
# second unguarded run would drop the new 'timestamp' column and leave the
# frame without any time column.
if 'minute' in merged_df.columns:
    merged_df = (
        merged_df
        .drop(columns=['timestamp'])
        .rename(columns={'minute': 'timestamp'})
    )

# Check the result.
print(merged_df.head())

            timestamp  co2  temperature  humidity  rainfall  pyrano
0 2025-04-24 00:00:00  NaN       24.450     100.0       0.0     0.0
1 2025-04-24 00:01:00  NaN          NaN       NaN       NaN     NaN
2 2025-04-24 00:02:00  NaN          NaN       NaN       NaN     NaN
3 2025-04-24 00:03:00  NaN          NaN       NaN       NaN     NaN
4 2025-04-24 00:04:00  NaN       24.469     100.0       0.0     0.0


In [None]:
# Write the merged CO2 + weather table into the COLLECTING-DATA folder.
output_path = 'COLLECTING-DATA/data_output_collecting.csv'
merged_df.to_csv(output_path, index=False)

# Confirm the save, reporting the file name and its folder separately.
saved_folder, saved_name = os.path.split(output_path)
print(f"File data telah tersimpan dengan nama: '{saved_name}' di folder: '{saved_folder}'")

File data telah tersimpan dengan nama: 'data_output_collecting.csv' di folder: 'COLLECTING-DATA'
