In [13]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Read the raw sensor export; 'created_at' is parsed into datetimes at load time.
data = pd.read_csv('indoor_data.csv', parse_dates=['created_at'])

# Quick look at the raw rows before any cleaning.
print("Initial Data:")
print(data.head())

# Cleaning pipeline, expressed as one chain (each step returns a new frame):
#   1. forward-fill gaps in every column,
#   2. promote the timestamp column to the index,
#   3. coerce 'field2' (noted by the author as the air quality index) to
#      numeric -- anything unparseable becomes NaN,
#   4. discard rows where 'field2' is NaN.
# Because the fill runs before the coercion, values that only turn into NaN
# during step 3 are dropped in step 4 rather than filled.
data = (
    data
    .ffill()
    .set_index('created_at')
    .assign(field2=lambda frame: pd.to_numeric(frame['field2'], errors='coerce'))
    .dropna(subset=['field2'])
)

# Quick look at the result of the cleaning pipeline.
print("Cleaned Data:")
print(data.head())

# Persist the cleaned frame (the 'created_at' index is written as the first column).
cleaned_file_path = 'preprocessed_cleaned_indoor_data.csv'
data.to_csv(cleaned_file_path)

print(f"Cleaned data saved to {cleaned_file_path}")


Initial Data:
                 created_at  entry_id  field1  field2  field3  field4  field5  \
0 2024-04-06 12:00:27+05:30      1256     163    42.0    33.8    49.0   520.0   
1 2024-04-06 12:01:30+05:30      1257     162    37.0    33.8    49.0   637.0   
2 2024-04-06 12:02:32+05:30      1258     173    47.0    33.7    50.0   679.0   
3 2024-04-06 12:03:34+05:30      1259     168    37.0    33.3    51.0   539.0   
4 2024-04-06 12:04:36+05:30      1260     168    37.0    33.3    52.0   697.0   

   field6  field7  latitude  longitude  elevation  status  
0    18.0    21.0       NaN        NaN        NaN     NaN  
1    36.0     9.0       NaN        NaN        NaN     NaN  
2    42.0     3.0       NaN        NaN        NaN     NaN  
3    21.0     0.0       NaN        NaN        NaN     NaN  
4    45.0     0.0       NaN        NaN        NaN     NaN  
Cleaned Data:
                           entry_id  field1  field2  field3  field4  field5  \
created_at                                    