In [25]:
# pandas provides the DataFrame used by the loading and cleaning cells below.
import pandas as pd
# warnings is used in the visible cells only to configure the global filter.
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides pandas deprecation /
# FutureWarning messages (e.g. for fillna(method=...)) — consider narrowing
# it to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [26]:
# Read the raw IoT sensor export from the working directory into `df`
df = pd.read_csv(filepath_or_buffer='IoTProcessed_Data.csv')

In [27]:
# Display basic information about the dataset (column names, non-null counts,
# dtypes, memory usage).
print("Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() — doing so appends a stray "None" line.
df.info()

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37922 entries, 0 to 37921
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   date                     37920 non-null  object 
 1   tempreature              37922 non-null  int64  
 2   humidity                 37922 non-null  int64  
 3   water_level              37922 non-null  int64  
 4   N                        37922 non-null  int64  
 5   P                        37922 non-null  int64  
 6   K                        37922 non-null  int64  
 7   Fan_actuator_OFF         37922 non-null  float64
 8   Fan_actuator_ON          37922 non-null  float64
 9   Watering_plant_pump_OFF  37922 non-null  float64
 10  Watering_plant_pump_ON   37922 non-null  float64
 11  Water_pump_actuator_OFF  37922 non-null  float64
 12  Water_pump_actuator_ON   37922 non-null  float64
dtypes: float64(6), int64(6), object(1)
memory usage: 3.8+ MB

In [28]:
# Tally the null entries of every column and print the tally under a heading
print("\nMissing Values:", df.isna().sum(), sep="\n")


Missing Values:
date                       2
tempreature                0
humidity                   0
water_level                0
N                          0
P                          0
K                          0
Fan_actuator_OFF           0
Fan_actuator_ON            0
Watering_plant_pump_OFF    0
Watering_plant_pump_ON     0
Water_pump_actuator_OFF    0
Water_pump_actuator_ON     0
dtype: int64


In [29]:
# Preview the first five rows of the raw frame as plain text
print(df.head(n=5))

                  date  tempreature  humidity  water_level    N    P    K  \
0  2024-02-08 06:10:00           41        63          100  255  255  255   
1  2024-02-08 06:15:00           41        59          100  255  255  255   
2  2024-02-08 06:20:00           41        62          100  255  255  255   
3  2024-02-08 06:05:00           40        60          100  255  255  255   
4  2024-02-08 06:00:00           39        61          100  255  255  255   

   Fan_actuator_OFF  Fan_actuator_ON  Watering_plant_pump_OFF  \
0               0.0              1.0                      1.0   
1               0.0              1.0                      1.0   
2               0.0              1.0                      1.0   
3               0.0              1.0                      1.0   
4               0.0              1.0                      1.0   

   Watering_plant_pump_ON  Water_pump_actuator_OFF  Water_pump_actuator_ON  
0                     0.0                      1.0                   

In [30]:
# Convert 'date' column to datetime format; missing entries become NaT.
df['date'] = pd.to_datetime(df['date'])

# Mean gap between consecutive rows. Informational only: the fill below does
# NOT use it. The preview of the first rows shows timestamps that are not in
# chronological order, so this value should not be read as a sampling interval.
time_diff = df['date'].diff().mean()

# Fill each missing 'date' from its neighbouring row: first copy the previous
# row's timestamp (at most one consecutive gap), then, for anything still
# missing (e.g. a gap in the very first row), copy the next row's timestamp.
# Series.ffill()/bfill() replace fillna(method=...), which is deprecated in
# pandas 2.1+; the result is the same.
df['date'] = df['date'].ffill(limit=1).bfill(limit=1)

In [31]:
# Re-count nulls per column now that the 'date' gaps have been filled
print("\nMissing Values:", df.isna().sum(), sep="\n")


Missing Values:
date                       0
tempreature                0
humidity                   0
water_level                0
N                          0
P                          0
K                          0
Fan_actuator_OFF           0
Fan_actuator_ON            0
Watering_plant_pump_OFF    0
Watering_plant_pump_ON     0
Water_pump_actuator_OFF    0
Water_pump_actuator_ON     0
dtype: int64


In [32]:
# Persist the cleaned frame next to the source file, omitting the row index
df.to_csv(path_or_buf='Cleaned_IoTProcessed_Data.csv', index=False)

In [33]:
# Display information about the cleaned dataset (column names, non-null
# counts, dtypes, memory usage).
print("\nCleaned Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() — doing so appends a stray "None" line.
df.info()


Cleaned Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37922 entries, 0 to 37921
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   date                     37922 non-null  datetime64[ns]
 1   tempreature              37922 non-null  int64         
 2   humidity                 37922 non-null  int64         
 3   water_level              37922 non-null  int64         
 4   N                        37922 non-null  int64         
 5   P                        37922 non-null  int64         
 6   K                        37922 non-null  int64         
 7   Fan_actuator_OFF         37922 non-null  float64       
 8   Fan_actuator_ON          37922 non-null  float64       
 9   Watering_plant_pump_OFF  37922 non-null  float64       
 10  Watering_plant_pump_ON   37922 non-null  float64       
 11  Water_pump_actuator_OFF  37922 non-null  float64       
 12  Water_pump_actuator_ON   37922 non-null  float64       

In [34]:
# Final null tally per column, printed under the same heading as before
print("\nMissing Values:", df.isna().sum(), sep="\n")


Missing Values:
date                       0
tempreature                0
humidity                   0
water_level                0
N                          0
P                          0
K                          0
Fan_actuator_OFF           0
Fan_actuator_ON            0
Watering_plant_pump_OFF    0
Watering_plant_pump_ON     0
Water_pump_actuator_OFF    0
Water_pump_actuator_ON     0
dtype: int64


In [35]:
# Summary report: row count of the source CSV (re-read from disk) shown next
# to the row count of the in-memory cleaned frame
print("\nSummary Report:")
print("Original dataset size:", pd.read_csv('IoTProcessed_Data.csv').shape[0])
print("Cleaned dataset size:", df.shape[0])


Summary Report:
Original dataset size: 37922
Cleaned dataset size: 37922


In [36]:
# Optional: show the first five rows of the cleaned frame under a heading
print("\nSample of Cleaned Dataset:", df.head(n=5), sep="\n")


Sample of Cleaned Dataset:
                 date  tempreature  humidity  water_level    N    P    K  \
0 2024-02-08 06:10:00           41        63          100  255  255  255   
1 2024-02-08 06:15:00           41        59          100  255  255  255   
2 2024-02-08 06:20:00           41        62          100  255  255  255   
3 2024-02-08 06:05:00           40        60          100  255  255  255   
4 2024-02-08 06:00:00           39        61          100  255  255  255   

   Fan_actuator_OFF  Fan_actuator_ON  Watering_plant_pump_OFF  \
0               0.0              1.0                      1.0   
1               0.0              1.0                      1.0   
2               0.0              1.0                      1.0   
3               0.0              1.0                      1.0   
4               0.0              1.0                      1.0   

   Watering_plant_pump_ON  Water_pump_actuator_OFF  Water_pump_actuator_ON  
0                     0.0                      