In [1]:
import pandas as pd
import requests
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Task 1: Data Collection (from CSV file)

In [2]:
# Read the raw environmental readings from disk into a DataFrame.
# Point csv_file_path at the actual location of your CSV file.
# NOTE(review): the absolute "/content/..." location looks environment-specific — confirm before reuse.
csv_file_path = "/content/environmental_data.csv"
environmental_data = pd.read_csv(filepath_or_buffer=csv_file_path)

# Task 2: Data Preprocessing

In [3]:
# Impute missing values with the mean of each numeric column.
# numeric_only=True limits the means to numeric columns: on pandas >= 2.0 a bare
# DataFrame.mean() raises TypeError as soon as the frame holds a non-numeric column
# (e.g. strings). Columns without a computed mean are left untouched by fillna.
# Re-assigning (rather than inplace=True) avoids mutating the loaded frame in place.
environmental_data = environmental_data.fillna(environmental_data.mean(numeric_only=True))

In [4]:
# Rescale the pollution and weather readings to the [0, 1] range with MinMaxScaler.
# The scaled values overwrite the original readings in these columns.
scaled_columns = ['pm25', 'pm10', 'co2', 'temperature', 'humidity', 'wind_speed']
scaler = MinMaxScaler()
environmental_data[scaled_columns] = scaler.fit_transform(environmental_data[scaled_columns])

# Task 3: Feature Engineering

In [5]:
# Trend features for predictive modeling: 3-period moving averages of PM2.5 and temperature.
# The first two rows of each average are NaN because a full 3-row window is not yet available.
environmental_data = environmental_data.assign(
    pm25_ma=environmental_data['pm25'].rolling(window=3).mean(),
    temp_ma=environmental_data['temperature'].rolling(window=3).mean(),
)

In [6]:
# Interaction feature: element-wise product of humidity and temperature
# (the two together might impact air quality).
environmental_data['humidity_temp_interaction'] = environmental_data['humidity'].mul(
    environmental_data['temperature']
)

In [7]:
# Binary flag: 1 where wind_speed is above 0.5, otherwise 0.
# NOTE(review): wind_speed was min-max scaled earlier in this notebook, so 0.5 is the midpoint
# of the observed range rather than a physical speed such as 5 m/s — confirm the intended cutoff.
environmental_data['high_wind_speed'] = environmental_data['wind_speed'].gt(0.5).astype(int)

In [8]:
# Preview the first five rows of the scaled readings next to the engineered features.
print(
    environmental_data.loc[
        :,
        [
            'sensor_id', 'pm25', 'pm10', 'co2', 'temperature', 'humidity', 'wind_speed',
            'pm25_ma', 'temp_ma', 'humidity_temp_interaction', 'high_wind_speed',
        ],
    ].head(5)
)

   sensor_id      pm25      pm10       co2  temperature  humidity  wind_speed  \
0          1  0.241758  0.253406  0.331126     0.425926  0.318182    0.277778   
1          2  0.802198  0.801090  0.768212     0.777778  0.090909    0.416667   
2          3  0.000000  0.000000  0.103753     0.129630  0.772727    0.000000   
3          4  0.516484  0.452316  1.000000     0.370370  0.545455    0.138889   
4          5  1.000000  1.000000  0.543046     0.685185  0.227273    0.750000   

    pm25_ma   temp_ma  humidity_temp_interaction  high_wind_speed  
0       NaN       NaN                   0.135522                0  
1       NaN       NaN                   0.070707                0  
2  0.347985  0.444444                   0.100168                0  
3  0.439560  0.425926                   0.202020                0  
4  0.505495  0.395062                   0.155724                1  


In [9]:
# Persist the processed frame (without the index column) as a CSV for the modeling stage.
environmental_data.to_csv(path_or_buf="preprocessed_environmental_data.csv", index=False)