In [1]:
import os
import pandas as pd
from skimage.io import imread  # Assuming you'll use skimage for reading Sentinel images
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

In [2]:
# Step 1: Read the CSV with the daily NO2 values into a DataFrame and preview it
csv_file = 'delhi_data_2019.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)
print(df.head(n=5))

         date  no2Value  longitude   latitude regionName  day  month  year  \
0  2019-01-01  0.315386   77.30505  28.646846      Delhi    1      1  2019   
1  2019-01-02  0.274257   77.30505  28.646846      Delhi    2      1  2019   
2  2019-01-03  0.249579   77.30505  28.646846      Delhi    3      1  2019   
3  2019-01-04  0.248769   77.30505  28.646846      Delhi    4      1  2019   
4  2019-01-05  0.190129   77.30505  28.646846      Delhi    5      1  2019   

   weekday  
0        1  
1        2  
2        3  
3        4  
4        5  


In [3]:
# Step 2: Load Sentinel images
# Folder that holds the per-day .tif files read by load_sentinel_image.
# NOTE(review): this is an absolute path on one specific Windows machine; the
# notebook only finds the images where this exact directory exists.
image_folder = r"C:\Users\ashis\Desktop\Anjali\ImageData\2019\Delhi_NO2"

def load_sentinel_image(date):
    """Read the image stored for ``date`` from ``image_folder``.

    The file name is built as ``DelhiNO2<YYYY-MM-DD>.tif``.

    Parameters
    ----------
    date : object with a ``strftime`` method (e.g. ``pandas.Timestamp``)
        Day whose image should be loaded.

    Returns
    -------
    Whatever ``imread`` returns for the file, or ``None`` when ``imread``
    raises ``FileNotFoundError`` (a warning with the path is printed then).
    """
    file_name = f'DelhiNO2{date.strftime("%Y-%m-%d")}.tif'
    image_path = os.path.join(image_folder, file_name)
    try:
        raster = imread(image_path)
    except FileNotFoundError:
        # Missing days are reported but do not abort the run; callers get None.
        print(f"Warning: File not found - {image_path}")
        return None
    # Any preprocessing or feature extraction on the raw image would go here.
    return raster

# Replace 'date' with real datetimes assembled from the 'year', 'month', 'day' columns
df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

# Load the image for every row's date; rows without a file get None
df['image'] = df['date'].apply(load_sentinel_image)



In [4]:
# Step 3: Feature Engineering
def calculate_image_stats(image):
    """Summarise an image by four global pixel statistics.

    Parameters
    ----------
    image : array-like or None
        Pixel values of one image, or ``None`` when no image is available.

    Returns
    -------
    tuple
        ``(mean, std, max, min)`` computed over all pixels, or
        ``(None, None, None, None)`` when ``image`` is ``None`` or contains
        no pixels at all.

    Notes
    -----
    NaN pixels are not filtered out, so a NaN anywhere in the image makes the
    returned statistics NaN.
    """
    if image is None:
        return None, None, None, None

    pixels = np.asarray(image)
    # np.max / np.min raise ValueError on an empty array; treat an image with
    # zero pixels the same way as a missing image instead of crashing.
    if pixels.size == 0:
        return None, None, None, None

    return np.mean(pixels), np.std(pixels), np.max(pixels), np.min(pixels)

# Compute the four statistics for every image, then transpose the per-row
# tuples into one sequence per statistic and store each as its own column
df['mean_pixel'], df['std_pixel'], df['max_pixel'], df['min_pixel'] = zip(
    *map(calculate_image_stats, df['image'])
)

In [5]:
# Step 4: Prepare data for model training
# Rows whose image could not be loaded have no pixel statistics (None/NaN).
# They carry no feature information and can make estimator fitting fail, so
# keep only the rows where all four features are present.
X = df[['mean_pixel', 'std_pixel', 'max_pixel', 'min_pixel']].dropna()  # Features used for prediction
y = df.loc[X.index, 'no2Value']  # Target variable, restricted to the same rows as X

# Split data into training and testing sets (80% / 20%, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 5: Train a regression model (Random Forest Regressor), tuning its
# hyper-parameters with an exhaustive grid search
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Every combination in the grid is scored by R2 using 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='r2',
)
grid_search.fit(X_train, y_train)

print("Best parameters found by GridSearchCV:", grid_search.best_params_, sep="\n")

# Use the best model from GridSearchCV for evaluation and prediction
best_model = grid_search.best_estimator_

Best parameters found by GridSearchCV:
{'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 100}


In [7]:
# Step 6: Evaluate the model on both the training and the held-out test split
y_pred_train = best_model.predict(X_train)
y_pred_test = best_model.predict(X_test)

# Goodness of fit (R2) per split
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

# Additional evaluation metric: RMSE, i.e. the square root of the mean squared error
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))

print(f"Training R2 Score: {train_r2}")
print(f"Testing R2 Score: {test_r2}")
print(f"Training RMSE: {train_rmse}")
print(f"Testing RMSE: {test_rmse}")

Training R2 Score: 0.6172050546529642
Testing R2 Score: 0.35261255773395817
Training RMSE: 0.033260382414128514
Testing RMSE: 0.046510780077743084


In [11]:
# Step 7: Predict NO2 value for a specific date
date_of_interest = pd.to_datetime('2019-01-01')
image_of_interest = load_sentinel_image(date_of_interest)

if image_of_interest is None:
    # Without an image there are no features to feed the model
    print(f"No image found for {date_of_interest.date()}. Prediction cannot be made.")
else:
    # The image is flattened to 1-D before the statistics are computed
    flat_pixels = image_of_interest.reshape(-1)
    mean_val, std_val, max_val, min_val = calculate_image_stats(flat_pixels)

    # Single-row frame with the same feature column names the model was trained on
    prediction_input = pd.DataFrame({
        'mean_pixel': [mean_val],
        'std_pixel': [std_val],
        'max_pixel': [max_val],
        'min_pixel': [min_val],
    })

    predicted_no2 = best_model.predict(prediction_input)
    print(f"Predicted NO2 value for {date_of_interest.date()}: {predicted_no2[0]}")

Predicted NO2 value for 2019-01-01: 0.2765728429128043


# Second trial: a simpler model that uses only the mean pixel value as a feature (R2 score ≈ 0.096, see the output below) 👇

In [25]:
import os
import pandas as pd
from skimage.io import imread  # Assuming you'll use skimage for reading Sentinel images
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Step 1: Load CSV data (re-read, so `df` is replaced by a fresh frame for this trial)
csv_file = 'delhi_data_2019.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)

# Step 2: Load Sentinel images
# Folder that holds the per-day .tif files read by load_sentinel_image
image_folder = r"C:\Users\ashis\Desktop\Anjali\ImageData\2019\Delhi_NO2"

def load_sentinel_image(date):
    """Read the image stored for ``date`` from ``image_folder``.

    The file name is built as ``DelhiNO2<YYYY-MM-DD>.tif``. This definition
    replaces the function of the same name defined in an earlier cell.

    Parameters
    ----------
    date : object with a ``strftime`` method (e.g. ``pandas.Timestamp``)
        Day whose image should be loaded.

    Returns
    -------
    Whatever ``imread`` returns for the file, or ``None`` when ``imread``
    raises ``FileNotFoundError`` (a warning with the path is printed then).
    """
    file_name = f'DelhiNO2{date.strftime("%Y-%m-%d")}.tif'
    image_path = os.path.join(image_folder, file_name)
    try:
        raster = imread(image_path)
    except FileNotFoundError:
        # Missing days are reported but do not abort the run; callers get None.
        print(f"Warning: File not found - {image_path}")
        return None
    # Any preprocessing or feature extraction on the raw image would go here.
    return raster

# Replace 'date' with real datetimes assembled from the 'year', 'month', 'day' columns
df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

# Load the image for every row's date; rows without a file get None
df['image'] = df['date'].apply(load_sentinel_image)

# Step 3: Feature Engineering (Example: Calculate mean pixel value as a feature)
def calculate_mean_pixel(image):
    """Return the mean pixel value of ``image``.

    Parameters
    ----------
    image : object exposing ``.mean()`` (e.g. a NumPy array) or None
        Pixel values of one image, or ``None`` when no image is available.

    Returns
    -------
    The result of ``image.mean()``, or ``None`` when ``image`` is ``None``.
    """
    # Example feature extraction; swap in another summary if required.
    return None if image is None else image.mean()

# One feature per row: the mean pixel value of that day's image (missing when no image was loaded)
df['mean_pixel_value'] = df['image'].apply(calculate_mean_pixel)

# Step 4: Prepare data for model training
# Rows whose image could not be loaded have no feature value (None/NaN).
# They carry no feature information and can make estimator fitting fail, so
# keep only the rows where the feature is present.
X = df[['mean_pixel_value']].dropna()  # Feature(s) used for prediction
y = df.loc[X.index, 'no2Value']  # Target variable, restricted to the same rows as X

# Split data into training and testing sets (80% / 20%, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train a regression model (Random Forest Regressor with default
# hyper-parameters and a fixed seed); fit() returns the fitted estimator itself
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Step 6: Evaluate the model on the held-out test split
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R2 Score: {r2}")

# Step 7: Optionally, use the trained model for predictions on new data
# Example: three hand-picked mean pixel values, in the same single-column
# layout the model was trained on
new_data = pd.DataFrame([100.0, 120.0, 90.0], columns=['mean_pixel_value'])
predictions = model.predict(new_data)
print("Predictions:", predictions)


R2 Score: 0.0963632540273317
Predictions: [0.1678295  0.24421542 0.20423371]


# End of the second model.