In [2]:
 # Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Feature preprocessing and data splitting
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Modeling with XGBoost, plus scikit-learn regression metrics for evaluation
import xgboost as xgb
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
# Read the three training tables in one pass: the water-quality table first,
# then the TerraClimate and Landsat feature tables.
water_quality_df, terraclimate_df, landsat_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        'water_quality_training_dataset.csv',
        'terraclimate_features_training.csv',
        'landsat_features_training.csv',
    )
)

# Chain two inner joins on the shared Latitude / Longitude / Sample Date key,
# so only rows whose key appears in all three tables are kept.
merged_df = (
    water_quality_df
    .merge(terraclimate_df, on=['Latitude', 'Longitude', 'Sample Date'], how='inner')
    .merge(landsat_df, on=['Latitude', 'Longitude', 'Sample Date'], how='inner')
)

# Sanity check: preview the leading rows, then the (rows, columns) shape.
print(merged_df.head(), merged_df.shape, sep='\n')

    Latitude  Longitude Sample Date  Total Alkalinity  Electrical Conductance  \
0 -28.760833  17.730278  02-01-2011           128.912                   555.0   
1 -26.861111  28.884722  03-01-2011            74.720                   162.9   
2 -26.450000  28.085833  03-01-2011            89.254                   573.0   
3 -27.671111  27.236944  03-01-2011            82.000                   203.6   
4 -27.356667  27.286389  03-01-2011            56.100                   145.1   

   Dissolved Reactive Phosphorus    pet      nir    green   swir16   swir22  \
0                           10.0  174.2  11190.0  11426.0   7687.5   7645.0   
1                          163.0  124.1  17658.5   9550.0  13746.5  10574.0   
2                           80.0  127.5  15210.0  10720.0  17974.0  14201.0   
3                          101.0  129.7  14887.0  10943.0  13522.0  11403.0   
4                          151.0  129.2  16828.5   9502.5  12665.5   9643.0   

       NDMI     MNDWI  
0  0.185538  0