In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Read the pre-processed table from disk
df = pd.read_csv("Processed_AOD_PM25.csv")

# Station names, keyed by the code looked up from the "Location" column (0..5)
location_names = dict(enumerate([
    "Bollaram Industrial Area, Hyderabad - TSPCB",
    "Central University, Hyderabad - TSPCB",
    "ICRISAT Patancheru, Hyderabad - TSPCB",
    "IDA Pashamylaram, Hyderabad - TSPCB",
    "Sanathnagar, Hyderabad - TSPCB",
    "Zoo Park, Hyderabad - TSPCB",
]))

# Held-out metrics per station; becomes one CSV row per station below
results = {}

# Every distinct location code present in the data gets its own model
locations = df["Location"].unique()

for loc in locations:
    # Codes missing from the mapping still get a readable label
    location_name = location_names.get(loc, f"Unknown Location {loc}")

    # Restrict to the rows of this location
    df_loc = df.loc[df["Location"] == loc]

    # Target is PM2.5; features are all columns except target, date and location code
    y = df_loc["PM2.5"]
    X = df_loc.drop(["PM2.5", "Dates", "Location"], axis=1)

    # 80/20 hold-out split with a fixed seed so reruns give the same partition
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Scaling statistics are learned from the training rows only,
    # then applied unchanged to the test rows
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # RBF-kernel support vector regressor fitted on the scaled training rows
    model = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1).fit(X_train_scaled, y_train)

    # Score the model on the held-out rows
    y_pred = model.predict(X_test_scaled)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Record this station's row (column order: R2, RMSE, MAE)
    results[location_name] = dict(zip(("R2 Score", "RMSE", "MAE"), (r2, rmse, mae)))

    print(f"Location {location_name}: R2 = {r2:.4f}, RMSE = {rmse:.4f}, MAE = {mae:.4f}\n")

# One row per station, one column per metric; written next to the notebook
results_df = pd.DataFrame.from_dict(results, orient="index")
results_df.to_csv("SVR_Model_Performance_by_Location.csv")

print("Training completed. Model performance saved in 'SVR_Model_Performance_by_Location.csv'.")


Location Bollaram Industrial Area, Hyderabad - TSPCB: R2 = 0.4448, RMSE = 14.3863, MAE = 11.5677

Location Central University, Hyderabad - TSPCB: R2 = 0.6140, RMSE = 11.8073, MAE = 10.0514

Location ICRISAT Patancheru, Hyderabad - TSPCB: R2 = 0.3534, RMSE = 16.3495, MAE = 13.1585

Location IDA Pashamylaram, Hyderabad - TSPCB: R2 = 0.4040, RMSE = 15.7242, MAE = 13.8131

Location Sanathnagar, Hyderabad - TSPCB: R2 = 0.2630, RMSE = 21.9458, MAE = 17.0253

Location Zoo Park, Hyderabad - TSPCB: R2 = 0.3823, RMSE = 13.9181, MAE = 10.3261

Training completed. Model performance saved in 'SVR_Model_Performance_by_Location.csv'.


# Test and Train Data Model Performance

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

def _regression_metrics(y_true, y_hat):
    """Return ``(R2, RMSE, MAE)`` for predictions ``y_hat`` against ``y_true``."""
    return (
        r2_score(y_true, y_hat),
        np.sqrt(mean_squared_error(y_true, y_hat)),
        mean_absolute_error(y_true, y_hat),
    )


# Read the pre-processed table from disk
df = pd.read_csv("Processed_AOD_PM25.csv")

# Station name for each code that appears in the "Location" column
location_names = {
    0: "Bollaram Industrial Area, Hyderabad - TSPCB",
    1: "Central University, Hyderabad - TSPCB",
    2: "ICRISAT Patancheru, Hyderabad - TSPCB",
    3: "IDA Pashamylaram, Hyderabad - TSPCB",
    4: "Sanathnagar, Hyderabad - TSPCB",
    5: "Zoo Park, Hyderabad - TSPCB",
}

# Train/test metrics per station; becomes one CSV row per station below
results = {}

# Every distinct location code present in the data gets its own model
locations = df["Location"].unique()

for loc in locations:
    # Codes missing from the mapping still get a readable label
    location_name = location_names.get(loc, f"Unknown Location {loc}")

    # Restrict to the rows of this location
    df_loc = df.loc[df["Location"] == loc]

    # Target is PM2.5; features are all columns except target, date and location code
    y = df_loc["PM2.5"]
    X = df_loc.drop(["PM2.5", "Dates", "Location"], axis=1)

    # 80/20 hold-out split with a fixed seed so reruns give the same partition
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Scaling statistics are learned from the training rows only,
    # then applied unchanged to the test rows
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # RBF-kernel support vector regressor fitted on the scaled training rows
    model = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1).fit(X_train_scaled, y_train)

    # Predictions on the rows the model was fitted on and on the held-out rows
    y_train_pred = model.predict(X_train_scaled)
    y_test_pred = model.predict(X_test_scaled)

    # Same three metrics on both partitions, so the train/test gap can be compared
    r2_train, rmse_train, mae_train = _regression_metrics(y_train, y_train_pred)
    r2_test, rmse_test, mae_test = _regression_metrics(y_test, y_test_pred)

    # Record this station's row; key order fixes the CSV column order
    results[location_name] = {
        "R2 (Train)": r2_train,
        "R2 (Test)": r2_test,
        "RMSE (Train)": rmse_train,
        "RMSE (Test)": rmse_test,
        "MAE (Train)": mae_train,
        "MAE (Test)": mae_test,
    }

    print(f"Location {location_name}:")
    print(f"  R2 (Train) = {r2_train:.4f}, R2 (Test) = {r2_test:.4f}")
    print(f"  RMSE (Train) = {rmse_train:.4f}, RMSE (Test) = {rmse_test:.4f}")
    print(f"  MAE (Train) = {mae_train:.4f}, MAE (Test) = {mae_test:.4f}\n")

# One row per station, one column per metric; written next to the notebook
results_df = pd.DataFrame.from_dict(results, orient="index")
results_df.to_csv("SVR_Model_Performance_by_Location.csv")

print("Training completed. Model performance saved in 'SVR_Model_Performance_by_Location.csv'.")


Location Bollaram Industrial Area, Hyderabad - TSPCB:
  R2 (Train) = 0.9966, R2 (Test) = 0.4448
  RMSE (Train) = 0.9679, RMSE (Test) = 14.3863
  MAE (Train) = 0.1968, MAE (Test) = 11.5677

Location Central University, Hyderabad - TSPCB:
  R2 (Train) = 1.0000, R2 (Test) = 0.6140
  RMSE (Train) = 0.0998, RMSE (Test) = 11.8073
  MAE (Train) = 0.0996, MAE (Test) = 10.0514

Location ICRISAT Patancheru, Hyderabad - TSPCB:
  R2 (Train) = 0.9943, R2 (Test) = 0.3534
  RMSE (Train) = 1.4549, RMSE (Test) = 16.3495
  MAE (Train) = 0.2956, MAE (Test) = 13.1585

Location IDA Pashamylaram, Hyderabad - TSPCB:
  R2 (Train) = 0.9843, R2 (Test) = 0.4040
  RMSE (Train) = 2.3487, RMSE (Test) = 15.7242
  MAE (Train) = 0.5994, MAE (Test) = 13.8131

Location Sanathnagar, Hyderabad - TSPCB:
  R2 (Train) = 0.4122, R2 (Test) = 0.2630
  RMSE (Train) = 55.5088, RMSE (Test) = 21.9458
  MAE (Train) = 6.3513, MAE (Test) = 17.0253

Location Zoo Park, Hyderabad - TSPCB:
  R2 (Train) = 1.0000, R2 (Test) = 0.3823
  RMSE 