<a href="https://colab.research.google.com/github/mdsiam135/research/blob/main/Sequential_CNN(Regression).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
pip install pandas numpy scikit-learn tensorflow matplotlib seaborn




In [39]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns

In [40]:
# --- Data Loading and Preparation ---

# Load the datasets
try:
    train_df = pd.read_csv("/content/drive/MyDrive/Yearbook of Agricultural Statistics/test.csv")
    val_df = pd.read_csv("/content/drive/MyDrive/Yearbook of Agricultural Statistics/train.csv")
    test_df = pd.read_csv("/content/drive/MyDrive/Yearbook of Agricultural Statistics/validation.csv")
    print("Datasets loaded successfully.")
except FileNotFoundError:
    print("Error: One or more CSV files not found. Make sure train.csv, validation.csv, and test.csv are uploaded to your Colab environment.")
    # You might want to add code here to upload files if running in Colab
    # from google.colab import files
    # uploaded = files.upload()
    exit()

Datasets loaded successfully.


In [41]:
# Define target and features
target_column = 'Production (M.Ton)' # Standard single quotes

# Separate features (X) and target (y)
X_train = train_df.drop(target_column, axis=1)
X_val = val_df.drop(target_column, axis=1)
X_test = test_df.drop(target_column, axis=1)

y_train = train_df[target_column]
y_val = val_df[target_column]
y_test = test_df[target_column]

In [42]:
# Ensure all feature columns are numeric (handle potential non-numeric if any - though inspection showed they seem okay)
feature_columns = X_train.columns
X_train = X_train.apply(pd.to_numeric, errors='coerce')
X_val = X_val.apply(pd.to_numeric, errors='coerce')
X_test = X_test.apply(pd.to_numeric, errors='coerce')

In [43]:
# Handle potential NaNs introduced by coercion (if any)
X_train.fillna(X_train.mean(), inplace=True)
X_val.fillna(X_train.mean(), inplace=True) # Use train mean for val/test
X_test.fillna(X_train.mean(), inplace=True)


In [44]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [45]:
# Reshape data for 1D CNN input: (samples, timesteps, features)
# We treat each row as a sequence of length 1 with multiple features
X_train_reshaped = X_train_scaled[:, np.newaxis, :]
X_val_reshaped = X_val_scaled[:, np.newaxis, :]
X_test_reshaped = X_test_scaled[:, np.newaxis, :]


In [46]:
print(f"Training data shape: {X_train_reshaped.shape}")
print(f"Validation data shape: {X_val_reshaped.shape}")
print(f"Test data shape: {X_test_reshaped.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Validation target shape: {y_val.shape}")
print(f"Test target shape: {y_test.shape}")

print("\nData preparation complete.")


Training data shape: (768, 1, 83)
Validation data shape: (3584, 1, 83)
Test data shape: (768, 1, 83)
Training target shape: (768,)
Validation target shape: (3584,)
Test target shape: (768,)

Data preparation complete.


In [47]:
# --- Model Building ---

input_shape = (X_train_reshaped.shape[1], X_train_reshaped.shape[2]) # (timesteps, features)

model = Sequential([
    # Corrected activation function syntax (standard quotes)
    Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=input_shape),
    Dropout(0.3),
    Conv1D(filters=32, kernel_size=1, activation='relu'),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.4),
    Dense(50, activation='relu'),
    Dense(1) # Output layer for regression (1 neuron, linear activation by default)
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.summary()

print("\nModel built successfully.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Model built successfully.


In [57]:
# --- Model Training ---

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

print("\nStarting model training...")

history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=100, # Adjust epochs as needed
    batch_size=32, # Adjust batch size as needed
    validation_data=(X_val_reshaped, y_val),
    callbacks=[early_stopping],
    verbose=1 # Set to 1 or 2 for progress updates, 0 for silent
)

print("\nModel training complete.")


Starting model training...
Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 22807438.0000 - mae: 2469.5769 - val_loss: 60815400.0000 - val_mae: 3433.2891
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 162154848.0000 - mae: 3360.7446 - val_loss: 67970216.0000 - val_mae: 3606.6750
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 71500504.0000 - mae: 3272.6350 - val_loss: 67786936.0000 - val_mae: 3569.1824
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 79ms/step - loss: 25620318.0000 - mae: 2529.6321 - val_loss: 67118184.0000 - val_mae: 3548.9285
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - loss: 41272516.0000 - mae: 2706.2739 - val_loss: 63923188.0000 - val_mae: 3499.5269
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 38303332.0000 - m

In [58]:
# --- Model Evaluation ---

print("\nEvaluating model...")

# Evaluate on Validation Set
val_loss, val_mae = model.evaluate(X_val_reshaped, y_val, verbose=0)
print(f"Validation Loss (MSE): {val_loss:.4f}")
print(f"Validation Mean Absolute Error (MAE): {val_mae:.4f}")



Evaluating model...
Validation Loss (MSE): 56995116.0000
Validation Mean Absolute Error (MAE): 3345.9624


In [59]:
# Evaluate on Test Set
test_loss, test_mae = model.evaluate(X_test_reshaped, y_test, verbose=0)
print(f"Test Loss (MSE): {test_loss:.4f}")
print(f"Test Mean Absolute Error (MAE): {test_mae:.4f}")

Test Loss (MSE): 49386660.0000
Test Mean Absolute Error (MAE): 3389.1213


In [60]:
# Make predictions
y_pred_val = model.predict(X_val_reshaped).flatten()
y_pred_test = model.predict(X_test_reshaped).flatten()

print("\nPredictions generated for validation and test sets.")

[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 

Predictions generated for validation and test sets.


In [61]:
# --- Regression Metrics Calculation ---

print("\nCalculating regression metrics...")

# Validation Set Metrics
mse_val = mean_squared_error(y_val, y_pred_val)
rmse_val = np.sqrt(mse_val)
mae_val_sklearn = mean_absolute_error(y_val, y_pred_val) # Recalculate for consistency check
r2_val = r2_score(y_val, y_pred_val)

print("\n--- Validation Set Metrics ---")
print(f"Mean Squared Error (MSE): {mse_val:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_val:.4f}")
print(f"Mean Absolute Error (MAE): {mae_val_sklearn:.4f}")
print(f"R-squared (R²): {r2_val:.4f}")


Calculating regression metrics...

--- Validation Set Metrics ---
Mean Squared Error (MSE): 56995121.1898
Root Mean Squared Error (RMSE): 7549.5113
Mean Absolute Error (MAE): 3345.9626
R-squared (R²): 0.9319


In [62]:
# Test Set Metrics
mse_test = mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
mae_test_sklearn = mean_absolute_error(y_test, y_pred_test) # Recalculate for consistency check
r2_test = r2_score(y_test, y_pred_test)

print("\n--- Test Set Metrics ---")
print(f"Mean Squared Error (MSE): {mse_test:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_test:.4f}")
print(f"Mean Absolute Error (MAE): {mae_test_sklearn:.4f}")
print(f"R-squared (R²): {r2_test:.4f}")

print("\nRegression metrics calculation complete.")



--- Test Set Metrics ---
Mean Squared Error (MSE): 49386662.1469
Root Mean Squared Error (RMSE): 7027.5645
Mean Absolute Error (MAE): 3389.1215
R-squared (R²): 0.9596

Regression metrics calculation complete.


In [63]:
# --- Visualizations ---

print("\nGenerating visualizations...")

# 1. Learning Curves (Loss)
plt.figure(figsize=(10, 5))
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Model Loss During Training")
plt.xlabel("Epoch")
plt.ylabel("Mean Squared Error (Loss)")
plt.legend()
plt.grid(True)
plt.savefig("training_validation_loss.png") # Save the plot
print("Saved training_validation_loss.png")
# plt.show() # Use plt.show() if running interactively outside Colab script
plt.close() # Close the plot figure to free memory



Generating visualizations...
Saved training_validation_loss.png


In [55]:
# 2. Actual vs. Predicted Scatter Plot (Test Set)
plt.figure(figsize=(10, 10))
sns.scatterplot(x=y_test, y=y_pred_test, alpha=0.6)
# Add a line for perfect predictions
min_val = min(y_test.min(), y_pred_test.min())
max_val = max(y_test.max(), y_pred_test.max())
# Corrected plt.plot syntax with standard quotes
plt.plot([min_val, max_val], [min_val, max_val], color="red", linestyle="--", linewidth=2, label="Perfect Prediction")
plt.title("Actual vs. Predicted Production (Test Set)")
plt.xlabel("Actual Production (M.Ton)")
plt.ylabel("Predicted Production (M.Ton)")
plt.legend()
plt.grid(True)
plt.savefig("actual_vs_predicted_test.png") # Save the plot
print("Saved actual_vs_predicted_test.png")
# plt.show()
plt.close() # Close the plot figure


Saved actual_vs_predicted_test.png


In [56]:
# 3. Actual vs. Predicted Scatter Plot (Validation Set)
plt.figure(figsize=(10, 10))
sns.scatterplot(x=y_val, y=y_pred_val, alpha=0.6)
# Add a line for perfect predictions
min_val_val = min(y_val.min(), y_pred_val.min())
max_val_val = max(y_val.max(), y_pred_val.max())
# Corrected plt.plot syntax with standard quotes
plt.plot([min_val_val, max_val_val], [min_val_val, max_val_val], color="red", linestyle="--", linewidth=2, label="Perfect Prediction")
plt.title("Actual vs. Predicted Production (Validation Set)")
plt.xlabel("Actual Production (M.Ton)")
plt.ylabel("Predicted Production (M.Ton)")
plt.legend()
plt.grid(True)
plt.savefig("actual_vs_predicted_validation.png") # Save the plot
print("Saved actual_vs_predicted_validation.png")
# plt.show()
plt.close() # Close the plot figure

print("\nVisualization generation complete. Plots saved as PNG files.")


Saved actual_vs_predicted_validation.png

Visualization generation complete. Plots saved as PNG files.
