In [4]:
!pip install albumentations

Collecting albumentations
  Downloading albumentations-2.0.5-py3-none-any.whl.metadata (41 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Downloading pydantic-2.11.3-py3-none-any.whl.metadata (65 kB)
Collecting albucore==0.0.23 (from albumentations)
  Downloading albucore-0.0.23-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting stringzilla>=3.10.4 (from albucore==0.0.23->albumentations)
  Downloading stringzilla-3.12.4-cp313-cp313-win_amd64.whl.metadata (81 kB)
Collecting simsimd>=5.9.2 (from albucore==0.0.23->albumentations)
  Downloading simsimd-6.2.1-cp313-cp313-win_amd64.whl.metadata (67 kB)
Collecting annotated-types>=0.6.0 (from pydantic>=2.9.2->albumentations)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.33.1 (from pydantic>=2.9.2->albumentations)
  Downloading pydantic_c


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
!pip install timm

Collecting timm
  Downloading timm-1.0.15-py3-none-any.whl.metadata (52 kB)
Collecting torchvision (from timm)
  Downloading torchvision-0.21.0-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting huggingface_hub (from timm)
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting safetensors (from timm)
  Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Collecting tqdm>=4.42.1 (from huggingface_hub->timm)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading timm-1.0.15-py3-none-any.whl (2.4 MB)
   ---------------------------------------- 0.0/2.4 MB ? eta -:--:--
   ---------------------- ----------------- 1.3/2.4 MB 8.1 MB/s eta 0:00:01
   ---------------------------------------- 2.4/2.4 MB 6.7 MB/s eta 0:00:00
Downloading huggingface_hub-0.30.2-py3-none-any.whl (481 kB)
Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl (308 kB)
Downloading torchvision-0.21.0-cp313-cp313-win_amd64.whl (1.6 MB)
   ----------------


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier

# ✅ Step 1: Load CSV
df = pd.read_csv("D:/fy_project1/data3.csv")  # Replace with your filename if different

# ✅ Step 2: Define the target column
target_col = 'category'

# ✅ Step 3: Clean the label values
df[target_col] = df[target_col].astype(str).str.strip()

# ✅ Step 4: Drop image_name column (it's metadata, not a feature)
df = df.drop(columns=['image_name'])

# ✅ Step 5: Remove rare classes (those with only 1 sample)
class_counts = df[target_col].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df[target_col].isin(valid_classes)]

# ✅ Step 6: Extract features and target
X = filtered_df.drop(columns=[target_col])
y = filtered_df[target_col]

# ✅ Step 7: Define attention module
class EnhancedWeatherAttention(nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.att1 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )
        self.att2 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = torch.FloatTensor(x)
        att1_weights = self.att1(x)
        x = x * att1_weights
        att2_weights = self.att2(x)
        x = x * att2_weights + x
        return x

# ✅ Step 8: Stacking Ensemble Class
class StackingWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features=14):
        self.n_features = n_features
        self.attention = EnhancedWeatherAttention(n_features)
        self.label_encoder = LabelEncoder()

        self.base_models = [
            ('svm', SVC(kernel='rbf', C=10, probability=True, random_state=42)),
            ('rf', RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42)),
            ('xgb', XGBClassifier(n_estimators=300, learning_rate=0.1, max_depth=7, random_state=42)),
            ('mlp', MLPClassifier(hidden_layer_sizes=(64, 32), early_stopping=True))
        ]
        
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=10, max_iter=1000),
            cv=5,
            stack_method='predict_proba'
        )

    def fit(self, X, y):
        y_encoded = self.label_encoder.fit_transform(y)
        X_tensor = torch.FloatTensor(X.values)
        self.attention.train()
        optimizer = torch.optim.Adam(self.attention.parameters(), lr=0.001)

        for _ in range(5):
            optimizer.zero_grad()
            X_att = self.attention(X_tensor)
            loss = F.mse_loss(X_att, X_tensor)
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            X_att_np = self.attention(X_tensor).numpy()
        
        self.meta_learner.fit(X_att_np, y_encoded)
        return self

    def predict_proba(self, X):
        X_tensor = torch.FloatTensor(X.values)
        with torch.no_grad():
            X_att_np = self.attention(X_tensor).numpy()
        return self.meta_learner.predict_proba(X_att_np)

    def predict(self, X):
        probs = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(probs, axis=1))

# ✅ Step 9: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 10: Train and Evaluate
weather_ensemble = StackingWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 11: Metrics
print(f'''
Enhanced Stacking Ensemble Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')



Enhanced Stacking Ensemble Results:
Accuracy:  0.9680
Precision: 0.9680
Recall:    0.9680
F1-score:  0.9680



In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier

# ✅ Step 1: Load and clean the dataset
df = pd.read_csv("D:/fy_project1/data3.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])  # Drop metadata column

# ✅ Step 2: Filter out rare classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]

# ✅ Step 3: Extract features and labels
X = filtered_df.drop(columns=["category"])
y = filtered_df["category"]

# ✅ Step 4: CNN + Transformer Model
class HybridCNNTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=128, nhead=4, dim_feedforward=256),
            num_layers=2
        )
        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.Sigmoid()
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)        # (batch, 1, features)
        x = self.conv(x)          # (batch, 128, features//2)
        x = x.permute(2, 0, 1)    # (seq_len, batch, features)
        x = self.transformer(x)
        x = x.mean(dim=0)
        attn_weights = self.attention(x)
        x = x * attn_weights + x
        return self.classifier(x)

# ✅ Step 5: Scikit-learn compatible wrapper
class HybridWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=20, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=self.num_classes_)

        X_tensor = torch.FloatTensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        check_is_fitted(self, ['model'])
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# ✅ Step 6: Ensemble definition
class ImprovedWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features=14):
        self.n_features = n_features
        self.base_models = [
            ('hybrid_cnn_transformer', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(
                n_estimators=500, max_depth=9, learning_rate=0.05,
                tree_method='hist', eval_metric='mlogloss'
            )),
            ('mlp', MLPClassifier(
                hidden_layer_sizes=(256, 128),
                activation='relu',
                early_stopping=True,
                batch_size=256
            ))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=5,
            stack_method='auto',
            n_jobs=1  # ⚠️ Use 1 to avoid joblib multiprocessing issues with PyTorch
        )

    def fit(self, X, y):
        self.meta_learner.fit(X, y)
        return self

    def predict_proba(self, X):
        return self.meta_learner.predict_proba(X)

    def predict(self, X):
        return self.meta_learner.predict(X)

# ✅ Step 7: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 8: Train and Evaluate
weather_ensemble = ImprovedWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 9: Metrics
print(f'''
Enhanced Stacking Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')





Enhanced Stacking Results:
Accuracy:  0.9776
Precision: 0.9776
Recall:    0.9776
F1-score:  0.9776



In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier

# ✅ Step 1: Load CSV
df = pd.read_csv("D:/fy_project1/data4.csv")  # Replace with your filename if different

# ✅ Step 2: Define the target column
target_col = 'category'

# ✅ Step 3: Clean the label values
df[target_col] = df[target_col].astype(str).str.strip()

# ✅ Step 4: Drop image_name column (it's metadata, not a feature)
df = df.drop(columns=['image_name'])

# ✅ Step 5: Remove rare classes (those with only 1 sample)
class_counts = df[target_col].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df[target_col].isin(valid_classes)]

# ✅ Step 6: Extract features and target
X = filtered_df.drop(columns=[target_col])
y = filtered_df[target_col]

# ✅ Step 7: Define attention module
class EnhancedWeatherAttention(nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.att1 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )
        self.att2 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = torch.FloatTensor(x)
        att1_weights = self.att1(x)
        x = x * att1_weights
        att2_weights = self.att2(x)
        x = x * att2_weights + x
        return x

# ✅ Step 8: Stacking Ensemble Class
class StackingWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features=14):
        self.n_features = n_features
        self.attention = EnhancedWeatherAttention(n_features)
        self.label_encoder = LabelEncoder()

        self.base_models = [
            ('svm', SVC(kernel='rbf', C=10, probability=True, random_state=42)),
            ('rf', RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42)),
            ('xgb', XGBClassifier(n_estimators=300, learning_rate=0.1, max_depth=7, random_state=42)),
            ('mlp', MLPClassifier(hidden_layer_sizes=(64, 32), early_stopping=True))
        ]
        
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=10, max_iter=1000),
            cv=5,
            stack_method='predict_proba'
        )

    def fit(self, X, y):
        y_encoded = self.label_encoder.fit_transform(y)
        X_tensor = torch.FloatTensor(X.values)
        self.attention.train()
        optimizer = torch.optim.Adam(self.attention.parameters(), lr=0.001)

        for _ in range(5):
            optimizer.zero_grad()
            X_att = self.attention(X_tensor)
            loss = F.mse_loss(X_att, X_tensor)
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            X_att_np = self.attention(X_tensor).numpy()
        
        self.meta_learner.fit(X_att_np, y_encoded)
        return self

    def predict_proba(self, X):
        X_tensor = torch.FloatTensor(X.values)
        with torch.no_grad():
            X_att_np = self.attention(X_tensor).numpy()
        return self.meta_learner.predict_proba(X_att_np)

    def predict(self, X):
        probs = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(probs, axis=1))

# ✅ Step 9: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 10: Train and Evaluate
weather_ensemble = StackingWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 11: Metrics
print(f'''
Enhanced Stacking Ensemble Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')



Enhanced Stacking Ensemble Results:
Accuracy:  0.9417
Precision: 0.9354
Recall:    0.9417
F1-score:  0.9378



In [6]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.base import BaseEstimator, ClassifierMixin
from torch.utils.data import DataLoader, TensorDataset
from sklearn.utils.validation import check_is_fitted
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
import numpy as np

# Step 1: Load dataset
df = pd.read_csv("D:/fy_project1/data4.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])

# Step 2: Filter rare classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]

# Step 3: Extract features and labels
X = filtered_df.drop(columns=["category"])
y = filtered_df["category"]

# Step 4: Updated HybridCNNTransformer
class HybridCNNTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.GELU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),

            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.GELU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2)
        )

        transformer_layer = nn.TransformerEncoderLayer(
            d_model=128, nhead=4, dim_feedforward=256, dropout=0.1,
            activation="gelu", batch_first=False, norm_first=True
        )
        self.transformer = nn.TransformerEncoder(transformer_layer, num_layers=3)

        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 128),
            nn.Sigmoid()
        )

        self.classifier = nn.Sequential(
            nn.LayerNorm(128),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = x.unsqueeze(1)  # (batch, 1, features)
        x = self.conv(x)    # (batch, 128, features//4)
        x = x.permute(2, 0, 1)  # (seq_len, batch, features)
        x = self.transformer(x)
        x = x.mean(dim=0)

        attn_weights = self.attention(x)
        x = x * attn_weights + x
        return self.classifier(x)

# Step 5: Sklearn wrapper
class HybridWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=50, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=self.num_classes_)

        X_tensor = torch.FloatTensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
            scheduler.step()

        return self

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        check_is_fitted(self, ['model'])
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# Step 6: Ensemble definition
class ImprovedWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features=14):
        self.n_features = n_features
        self.base_models = [
            ('hybrid_cnn_transformer', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(
                n_estimators=500, max_depth=9, learning_rate=0.05,
                tree_method='hist', eval_metric='mlogloss'
            )),
            ('mlp', MLPClassifier(
                hidden_layer_sizes=(256, 128),
                activation='relu',
                early_stopping=True,
                batch_size=256
            ))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=5,
            stack_method='auto',
            n_jobs=1
        )

    def fit(self, X, y):
        self.meta_learner.fit(X, y)
        return self

    def predict_proba(self, X):
        return self.meta_learner.predict_proba(X)

    def predict(self, X):
        return self.meta_learner.predict(X)

# Step 7: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Step 8: Train and evaluate
weather_ensemble = ImprovedWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# Step 9: Print metrics
print(f'''
Enhanced Stacking Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')





Enhanced Stacking Results:
Accuracy:  0.9507
Precision: 0.9473
Recall:    0.9507
F1-score:  0.9485



In [4]:
# ✅ Step 0: Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier
import torch.optim as optim

# ✅ Step 1: Load and clean the dataset
df = pd.read_csv("D:/fy_project1/data4.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])

# ✅ Step 2: Filter out rare classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]

# ✅ Step 3: Extract features and labels
X = filtered_df.drop(columns=["category"])
y = filtered_df["category"]

# ✅ Step 4: Enhanced Weather Attention Module
class EnhancedWeatherAttention(nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.att1 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )
        self.att2 = nn.Sequential(
            nn.Linear(n_features, n_features // 2),
            nn.ReLU(),
            nn.Linear(n_features // 2, n_features),
            nn.Sigmoid()
        )

    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = torch.FloatTensor(x)
        att1_weights = self.att1(x)
        x = x * att1_weights
        att2_weights = self.att2(x)
        x = x * att2_weights + x
        return x

# ✅ Step 5: Hybrid CNN + Transformer Model
class HybridCNNTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=128, nhead=4, dim_feedforward=256),
            num_layers=2
        )
        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.Sigmoid()
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = x.permute(2, 0, 1)
        x = self.transformer(x)
        x = x.mean(dim=0)
        attn_weights = self.attention(x)
        x = x * attn_weights + x
        return self.classifier(x)

# ✅ Step 6: Wrapper for Hybrid Model
class HybridWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=20, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=len(self.classes_))

        X_tensor = torch.FloatTensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        optimizer = optim.AdamW(self.model.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        check_is_fitted(self, ['model'])
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# ✅ Step 7: Neural Network Meta-Learner
class MetaLearnerNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.bn2 = nn.BatchNorm1d(hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)
        return self.fc3(x)

class NNMetaLearner(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.001, epochs=100, batch_size=32):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None

    def fit(self, X, y):
        self.model = MetaLearnerNN(self.input_dim, self.hidden_dim, self.output_dim)
        X_tensor = torch.FloatTensor(X)
        y_tensor = torch.LongTensor(y)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        for _ in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        check_is_fitted(self, ['model'])
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

# ✅ Step 8: Ensemble Class with Manual Meta Learner
class EnhancedWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features, num_classes):
        self.n_features = n_features
        self.num_classes = num_classes
        self.attention = EnhancedWeatherAttention(n_features)
        self.label_encoder = LabelEncoder()
        self.base_models = [
            ('hybrid', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(n_estimators=500, max_depth=9, learning_rate=0.05,
                                  tree_method='hist', eval_metric='mlogloss')),
            ('mlp', MLPClassifier(hidden_layer_sizes=(256, 128), activation='relu',
                                  early_stopping=True, batch_size=256))
        ]
        self.meta_learner = None

    def fit(self, X, y):
        y_enc = self.label_encoder.fit_transform(y)
        X_tensor = torch.FloatTensor(X.to_numpy() if isinstance(X, pd.DataFrame) else X)
        optimizer = torch.optim.Adam(self.attention.parameters(), lr=0.001)
        for _ in range(5):
            optimizer.zero_grad()
            X_att = self.attention(X_tensor)
            loss = F.mse_loss(X_att, X_tensor)
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            X_transformed = self.attention(X_tensor).numpy()

        self.fitted_base_models_ = []
        base_outputs = []
        for name, model in self.base_models:
            model.fit(X_transformed, y_enc)
            self.fitted_base_models_.append((name, model))
            base_outputs.append(model.predict_proba(X_transformed))

        meta_input = np.hstack(base_outputs)
        self.meta_learner = NNMetaLearner(
            input_dim=meta_input.shape[1], hidden_dim=128, output_dim=self.num_classes
        )
        self.meta_learner.fit(meta_input, y_enc)
        return self

    def predict_proba(self, X):
        X_tensor = torch.FloatTensor(X.to_numpy() if isinstance(X, pd.DataFrame) else X)
        with torch.no_grad():
            X_transformed = self.attention(X_tensor).numpy()
        base_outputs = [model.predict_proba(X_transformed) for _, model in self.fitted_base_models_]
        meta_input = np.hstack(base_outputs)
        return self.meta_learner.predict_proba(meta_input)

    def predict(self, X):
        probs = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(probs, axis=1))

# ✅ Step 9: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 10: Train and Evaluate
ensemble = EnhancedWeatherEnsemble(n_features=X.shape[1], num_classes=len(np.unique(y)))
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)

# ✅ Step 11: Print metrics
print(f'''
Enhanced Attention + Manual MetaLearner Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')





Enhanced Attention + Manual MetaLearner Results:
Accuracy:  0.9395
Precision: 0.9355
Recall:    0.9395
F1-score:  0.9369



In [2]:
# ✅ Step 0: Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier
import torch.optim as optim

# ✅ Step 1: Load and clean the dataset
df = pd.read_csv("D:/fy_project1/data4.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])

# ✅ Step 2: Filter out rare classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]
X = filtered_df.drop(columns=["category"])
y = filtered_df["category"]

# ✅ Enhanced Weather Attention Module
class EnhancedWeatherAttention(nn.Module):
    def __init__(self, n_features):
        super().__init__()
        self.att1 = nn.Sequential(
            nn.Linear(n_features, n_features//2),
            nn.ReLU(),
            nn.Linear(n_features//2, n_features),
            nn.Sigmoid()
        )
        self.att2 = nn.Sequential(
            nn.Linear(n_features, n_features//2),
            nn.ReLU(),
            nn.Linear(n_features//2, n_features),
            nn.Sigmoid()
        )

    def forward(self, x):
        if isinstance(x, np.ndarray):
            x = torch.FloatTensor(x)
        att1_weights = self.att1(x)
        x = x * att1_weights
        att2_weights = self.att2(x)
        x = x * att2_weights + x
        return x

class HybridCNNTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=128, nhead=4, dim_feedforward=256),
            num_layers=2
        )
        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.Sigmoid()
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = x.permute(2, 0, 1)
        x = self.transformer(x)
        x = x.mean(dim=0)
        attn_weights = self.attention(x)
        x = x * attn_weights + x
        return self.classifier(x)

class HybridWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=20, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None
        self.classes_ = None

    def fit(self, X, y):
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(self.input_dim, self.num_classes_)
        X_tensor = torch.FloatTensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        check_is_fitted(self, ['model'])
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

class MetaLearnerNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.bn2 = nn.BatchNorm1d(hidden_dim // 2)
        self.fc3 = nn.Linear(hidden_dim // 2, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

# ✅ Train and Evaluate

def train_and_evaluate(X_train, y_train, X_test, y_test):
    num_classes = len(np.unique(y_train))
    n_features = X_train.shape[1]
    label_encoder = LabelEncoder()
    y_train_enc = label_encoder.fit_transform(y_train)
    y_test_enc = label_encoder.transform(y_test)

    # Step 1: Apply attention
    attention = EnhancedWeatherAttention(n_features)
    optimizer = torch.optim.Adam(attention.parameters(), lr=0.001)
    X_tensor = torch.FloatTensor(X_train.values)
    for _ in range(5):
        optimizer.zero_grad()
        X_att = attention(X_tensor)
        loss = F.mse_loss(X_att, X_tensor)
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        X_train_att = attention(torch.FloatTensor(X_train.values)).numpy()
        X_test_att = attention(torch.FloatTensor(X_test.values)).numpy()

    # Step 2: Train base models
    base_preds_train = []
    base_preds_test = []

    base_models = [
        HybridWrapper(n_features).fit(X_train_att, y_train),
        XGBClassifier(n_estimators=500, max_depth=9, learning_rate=0.05, tree_method='hist', eval_metric='mlogloss').fit(X_train_att, y_train_enc),
        MLPClassifier(hidden_layer_sizes=(256, 128), activation='relu', early_stopping=True, batch_size=256).fit(X_train_att, y_train_enc)
    ]

    for model in base_models:
        base_preds_train.append(model.predict_proba(X_train_att))
        base_preds_test.append(model.predict_proba(X_test_att))

    # Step 3: Prepare meta-input
    meta_X_train = np.hstack(base_preds_train)
    meta_X_test = np.hstack(base_preds_test)

    # Step 4: Train NNMetaLearner
    meta_learner = MetaLearnerNN(input_dim=meta_X_train.shape[1], hidden_dim=128, output_dim=num_classes)
    optimizer = optim.Adam(meta_learner.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    dataset = TensorDataset(torch.FloatTensor(meta_X_train), torch.LongTensor(y_train_enc))
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    for epoch in range(100):
        for batch_X, batch_y in loader:
            optimizer.zero_grad()
            outputs = meta_learner(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Step 5: Evaluate
    with torch.no_grad():
        outputs = meta_learner(torch.FloatTensor(meta_X_test))
        y_pred = np.argmax(F.softmax(outputs, dim=1).numpy(), axis=1)
    print(f'''
    Enhanced Weather Ensemble Results:
    Accuracy:  {accuracy_score(y_test_enc, y_pred):.4f}
    Precision: {precision_score(y_test_enc, y_pred, average='weighted'):.4f}
    Recall:    {recall_score(y_test_enc, y_pred, average='weighted'):.4f}
    F1-score:  {f1_score(y_test_enc, y_pred, average='weighted'):.4f}
    ''')

# ✅ Data Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
model = train_and_evaluate(X_train, y_train, X_test, y_test)





    Enhanced Weather Ensemble Results:
    Accuracy:  0.9469
    Precision: 0.9434
    Recall:    0.9469
    F1-score:  0.9420
    


In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier
import albumentations as A

# ✅ Step 1: Load and clean the dataset
df = pd.read_csv("D:/fy_project1/data4.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])  # Drop metadata column

# ✅ Step 2: Filter rare classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]

# ✅ Step 3: Extract features and labels
X = filtered_df.drop(columns=["category"])
y = filtered_df["category"]

# ✅ Step 4: Data augmentation using Albumentations (real-world conditions)
augmenter = A.Compose([
    A.GaussNoise(p=0.2),
    A.RandomBrightnessContrast(p=0.3),
    A.GaussianBlur(p=0.2),
    A.HueSaturationValue(p=0.2),
])

def augment_data(X):
    X_aug = []
    for x in X:
        img = np.array(x, dtype=np.float32).reshape(1, -1)
        augmented = augmenter(image=img)["image"]
        X_aug.append(augmented.flatten())
    return np.array(X_aug)

# ✅ Step 5: CNN + Transformer Model
class HybridCNNTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=128, nhead=4, dim_feedforward=256),
            num_layers=2
        )
        self.attention = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.Sigmoid()
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)        # (batch, 1, features)
        x = self.conv(x)          # (batch, 128, features//2)
        x = x.permute(2, 0, 1)    # (seq_len, batch, features)
        x = self.transformer(x)
        x = x.mean(dim=0)
        attn_weights = self.attention(x)
        x = x * attn_weights + x
        return self.classifier(x)

# ✅ Step 6: Wrapper for Hybrid model
class HybridWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=20, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    def fit(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()

        # Augment training data
        X_augmented = augment_data(X)

        # Combine original and augmented data
        X_combined = np.vstack([X, X_augmented])
        y_combined = np.concatenate([y, y])

        y_enc = self.label_encoder.fit_transform(y_combined)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=self.num_classes_)

        X_tensor = torch.FloatTensor(X_combined)
        y_tensor = torch.LongTensor(y_enc)

        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        check_is_fitted(self, ['model'])
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(torch.FloatTensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# ✅ Step 7: Ensemble definition
class ImprovedWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, n_features=14):
        self.n_features = n_features
        self.base_models = [
            ('hybrid_cnn_transformer', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(
                n_estimators=500, max_depth=9, learning_rate=0.05,
                tree_method='hist', eval_metric='mlogloss'
            )),
            ('mlp', MLPClassifier(
                hidden_layer_sizes=(256, 128),
                activation='relu',
                early_stopping=True,
                batch_size=256
            ))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=5,
            stack_method='auto',
            n_jobs=1
        )

    def fit(self, X, y):
        self.meta_learner.fit(X, y)
        return self

    def predict_proba(self, X):
        return self.meta_learner.predict_proba(X)

    def predict(self, X):
        return self.meta_learner.predict(X)

# ✅ Step 8: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 9: Train and Evaluate
weather_ensemble = ImprovedWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 10: Metrics
print(f'''
Enhanced Stacking Results with Augmentation:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')


  result = func(img, *args, **kwargs)
  result = func(img, *args, **kwargs)
  result = func(img, *args, **kwargs)
  result = func(img, *args, **kwargs)
  result = func(img, *args, **kwargs)
  result = func(img, *args, **kwargs)



Enhanced Stacking Results with Augmentation:
Accuracy:  0.9537
Precision: 0.9504
Recall:    0.9537
F1-score:  0.9515



In [10]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import albumentations as A
import timm

# ✅ Step 1: Load Dataset
df = pd.read_csv("D:/fy_project1/data4.csv")
df["category"] = df["category"].astype(str).str.strip()
df = df.drop(columns=["image_name"])

# ✅ Step 2: Filter Rare Classes
class_counts = df["category"].value_counts()
valid_classes = class_counts[class_counts > 1].index
filtered_df = df[df["category"].isin(valid_classes)]

X = filtered_df.drop(columns=["category"]).values
y = filtered_df["category"].values

# Assume images are 224x224x3 (Reshape your actual data accordingly)
X_images = X.reshape(-1, 224, 224, 3)

# ✅ Step 3: Data Augmentation
augmenter = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.GaussianBlur(p=0.2),
])

class WeatherDataset(Dataset):
    def __init__(self, images, labels, augment=False):
        self.images = images
        self.labels = labels
        self.augment = augment
        self.le = LabelEncoder()
        self.labels_encoded = self.le.fit_transform(labels)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx].astype(np.uint8)
        label = self.labels_encoded[idx]

        if self.augment:
            img = augmenter(image=img)['image']

        img = img.astype(np.float32) / 255.0
        img = np.transpose(img, (2, 0, 1))  # To channel-first (C,H,W)
        return torch.tensor(img), torch.tensor(label)

# ✅ Step 4: Hybrid EfficientNet + Transformer Model
class EfficientTransformer(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.backbone = timm.create_model('efficientnet_b3', pretrained=True, num_classes=0)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=1536, nhead=8, dim_feedforward=1024),
            num_layers=2
        )
        self.classifier = nn.Linear(1536, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = x.unsqueeze(0)
        x = self.transformer(x)
        x = x.mean(dim=0)
        return self.classifier(x)

# ✅ Step 5: Wrapper
class EfficientWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, epochs=10, lr=0.0001, batch_size=16):
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.label_encoder = LabelEncoder()

    def fit(self, X, y):
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        num_classes = len(self.classes_)

        train_dataset = WeatherDataset(X, y, augment=True)
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)

        self.model = EfficientTransformer(num_classes)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

        self.model.train()
        for epoch in range(self.epochs):
            for imgs, labels in train_loader:
                optimizer.zero_grad()
                outputs = self.model(imgs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        dataset = WeatherDataset(X, np.zeros(len(X)))
        loader = DataLoader(dataset, batch_size=self.batch_size)
        self.model.eval()
        probs = []
        with torch.no_grad():
            for imgs, _ in loader:
                outputs = self.model(imgs)
                probs.append(F.softmax(outputs, dim=1).numpy())
        return np.vstack(probs)

    def predict(self, X):
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# ✅ Step 6: Ensemble
class ImprovedWeatherEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.base_models = [
            ('efficient_transformer', EfficientWrapper()),
            ('xgb', XGBClassifier(n_estimators=500, max_depth=9, learning_rate=0.05, tree_method='hist')),
            ('mlp', MLPClassifier(hidden_layer_sizes=(256,128), activation='relu', early_stopping=True))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=3,
            n_jobs=1
        )

    def fit(self, X, y):
        X_flat = X.reshape(len(X), -1)
        self.meta_learner.fit(X_flat, y)
        return self

    def predict(self, X):
        X_flat = X.reshape(len(X), -1)
        return self.meta_learner.predict(X_flat)

# ✅ Step 7: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_images, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 8: Train & Evaluate
ensemble = ImprovedWeatherEnsemble()
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)

# ✅ Step 9: Metrics
print(f'''
EfficientNet Transformer Ensemble Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')


ValueError: cannot reshape array of size 173888 into shape (224,224,3)