In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier

# ✅ Steps 1-2: name the label column and read the feature table
# NOTE(review): the path below is an absolute local path; adjust it per machine.
target_col = 'category'
df = pd.read_csv("D:/fy_project1/data3.csv")  # Replace with your filename if different

# ✅ Steps 3-4: cast labels to stripped strings, then drop image_name
# (treated as metadata, not a feature)
df = (
    df.assign(**{target_col: df[target_col].astype(str).str.strip()})
    .drop(columns=['image_name'])
)

# ✅ Step 5: keep only the labels that occur more than once
# (these labels are later used for a stratified train/test split)
class_counts = df[target_col].value_counts()
valid_classes = class_counts.loc[class_counts.gt(1)].index
filtered_df = df.loc[df[target_col].isin(valid_classes)]

# ✅ Step 6: separate the label column from the feature columns
y = filtered_df[target_col]
X = filtered_df.drop(columns=[target_col])

# ✅ Step 7: Define attention module
class EnhancedWeatherAttention(nn.Module):
    """Two stacked sigmoid feature gates, the second applied residually.

    Each gate is a bottleneck MLP (n_features -> n_features // 2 -> n_features)
    ending in a sigmoid, so it emits one multiplier per input feature.

    Args:
        n_features: width of the last dimension of the input.
    """

    def __init__(self, n_features):
        super().__init__()
        bottleneck = n_features // 2

        def make_gate():
            # Linear -> ReLU -> Linear -> Sigmoid, same layout for both gates.
            return nn.Sequential(
                nn.Linear(n_features, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, n_features),
                nn.Sigmoid(),
            )

        # att1 is created before att2, keeping parameter creation order fixed.
        self.att1 = make_gate()
        self.att2 = make_gate()

    def forward(self, x):
        """Gate ``x`` twice and return a tensor of the same shape.

        NumPy arrays are converted to a float tensor first; anything else is
        used as given.
        """
        features = torch.FloatTensor(x) if isinstance(x, np.ndarray) else x
        # First gate: plain element-wise re-weighting.
        gated = features * self.att1(features)
        # Second gate: re-weighting plus a skip connection around it.
        return gated * self.att2(gated) + gated

# ✅ Step 8: Stacking Ensemble Class
class StackingWeatherEnsemble(ClassifierMixin, BaseEstimator):
    """Stacking classifier over attention-reweighted tabular features.

    Inputs pass through an ``EnhancedWeatherAttention`` module; the result is
    handed to a ``StackingClassifier`` whose base models are an RBF SVM, a
    random forest, XGBoost and an MLP, combined by logistic regression on
    5-fold out-of-fold class probabilities.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        n_features: number of feature columns; sizes the attention module.

    Attributes set by ``fit``:
        classes_: original class labels, in ``predict_proba`` column order.
    """

    def __init__(self, n_features=14):
        self.n_features = n_features
        self.attention = EnhancedWeatherAttention(n_features)
        self.label_encoder = LabelEncoder()

        self.base_models = [
            ('svm', SVC(kernel='rbf', C=10, probability=True, random_state=42)),
            ('rf', RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42)),
            ('xgb', XGBClassifier(n_estimators=300, learning_rate=0.1, max_depth=7, random_state=42)),
            ('mlp', MLPClassifier(hidden_layer_sizes=(64, 32), early_stopping=True))
        ]

        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=10, max_iter=1000),
            cv=5,
            stack_method='predict_proba'
        )

    @staticmethod
    def _to_tensor(X):
        """Return ``X`` (DataFrame, ndarray or nested sequence) as a float32 tensor.

        ``np.array`` makes a fresh writable copy, so the tensor never aliases
        the caller's data.
        """
        return torch.from_numpy(np.array(X, dtype=np.float32))

    def _attend(self, X_tensor):
        """Run the attention module in eval mode without gradients; return an ndarray."""
        self.attention.eval()
        with torch.no_grad():
            return self.attention(X_tensor).numpy()

    def fit(self, X, y):
        """Tune the attention module, then fit the stacked classifier.

        Args:
            X: 2-D numeric features (DataFrame or array-like).
            y: class labels; encoded to integers internally.

        Returns:
            self
        """
        y_encoded = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_

        X_tensor = self._to_tensor(X)
        self.attention.train()
        optimizer = torch.optim.Adam(self.attention.parameters(), lr=0.001)

        # Five full-batch Adam steps minimising the MSE between the gated
        # output and the raw input (labels are not used in this stage).
        for _ in range(5):
            optimizer.zero_grad()
            loss = F.mse_loss(self.attention(X_tensor), X_tensor)
            loss.backward()
            optimizer.step()

        self.meta_learner.fit(self._attend(X_tensor), y_encoded)
        return self

    def predict_proba(self, X):
        """Return class probabilities; columns follow ``classes_``."""
        return self.meta_learner.predict_proba(self._attend(self._to_tensor(X)))

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        probs = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(probs, axis=1))

# ✅ Step 9: Train-Test Split
# Hold out 20% of the rows, stratified on the label, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 10: Train and Evaluate
# Seed the global torch and NumPy generators *before* building the ensemble:
# the attention weights are initialised in the constructor and the MLP base
# model has no random_state of its own, so without this the reported metrics
# change from run to run.
torch.manual_seed(42)
np.random.seed(42)
weather_ensemble = StackingWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 11: Metrics (support-weighted averages over classes)
print(f'''
Enhanced Stacking Ensemble Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')



Enhanced Stacking Ensemble Results:
Accuracy:  0.9680
Precision: 0.9680
Recall:    0.9680
F1-score:  0.9680



In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier

# ✅ Step 1: Load the dataset, normalise labels to stripped strings and drop
# the image_name metadata column
# NOTE(review): the path below is an absolute local path; adjust it per machine.
df = (
    pd.read_csv("D:/fy_project1/data3.csv")
    .assign(category=lambda frame: frame["category"].astype(str).str.strip())
    .drop(columns=["image_name"])
)

# ✅ Step 2: Keep only the classes that occur more than once
class_counts = df["category"].value_counts()
valid_classes = class_counts.loc[class_counts.gt(1)].index
filtered_df = df.loc[df["category"].isin(valid_classes)]

# ✅ Step 3: Split the frame into labels and feature columns
y = filtered_df["category"]
X = filtered_df.drop(columns=["category"])

# ✅ Step 4: CNN + Transformer Model
class HybridCNNTransformer(nn.Module):
    """Conv1d feature extractor, Transformer encoder, sigmoid gate and linear head.

    Args:
        input_dim: accepted for interface compatibility; not read when the
            layers are built.
        num_classes: width of the returned logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        width = 128

        conv_layers = [
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(64, width, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*conv_layers)

        encoder_layer = nn.TransformerEncoderLayer(d_model=width, nhead=4, dim_feedforward=256)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)

        gate_layers = [
            nn.Linear(width, 64),
            nn.ReLU(),
            nn.Linear(64, width),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*gate_layers)

        self.classifier = nn.Linear(width, num_classes)

    def forward(self, x):
        """Map a (batch, features) tensor to (batch, num_classes) logits."""
        # (batch, features) -> (batch, 1, features) -> (batch, 128, features // 2)
        channels = self.conv(x.unsqueeze(1))
        # The encoder layer is sequence-first: (positions, batch, 128).
        tokens = channels.permute(2, 0, 1)
        # Average over positions to get one 128-d vector per sample.
        pooled = self.transformer(tokens).mean(dim=0)
        gate = self.attention(pooled)
        # Gated features plus a skip connection, then the linear head.
        return self.classifier(pooled * gate + pooled)

# ✅ Step 5: Scikit-learn compatible wrapper
class HybridWrapper(ClassifierMixin, BaseEstimator):
    """scikit-learn style classifier wrapping ``HybridCNNTransformer``.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        input_dim: number of feature columns, forwarded to the network.
        epochs: number of passes over the training data.
        lr: AdamW learning rate.

    Attributes set by ``fit``:
        classes_: original class labels, in ``predict_proba`` column order.
        num_classes_: number of distinct labels seen in ``fit``.
        model: the trained ``HybridCNNTransformer`` (``None`` before ``fit``).
    """

    def __init__(self, input_dim, epochs=20, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    @staticmethod
    def _to_tensor(X):
        """Return ``X`` (DataFrame, ndarray or nested sequence) as a float32 tensor."""
        # np.array makes a fresh writable copy, so the tensor never aliases X.
        return torch.from_numpy(np.array(X, dtype=np.float32))

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` with mini-batch AdamW.

        Returns:
            self
        """
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=self.num_classes_)

        X_tensor = self._to_tensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        """Return softmax class probabilities, one row per sample.

        Raises:
            sklearn.exceptions.NotFittedError: if ``fit`` has not been called.
        """
        # ``model`` exists (as None) straight after __init__, so checking it can
        # never fail; ``classes_`` is only assigned inside fit().
        check_is_fitted(self, ['classes_'])
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(self._to_tensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# ✅ Step 6: Ensemble definition
class ImprovedWeatherEnsemble(ClassifierMixin, BaseEstimator):
    """Stacking ensemble of a CNN/Transformer net, XGBoost and an MLP.

    The three base models are combined by a logistic-regression final
    estimator inside a 5-fold ``StackingClassifier``.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        n_features: number of feature columns, forwarded to ``HybridWrapper``.

    Attributes set by ``fit``:
        classes_: class labels as reported by the fitted stacking classifier.
    """

    def __init__(self, n_features=14):
        self.n_features = n_features
        self.base_models = [
            ('hybrid_cnn_transformer', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(
                n_estimators=500, max_depth=9, learning_rate=0.05,
                tree_method='hist', eval_metric='mlogloss'
            )),
            ('mlp', MLPClassifier(
                hidden_layer_sizes=(256, 128),
                activation='relu',
                early_stopping=True,
                batch_size=256
            ))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=5,
            stack_method='auto',
            n_jobs=1  # ⚠️ Use 1 to avoid joblib multiprocessing issues with PyTorch
        )

    def fit(self, X, y):
        """Fit the stacking classifier and record ``classes_``; returns self."""
        self.meta_learner.fit(X, y)
        # Mirror the fitted label set so code that inspects ``classes_`` on a
        # classifier works with this wrapper too.
        self.classes_ = self.meta_learner.classes_
        return self

    def predict_proba(self, X):
        """Return the stacking classifier's class probabilities for ``X``."""
        return self.meta_learner.predict_proba(X)

    def predict(self, X):
        """Return the stacking classifier's predicted labels for ``X``."""
        return self.meta_learner.predict(X)

# ✅ Step 7: Train-Test Split
# Hold out 20% of the rows, stratified on the label, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 8: Train and Evaluate
# Seed the global torch and NumPy generators first: the network's weight
# initialisation and mini-batch shuffling draw from torch's generator and the
# MLP base model has no random_state of its own, so without this the reported
# metrics change from run to run.
torch.manual_seed(42)
np.random.seed(42)
weather_ensemble = ImprovedWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 9: Metrics (support-weighted averages over classes)
print(f'''
Enhanced Stacking Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')





Enhanced Stacking Results:
Accuracy:  0.9776
Precision: 0.9776
Recall:    0.9776
F1-score:  0.9776



In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier

# ✅ Steps 1-2: name the label column and read the feature table
# NOTE(review): the path below is an absolute local path; adjust it per machine.
target_col = 'category'
df = pd.read_csv("D:/fy_project1/data4.csv")  # Replace with your filename if different

# ✅ Steps 3-4: cast labels to stripped strings, then drop image_name
# (treated as metadata, not a feature)
df = (
    df.assign(**{target_col: df[target_col].astype(str).str.strip()})
    .drop(columns=['image_name'])
)

# ✅ Step 5: keep only the labels that occur more than once
# (these labels are later used for a stratified train/test split)
class_counts = df[target_col].value_counts()
valid_classes = class_counts.loc[class_counts.gt(1)].index
filtered_df = df.loc[df[target_col].isin(valid_classes)]

# ✅ Step 6: separate the label column from the feature columns
y = filtered_df[target_col]
X = filtered_df.drop(columns=[target_col])

# ✅ Step 7: Define attention module
class EnhancedWeatherAttention(nn.Module):
    """Two stacked sigmoid feature gates, the second applied residually.

    Each gate is a bottleneck MLP (n_features -> n_features // 2 -> n_features)
    ending in a sigmoid, so it emits one multiplier per input feature.

    Args:
        n_features: width of the last dimension of the input.
    """

    def __init__(self, n_features):
        super().__init__()
        bottleneck = n_features // 2

        def make_gate():
            # Linear -> ReLU -> Linear -> Sigmoid, same layout for both gates.
            return nn.Sequential(
                nn.Linear(n_features, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, n_features),
                nn.Sigmoid(),
            )

        # att1 is created before att2, keeping parameter creation order fixed.
        self.att1 = make_gate()
        self.att2 = make_gate()

    def forward(self, x):
        """Gate ``x`` twice and return a tensor of the same shape.

        NumPy arrays are converted to a float tensor first; anything else is
        used as given.
        """
        features = torch.FloatTensor(x) if isinstance(x, np.ndarray) else x
        # First gate: plain element-wise re-weighting.
        gated = features * self.att1(features)
        # Second gate: re-weighting plus a skip connection around it.
        return gated * self.att2(gated) + gated

# ✅ Step 8: Stacking Ensemble Class
class StackingWeatherEnsemble(ClassifierMixin, BaseEstimator):
    """Stacking classifier over attention-reweighted tabular features.

    Inputs pass through an ``EnhancedWeatherAttention`` module; the result is
    handed to a ``StackingClassifier`` whose base models are an RBF SVM, a
    random forest, XGBoost and an MLP, combined by logistic regression on
    5-fold out-of-fold class probabilities.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        n_features: number of feature columns; sizes the attention module.

    Attributes set by ``fit``:
        classes_: original class labels, in ``predict_proba`` column order.
    """

    def __init__(self, n_features=14):
        self.n_features = n_features
        self.attention = EnhancedWeatherAttention(n_features)
        self.label_encoder = LabelEncoder()

        self.base_models = [
            ('svm', SVC(kernel='rbf', C=10, probability=True, random_state=42)),
            ('rf', RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42)),
            ('xgb', XGBClassifier(n_estimators=300, learning_rate=0.1, max_depth=7, random_state=42)),
            ('mlp', MLPClassifier(hidden_layer_sizes=(64, 32), early_stopping=True))
        ]

        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=10, max_iter=1000),
            cv=5,
            stack_method='predict_proba'
        )

    @staticmethod
    def _to_tensor(X):
        """Return ``X`` (DataFrame, ndarray or nested sequence) as a float32 tensor.

        ``np.array`` makes a fresh writable copy, so the tensor never aliases
        the caller's data.
        """
        return torch.from_numpy(np.array(X, dtype=np.float32))

    def _attend(self, X_tensor):
        """Run the attention module in eval mode without gradients; return an ndarray."""
        self.attention.eval()
        with torch.no_grad():
            return self.attention(X_tensor).numpy()

    def fit(self, X, y):
        """Tune the attention module, then fit the stacked classifier.

        Args:
            X: 2-D numeric features (DataFrame or array-like).
            y: class labels; encoded to integers internally.

        Returns:
            self
        """
        y_encoded = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_

        X_tensor = self._to_tensor(X)
        self.attention.train()
        optimizer = torch.optim.Adam(self.attention.parameters(), lr=0.001)

        # Five full-batch Adam steps minimising the MSE between the gated
        # output and the raw input (labels are not used in this stage).
        for _ in range(5):
            optimizer.zero_grad()
            loss = F.mse_loss(self.attention(X_tensor), X_tensor)
            loss.backward()
            optimizer.step()

        self.meta_learner.fit(self._attend(X_tensor), y_encoded)
        return self

    def predict_proba(self, X):
        """Return class probabilities; columns follow ``classes_``."""
        return self.meta_learner.predict_proba(self._attend(self._to_tensor(X)))

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        probs = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(probs, axis=1))

# ✅ Step 9: Train-Test Split
# Hold out 20% of the rows, stratified on the label, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ✅ Step 10: Train and Evaluate
# Seed the global torch and NumPy generators *before* building the ensemble:
# the attention weights are initialised in the constructor and the MLP base
# model has no random_state of its own, so without this the reported metrics
# change from run to run.
torch.manual_seed(42)
np.random.seed(42)
weather_ensemble = StackingWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# ✅ Step 11: Metrics (support-weighted averages over classes)
print(f'''
Enhanced Stacking Ensemble Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')



Enhanced Stacking Ensemble Results:
Accuracy:  0.9417
Precision: 0.9354
Recall:    0.9417
F1-score:  0.9378



In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import check_is_fitted
from xgboost import XGBClassifier

# Step 1: Load dataset, normalise labels to stripped strings, drop image_name
# NOTE(review): the path below is an absolute local path; adjust it per machine.
df = (
    pd.read_csv("D:/fy_project1/data4.csv")
    .assign(category=lambda frame: frame["category"].astype(str).str.strip())
    .drop(columns=["image_name"])
)

# Step 2: Filter rare classes (keep labels that occur more than once)
class_counts = df["category"].value_counts()
valid_classes = class_counts.loc[class_counts.gt(1)].index
filtered_df = df.loc[df["category"].isin(valid_classes)]

# Step 3: Extract labels and features
y = filtered_df["category"]
X = filtered_df.drop(columns=["category"])

# Step 4: Updated HybridCNNTransformer
class HybridCNNTransformer(nn.Module):
    """Conv1d stack, pre-norm Transformer encoder, sigmoid gate and normalised linear head.

    Args:
        input_dim: accepted for interface compatibility; not read when the
            layers are built.
        num_classes: width of the returned logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        width = 128

        # Two conv stages, each: Conv1d -> GELU -> MaxPool1d(2) -> Dropout(0.2).
        conv_layers = [
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.GELU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            nn.Conv1d(64, width, kernel_size=3, padding=1),
            nn.GELU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
        ]
        self.conv = nn.Sequential(*conv_layers)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=width,
            nhead=4,
            dim_feedforward=256,
            dropout=0.1,
            activation="gelu",
            batch_first=False,
            norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=3)

        gate_layers = [
            nn.Linear(width, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, width),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*gate_layers)

        self.classifier = nn.Sequential(
            nn.LayerNorm(width),
            nn.Linear(width, num_classes),
        )

    def forward(self, x):
        """Map a (batch, features) tensor to (batch, num_classes) logits."""
        # (batch, features) -> (batch, 1, features) -> (batch, 128, features // 4)
        channels = self.conv(x.unsqueeze(1))
        # batch_first=False, so the encoder expects (positions, batch, 128).
        tokens = channels.permute(2, 0, 1)
        # Average over positions to get one 128-d vector per sample.
        pooled = self.transformer(tokens).mean(dim=0)
        gate = self.attention(pooled)
        # Gated features plus a skip connection, then LayerNorm + linear head.
        return self.classifier(pooled * gate + pooled)

# Step 5: Sklearn wrapper
class HybridWrapper(ClassifierMixin, BaseEstimator):
    """scikit-learn style classifier wrapping ``HybridCNNTransformer``.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        input_dim: number of feature columns, forwarded to the network.
        epochs: number of passes over the training data.
        lr: initial AdamW learning rate (halved every 10 epochs by StepLR).

    Attributes set by ``fit``:
        classes_: original class labels, in ``predict_proba`` column order.
        num_classes_: number of distinct labels seen in ``fit``.
        model: the trained ``HybridCNNTransformer`` (``None`` before ``fit``).
    """

    def __init__(self, input_dim, epochs=50, lr=0.001):
        self.input_dim = input_dim
        self.epochs = epochs
        self.lr = lr
        self.label_encoder = LabelEncoder()
        self.model = None

    @staticmethod
    def _to_tensor(X):
        """Return ``X`` (DataFrame, ndarray or nested sequence) as a float32 tensor."""
        # np.array makes a fresh writable copy, so the tensor never aliases X.
        return torch.from_numpy(np.array(X, dtype=np.float32))

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` with mini-batch AdamW.

        Returns:
            self
        """
        y_enc = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        self.model = HybridCNNTransformer(input_dim=self.input_dim, num_classes=self.num_classes_)

        X_tensor = self._to_tensor(X)
        y_tensor = torch.LongTensor(y_enc)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

        self.model.train()
        for epoch in range(self.epochs):
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
            # Step the learning-rate schedule once per epoch, after the batches.
            scheduler.step()

        return self

    def predict_proba(self, X):
        """Return softmax class probabilities, one row per sample.

        Raises:
            sklearn.exceptions.NotFittedError: if ``fit`` has not been called.
        """
        # ``model`` exists (as None) straight after __init__, so checking it can
        # never fail; ``classes_`` is only assigned inside fit().
        check_is_fitted(self, ['classes_'])
        # eval() switches the network's dropout layers off for inference.
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(self._to_tensor(X))
        return F.softmax(outputs, dim=1).numpy()

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        proba = self.predict_proba(X)
        return self.label_encoder.inverse_transform(np.argmax(proba, axis=1))

# Step 6: Ensemble definition
class ImprovedWeatherEnsemble(ClassifierMixin, BaseEstimator):
    """Stacking ensemble of a CNN/Transformer net, XGBoost and an MLP.

    The three base models are combined by a logistic-regression final
    estimator inside a 5-fold ``StackingClassifier``.

    ``ClassifierMixin`` is listed before ``BaseEstimator`` (mixins on the
    left), the order scikit-learn's developer guide asks for so that the
    classifier tags of the mixin are not masked by ``BaseEstimator``.

    Args:
        n_features: number of feature columns, forwarded to ``HybridWrapper``.

    Attributes set by ``fit``:
        classes_: class labels as reported by the fitted stacking classifier.
    """

    def __init__(self, n_features=14):
        self.n_features = n_features
        self.base_models = [
            ('hybrid_cnn_transformer', HybridWrapper(n_features)),
            ('xgb', XGBClassifier(
                n_estimators=500, max_depth=9, learning_rate=0.05,
                tree_method='hist', eval_metric='mlogloss'
            )),
            ('mlp', MLPClassifier(
                hidden_layer_sizes=(256, 128),
                activation='relu',
                early_stopping=True,
                batch_size=256
            ))
        ]
        self.meta_learner = StackingClassifier(
            estimators=self.base_models,
            final_estimator=LogisticRegression(C=15, max_iter=2000),
            cv=5,
            stack_method='auto',
            n_jobs=1
        )

    def fit(self, X, y):
        """Fit the stacking classifier and record ``classes_``; returns self."""
        self.meta_learner.fit(X, y)
        # Mirror the fitted label set so code that inspects ``classes_`` on a
        # classifier works with this wrapper too.
        self.classes_ = self.meta_learner.classes_
        return self

    def predict_proba(self, X):
        """Return the stacking classifier's class probabilities for ``X``."""
        return self.meta_learner.predict_proba(X)

    def predict(self, X):
        """Return the stacking classifier's predicted labels for ``X``."""
        return self.meta_learner.predict(X)

# Step 7: Train-test split
# Hold out 20% of the rows, stratified on the label, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Step 8: Train and evaluate
# Seed the global torch and NumPy generators first: the network's weight
# initialisation, dropout and mini-batch shuffling draw from torch's generator
# and the MLP base model has no random_state of its own, so without this the
# reported metrics change from run to run.
torch.manual_seed(42)
np.random.seed(42)
weather_ensemble = ImprovedWeatherEnsemble(n_features=X.shape[1])
weather_ensemble.fit(X_train, y_train)
y_pred = weather_ensemble.predict(X_test)

# Step 9: Print metrics (support-weighted averages over classes)
print(f'''
Enhanced Stacking Results:
Accuracy:  {accuracy_score(y_test, y_pred):.4f}
Precision: {precision_score(y_test, y_pred, average='weighted'):.4f}
Recall:    {recall_score(y_test, y_pred, average='weighted'):.4f}
F1-score:  {f1_score(y_test, y_pred, average='weighted'):.4f}
''')





Enhanced Stacking Results:
Accuracy:  0.9507
Precision: 0.9473
Recall:    0.9507
F1-score:  0.9485

