In [1]:
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import os

def test_data_paths(data_dir, catalog_path):
    """Test if the paths exist and contain data"""
    #print("\nTesting data paths:")
    #print(f"Data directory exists: {os.path.exists(data_dir)}")
    #print(f"Catalog file exists: {os.path.exists(catalog_path)}")
    
    if os.path.exists(data_dir):
        files = os.listdir(data_dir)
        #print(f"Number of files in data directory: {len(files)}")
        #print(f"First few files: {files[:5] if files else 'No files found'}")
    
    if os.path.exists(catalog_path):
        try:
            catalog = pd.read_csv(catalog_path)
            #print(f"Catalog shape: {catalog.shape}")
            #print(f"Catalog columns: {catalog.columns.tolist()}")
        except Exception as e:
            print(f"Error reading catalog: {str(e)}")

class MoonquakeClassifier:
    def __init__(self, data_dir, catalog_path):
        """Initialize the classifier with paths to data directory and catalog file."""
        #print("\n=== Initializing MoonquakeClassifier ===")
        self.data_dir = data_dir
        #print(f"Loading catalog from: {catalog_path}")
        self.catalog = pd.read_csv(catalog_path)
        #print(f"Catalog loaded successfully with shape: {self.catalog.shape}")
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        
    def extract_features(self, velocity_data, time_data):
        """Extract features from velocity time series data."""
        #print("Extracting features...")
        try:
            # Time domain features
            time_features = [
                np.mean(velocity_data),
                np.std(velocity_data),
                np.max(np.abs(velocity_data)),
                np.min(velocity_data),
                np.percentile(velocity_data, 75),
                np.percentile(velocity_data, 25),
                np.mean(np.abs(np.diff(velocity_data))),
                np.std(np.abs(np.diff(velocity_data)))
            ]
            
            # Basic feature check
            #print(f"Time features extracted: {len(time_features)} features")
            
            # Frequency domain features
            time_step = time_data[1] - time_data[0]
            fft_result = np.fft.fft(velocity_data)
            frequencies = np.fft.fftfreq(len(velocity_data), d=time_step)
            fft_magnitude = np.abs(fft_result)
            
            low_freq, high_freq = 0.1, 10
            fft_filtered = fft_magnitude.copy()
            fft_filtered[(frequencies < low_freq) | (frequencies > high_freq)] = 0
            
            freq_features = [
                np.mean(fft_filtered),
                np.std(fft_filtered),
                np.max(fft_filtered),
                np.sum(fft_filtered),
                np.median(fft_filtered)
            ]
            
            #print(f"Frequency features extracted: {len(freq_features)} features")
            
            # Spectral features
            f, t, Sxx = signal.spectrogram(velocity_data, fs=1/time_step)
            spectral_features = [
                np.mean(Sxx),
                np.std(Sxx),
                np.max(Sxx)
            ]
            
            #print(f"Spectral features extracted: {len(spectral_features)} features")
            
            all_features = time_features + freq_features + spectral_features
            #print(f"Total features extracted: {len(all_features)}")
            return all_features
            
        except Exception as e:
            print(f"Error in feature extraction: {str(e)}")
            raise
    
    def process_single_file(self, file_path):
        """Process a single CSV file and extract features."""
        #print(f"\nProcessing file: {os.path.basename(file_path)}")
        try:
            df = pd.read_csv(file_path)
            #print(f"File loaded successfully with shape: {df.shape}")
            
            velocity = df['velocity(m/s)'].values
            time_rel = df['time_rel(sec)'].values
            #print(f"Velocity data shape: {velocity.shape}")
            
            features = self.extract_features(velocity, time_rel)
            return features
            
        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            return None

    def prepare_dataset(self):
        """
        Prepare the complete dataset by processing all files and matching with catalog.
        Returns:
            tuple: (X, y, processed_files) - features, labels, and list of processed filenames
        """
        print("\n=== Preparing Dataset ===")
        X = []
        y = []
        processed_files = []
        
        # Get list of CSV files
        csv_files = [f for f in os.listdir(self.data_dir) if f.endswith('.csv')]
        #print(f"Found {len(csv_files)} CSV files in directory")
        
        # Debug: Print first few entries of the catalog
        #print("\nFirst few catalog entries:")
        #print(self.catalog['filename'].head())
        
        for filename in csv_files:
            # Normalize filename (remove extension)
            normalized_filename = filename.replace('.csv', '')
            #print(f"\nProcessing {filename} (normalized: {normalized_filename})")
            
            # Debug: Print exact matching condition
            matching_entries = self.catalog[self.catalog['filename'].str.contains(normalized_filename, regex=False)]
            #print(f"Number of matching entries found: {len(matching_entries)}")
            
            if len(matching_entries) > 0:
                #print(f"Found catalog entry for {filename}")
                file_path = os.path.join(self.data_dir, filename)
                features = self.process_single_file(file_path)
                
                if features is not None:
                    X.append(features)
                    y.append(matching_entries['mq_type'].iloc[0])
                    processed_files.append(filename)
                    #print(f"Successfully processed {filename}")
            else:
                print(f"No catalog entry found for {filename}")
        
        # Check if dataset is empty after processing all files
        if not X:
            print("No matching files found in the catalog. Dataset is empty.")
            return np.array([]), np.array([]), []
        
        # Convert to numpy arrays after processing all files
        X = np.array(X)
        y = np.array(y)
        
        return X, y, processed_files

    
    def train_and_evaluate(self, test_size=0.2, random_state=42):
        """Train and evaluate the classifier."""
        print("\n=== Training and Evaluation ===")
        try:
            # Prepare dataset
            X, y, processed_files = self.prepare_dataset()
            
            # Print dataset statistics
            print("\nDataset Statistics:")
            print(f"Total samples: {len(X)}")
            print("\nClass distribution:")
            unique_classes, counts = np.unique(y, return_counts=True)
            for cls, count in zip(unique_classes, counts):
                print(f"{cls}: {count}")
            
            # Encode labels
            y_encoded = self.label_encoder.fit_transform(y)
            print(f"Encoded {len(unique_classes)} classes")
            
            # Split dataset
            X_train, X_test, y_train, y_test = train_test_split(
                X, y_encoded, test_size=test_size, random_state=random_state, stratify=y_encoded
            )
            print(f"\nTrain set size: {len(X_train)}")
            print(f"Test set size: {len(X_test)}")
            
            # Scale features
            X_train_scaled = self.scaler.fit_transform(X_train)
            X_test_scaled = self.scaler.transform(X_test)
            
            # Train and evaluate models
            models = {
                'Random Forest': RandomForestClassifier(
                    n_estimators=100, 
                    max_depth=10,
                    random_state=random_state
                ),
                'Neural Network': MLPClassifier(
                    hidden_layer_sizes=(100, 50),
                    max_iter=1000,
                    random_state=random_state
                )
            }
            
            results = {}
            for name, model in models.items():
                print(f"\nTraining {name}...")
                model.fit(X_train_scaled, y_train)
                print(f"{name} training completed")
                
                # Make predictions
                y_pred = model.predict(X_test_scaled)
                
                # Print classification report
                print(f"\nClassification Report for {name}:")
                report = classification_report(
                    y_test,
                    y_pred,
                    target_names=self.label_encoder.classes_
                )
                print(f"\n{report}")
                
                results[name] = {
                    'model': model,
                    'predictions': y_pred,
                    'report': report
                }
            
            return results
            
        except Exception as e:
            print(f"Error in training and evaluation: {str(e)}")
            raise

def main():
    """Main function to run the moonquake classification pipeline."""
    try:
        # Define paths
        data_dir = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\data\\S12_GradeA'
        catalog_path = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\catalogs\\apollo12_catalog_GradeA_final.csv'


        
        # Test paths before proceeding
        # print("\n=== Testing Data Paths ===")
        test_data_paths(data_dir, catalog_path)
        
        # Create instance of classifier
        # print("\n=== Creating Classifier Instance ===")
        classifier = MoonquakeClassifier(data_dir, catalog_path)
        
        # Train and evaluate
        # print("\n=== Starting Training and Evaluation ===")
        results = classifier.train_and_evaluate()
        
        # Print final results
        print("\n=== Final Results ===")
        for model_name, model_results in results.items():
            print(f"\nResults for {model_name}:")
            print(f"Model: {model_results['model']}")
            print(f"Classification Report:\n{model_results['report']}")
            
    except Exception as e:
        print(f"\nAn error occurred in main: {str(e)}")
        import traceback
        print(traceback.format_exc())

if __name__ == "main":
    print("\n=== Starting Moonquake Classification Program ===")
    main()
    print("\n=== Program Completed ===")

In [2]:
main()


=== Training and Evaluation ===

=== Preparing Dataset ===

Dataset Statistics:
Total samples: 76

Class distribution:
deep_mq: 9
impact_mq: 64
shallow_mq: 3
Encoded 3 classes

Train set size: 60
Test set size: 16

Training Random Forest...
Random Forest training completed

Classification Report for Random Forest:

              precision    recall  f1-score   support

     deep_mq       0.00      0.00      0.00         2
   impact_mq       0.79      0.85      0.81        13
  shallow_mq       0.00      0.00      0.00         1

    accuracy                           0.69        16
   macro avg       0.26      0.28      0.27        16
weighted avg       0.64      0.69      0.66        16


Training Neural Network...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Neural Network training completed

Classification Report for Neural Network:

              precision    recall  f1-score   support

     deep_mq       0.00      0.00      0.00         2
   impact_mq       0.77      0.77      0.77        13
  shallow_mq       0.00      0.00      0.00         1

    accuracy                           0.62        16
   macro avg       0.26      0.26      0.26        16
weighted avg       0.62      0.62      0.62        16


=== Final Results ===

Results for Random Forest:
Model: RandomForestClassifier(max_depth=10, random_state=42)
Classification Report:
              precision    recall  f1-score   support

     deep_mq       0.00      0.00      0.00         2
   impact_mq       0.79      0.85      0.81        13
  shallow_mq       0.00      0.00      0.00         1

    accuracy                           0.69        16
   macro avg       0.26      0.28      0.27        16
weighted avg       0.64      0.69      0.66        16


Results for Neural Netwo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_classes, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=128, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)  # Global average pooling
        return self.fc(x)

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

class MoonquakeDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.FloatTensor(features)
        self.labels = torch.LongTensor(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

class MoonquakeClassifier:
    def __init__(self, data_dir, catalog_path):
        """Initialize the classifier with paths to data directory and catalog file."""
        self.data_dir = data_dir
        self.catalog = pd.read_csv(catalog_path)
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def extract_features(self, velocity_data, time_data):
        """Extract features from velocity time series data."""
        #print("Extracting features...")
        try:
            # Time domain features
            time_features = [
                np.mean(velocity_data),
                np.std(velocity_data),
                np.max(np.abs(velocity_data)),
                np.min(velocity_data),
                np.percentile(velocity_data, 75),
                np.percentile(velocity_data, 25),
                np.mean(np.abs(np.diff(velocity_data))),
                np.std(np.abs(np.diff(velocity_data)))
            ]
            
            # Basic feature check
            #print(f"Time features extracted: {len(time_features)} features")
            
            # Frequency domain features
            time_step = time_data[1] - time_data[0]
            fft_result = np.fft.fft(velocity_data)
            frequencies = np.fft.fftfreq(len(velocity_data), d=time_step)
            fft_magnitude = np.abs(fft_result)
            
            low_freq, high_freq = 0.1, 10
            fft_filtered = fft_magnitude.copy()
            fft_filtered[(frequencies < low_freq) | (frequencies > high_freq)] = 0
            
            freq_features = [
                np.mean(fft_filtered),
                np.std(fft_filtered),
                np.max(fft_filtered),
                np.sum(fft_filtered),
                np.median(fft_filtered)
            ]
            
            #print(f"Frequency features extracted: {len(freq_features)} features")
            
            # Spectral features
            f, t, Sxx = signal.spectrogram(velocity_data, fs=1/time_step)
            spectral_features = [
                np.mean(Sxx),
                np.std(Sxx),
                np.max(Sxx)
            ]
            
            #print(f"Spectral features extracted: {len(spectral_features)} features")
            
            all_features = time_features + freq_features + spectral_features
            #print(f"Total features extracted: {len(all_features)}")
            return all_features
            
        except Exception as e:
            print(f"Error in feature extraction: {str(e)}")
            raise
    
    def process_single_file(self, file_path):
        """Process a single CSV file and extract features."""
        #print(f"\nProcessing file: {os.path.basename(file_path)}")
        try:
            df = pd.read_csv(file_path)
            #print(f"File loaded successfully with shape: {df.shape}")
            
            velocity = df['velocity(m/s)'].values
            time_rel = df['time_rel(sec)'].values
            #print(f"Velocity data shape: {velocity.shape}")
            
            features = self.extract_features(velocity, time_rel)
            return features
            
        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            return None
    def prepare_dataset(self):
        """
        Prepare the complete dataset by processing all files and matching with catalog.
        Returns:
            tuple: (X, y, processed_files) - features, labels, and list of processed filenames
        """
        # print("\n=== Preparing Dataset ===")
        X = []
        y = []
        processed_files = []
        
        # Get list of CSV files
        csv_files = [f for f in os.listdir(self.data_dir) if f.endswith('.csv')]
        #print(f"Found {len(csv_files)} CSV files in directory")
        
        # Debug: Print first few entries of the catalog
        #print("\nFirst few catalog entries:")
        #print(self.catalog['filename'].head())
        
        for filename in csv_files:
            # Normalize filename (remove extension)
            normalized_filename = filename.replace('.csv', '')
            #print(f"\nProcessing {filename} (normalized: {normalized_filename})")
            
            # Debug: Print exact matching condition
            matching_entries = self.catalog[self.catalog['filename'].str.contains(normalized_filename, regex=False)]
            #print(f"Number of matching entries found: {len(matching_entries)}")
            
            if len(matching_entries) > 0:
                #print(f"Found catalog entry for {filename}")
                file_path = os.path.join(self.data_dir, filename)
                features = self.process_single_file(file_path)
                
                if features is not None:
                    X.append(features)
                    y.append(matching_entries['mq_type'].iloc[0])
                    processed_files.append(filename)
                    #print(f"Successfully processed {filename}")
            else:
                print(f"No catalog entry found for {filename}")
        
        # Check if dataset is empty after processing all files
        if not X:
            print("No matching files found in the catalog. Dataset is empty.")
            return np.array([]), np.array([]), []
        
        # Convert to numpy arrays after processing all files
        X = np.array(X)
        y = np.array(y)
        
        # print(f"\nDataset preparation completed:")
        # print(f"X shape: {X.shape}")
        # print(f"y shape: {y.shape}")
        # print(f"Processed {len(processed_files)} files successfully")
        
        return X, y, processed_files
    def train_and_evaluate(self, test_size=0.2, random_state=42, batch_size=32, num_epochs=50):
        print("\n=== Training and Evaluation ===")
        try:
            X, y, processed_files = self.prepare_dataset()
            
            print("\nDataset Statistics:")
            print(f"Total samples: {len(X)}")
            print("\nClass distribution:")
            unique_classes, counts = np.unique(y, return_counts=True)
            for cls, count in zip(unique_classes, counts):
                print(f"{cls}: {count}")
            
            y_encoded = self.label_encoder.fit_transform(y)
            print(f"Encoded {len(unique_classes)} classes")
            
            X_train, X_test, y_train, y_test = train_test_split(
                X, y_encoded, test_size=test_size, random_state=random_state, stratify=y_encoded
            )
            print(f"\nTrain set size: {len(X_train)}")
            print(f"Test set size: {len(X_test)}")
            
            X_train_scaled = self.scaler.fit_transform(X_train)
            X_test_scaled = self.scaler.transform(X_test)
            
            train_dataset = MoonquakeDataset(X_train_scaled, y_train)
            test_dataset = MoonquakeDataset(X_test_scaled, y_test)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_dataset, batch_size=batch_size)
            
            model = TransformerModel(input_dim=X_train_scaled.shape[1], num_classes=len(unique_classes)).to(self.device)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            
            print("\nTraining Transformer Model...")
            for epoch in range(num_epochs):
                model.train()
                for batch_features, batch_labels in train_loader:
                    batch_features, batch_labels = batch_features.to(self.device), batch_labels.to(self.device)
                    optimizer.zero_grad()
                    outputs = model(batch_features.unsqueeze(1))  # Add sequence dimension
                    loss = criterion(outputs, batch_labels)
                    loss.backward()
                    optimizer.step()
                
                if (epoch + 1) % 10 == 0:
                    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
            
            print("Training completed")
            
            model.eval()
            all_preds = []
            all_labels = []
            with torch.no_grad():
                for batch_features, batch_labels in test_loader:
                    batch_features, batch_labels = batch_features.to(self.device), batch_labels.to(self.device)
                    outputs = model(batch_features.unsqueeze(1))
                    _, predicted = torch.max(outputs.data, 1)
                    all_preds.extend(predicted.cpu().numpy())
                    all_labels.extend(batch_labels.cpu().numpy())
            
            print("\nClassification Report for Transformer Model:")
            report = classification_report(
                all_labels,
                all_preds,
                target_names=self.label_encoder.classes_
            )
            print(report)
            
            return {'Transformer': {'model': model, 'report': report}}
            
        except Exception as e:
            print(f"Error in training and evaluation: {str(e)}")
            raise

def main():
    """Main function to run the moonquake classification pipeline."""
    # Define paths
    data_dir = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\data\\S12_GradeA'
    catalog_path = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\catalogs\\apollo12_catalog_GradeA_final.csv'
    
    classifier = MoonquakeClassifier(data_dir, catalog_path)
    results = classifier.train_and_evaluate()
    
    print("\n=== Final Results ===")
    for model_name, model_results in results.items():
        print(f"\nResults for {model_name}:")
        print(f"Model: {model_results['model']}")
        print(f"Classification Report:\n{model_results['report']}")

if __name__ == "__main__":
    print("\n=== Starting Moonquake Classification Program ===")
    main()
    print("\n=== Program Completed ===")


=== Starting Moonquake Classification Program ===

=== Training and Evaluation ===

Dataset Statistics:
Total samples: 76

Class distribution:
deep_mq: 9
impact_mq: 64
shallow_mq: 3
Encoded 3 classes

Train set size: 60
Test set size: 16





Training Transformer Model...
Epoch [10/50], Loss: 0.3658
Epoch [20/50], Loss: 0.3958
Epoch [30/50], Loss: 0.4663
Epoch [40/50], Loss: 0.2943
Epoch [50/50], Loss: 0.2659
Training completed

Classification Report for Transformer Model:
              precision    recall  f1-score   support

     deep_mq       0.00      0.00      0.00         2
   impact_mq       0.81      1.00      0.90        13
  shallow_mq       0.00      0.00      0.00         1

    accuracy                           0.81        16
   macro avg       0.27      0.33      0.30        16
weighted avg       0.66      0.81      0.73        16


=== Final Results ===

Results for Transformer:
Model: TransformerModel(
  (embedding): Linear(in_features=16, out_features=64, bias=True)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import os

def test_data_paths(data_dir, catalog_path):
    """Test if the paths exist and contain data"""
    #print("\nTesting data paths:")
    #print(f"Data directory exists: {os.path.exists(data_dir)}")
    #print(f"Catalog file exists: {os.path.exists(catalog_path)}")
    
    if os.path.exists(data_dir):
        files = os.listdir(data_dir)
        #print(f"Number of files in data directory: {len(files)}")
        #print(f"First few files: {files[:5] if files else 'No files found'}")
    
    if os.path.exists(catalog_path):
        try:
            catalog = pd.read_csv(catalog_path)
            #print(f"Catalog shape: {catalog.shape}")
            #print(f"Catalog columns: {catalog.columns.tolist()}")
        except Exception as e:
            print(f"Error reading catalog: {str(e)}")

class MoonquakeClassifier:
    def __init__(self, data_dir, catalog_path):
        """Initialize the classifier with paths to data directory and catalog file."""
        #print("\n=== Initializing MoonquakeClassifier ===")
        self.data_dir = data_dir
        #print(f"Loading catalog from: {catalog_path}")
        self.catalog = pd.read_csv(catalog_path)
        #print(f"Catalog loaded successfully with shape: {self.catalog.shape}")
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        
    def extract_features(self, velocity_data, time_data):
        """Extract features from velocity time series data."""
        #print("Extracting features...")
        try:
            # Time domain features
            time_features = [
                np.mean(velocity_data),
                np.std(velocity_data),
                np.max(np.abs(velocity_data)),
                np.min(velocity_data),
                np.percentile(velocity_data, 75),
                np.percentile(velocity_data, 25),
                np.mean(np.abs(np.diff(velocity_data))),
                np.std(np.abs(np.diff(velocity_data)))
            ]
            
            # Basic feature check
            #print(f"Time features extracted: {len(time_features)} features")
            
            # Frequency domain features
            time_step = time_data[1] - time_data[0]
            fft_result = np.fft.fft(velocity_data)
            frequencies = np.fft.fftfreq(len(velocity_data), d=time_step)
            fft_magnitude = np.abs(fft_result)
            
            low_freq, high_freq = 0.1, 10
            fft_filtered = fft_magnitude.copy()
            fft_filtered[(frequencies < low_freq) | (frequencies > high_freq)] = 0
            
            freq_features = [
                np.mean(fft_filtered),
                np.std(fft_filtered),
                np.max(fft_filtered),
                np.sum(fft_filtered),
                np.median(fft_filtered)
            ]
            
            #print(f"Frequency features extracted: {len(freq_features)} features")
            
            # Spectral features
            f, t, Sxx = signal.spectrogram(velocity_data, fs=1/time_step)
            spectral_features = [
                np.mean(Sxx),
                np.std(Sxx),
                np.max(Sxx)
            ]
            
            #print(f"Spectral features extracted: {len(spectral_features)} features")
            
            all_features = time_features + freq_features + spectral_features
            #print(f"Total features extracted: {len(all_features)}")
            return all_features
            
        except Exception as e:
            print(f"Error in feature extraction: {str(e)}")
            raise
    
    def process_single_file(self, file_path):
        """Process a single CSV file and extract features."""
        #print(f"\nProcessing file: {os.path.basename(file_path)}")
        try:
            df = pd.read_csv(file_path)
            #print(f"File loaded successfully with shape: {df.shape}")
            
            velocity = df['velocity(m/s)'].values
            time_rel = df['time_rel(sec)'].values
            #print(f"Velocity data shape: {velocity.shape}")
            
            features = self.extract_features(velocity, time_rel)
            return features
            
        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            return None

    def prepare_dataset(self):
        """
        Prepare the complete dataset by processing all files and matching with catalog.
        Returns:
            tuple: (X, y, processed_files) - features, labels, and list of processed filenames
        """
        print("\n=== Preparing Dataset ===")
        X = []
        y = []
        processed_files = []
        
        # Get list of CSV files
        csv_files = [f for f in os.listdir(self.data_dir) if f.endswith('.csv')]
        #print(f"Found {len(csv_files)} CSV files in directory")
        
        # Debug: Print first few entries of the catalog
        #print("\nFirst few catalog entries:")
        #print(self.catalog['filename'].head())
        
        for filename in csv_files:
            # Normalize filename (remove extension)
            normalized_filename = filename.replace('.csv', '')
            #print(f"\nProcessing {filename} (normalized: {normalized_filename})")
            
            # Debug: Print exact matching condition
            matching_entries = self.catalog[self.catalog['filename'].str.contains(normalized_filename, regex=False)]
            #print(f"Number of matching entries found: {len(matching_entries)}")
            
            if len(matching_entries) > 0:
                #print(f"Found catalog entry for {filename}")
                file_path = os.path.join(self.data_dir, filename)
                features = self.process_single_file(file_path)
                
                if features is not None:
                    X.append(features)
                    y.append(matching_entries['mq_type'].iloc[0])
                    processed_files.append(filename)
                    #print(f"Successfully processed {filename}")
            else:
                print(f"No catalog entry found for {filename}")
        
        # Check if dataset is empty after processing all files
        if not X:
            print("No matching files found in the catalog. Dataset is empty.")
            return np.array([]), np.array([]), []
        
        # Convert to numpy arrays after processing all files
        X = np.array(X)
        y = np.array(y)
        
        return X, y, processed_files

    
    def train_and_evaluate(self, test_size=0.2, random_state=42):
        """Train and evaluate the classifier."""
        print("\n=== Training and Evaluation ===")
        try:
            # Prepare dataset
            X, y, processed_files = self.prepare_dataset()
            
            # Print dataset statistics
            print("\nDataset Statistics:")
            print(f"Total samples: {len(X)}")
            print("\nClass distribution:")
            unique_classes, counts = np.unique(y, return_counts=True)
            for cls, count in zip(unique_classes, counts):
                print(f"{cls}: {count}")
            
            # Encode labels
            y_encoded = self.label_encoder.fit_transform(y)
            print(f"Encoded {len(unique_classes)} classes")
            
            # Split dataset
            X_train, X_test, y_train, y_test = train_test_split(
                X, y_encoded, test_size=test_size, random_state=random_state, stratify=y_encoded
            )
            print(f"\nTrain set size: {len(X_train)}")
            print(f"Test set size: {len(X_test)}")
            
            # Scale features
            X_train_scaled = self.scaler.fit_transform(X_train)
            X_test_scaled = self.scaler.transform(X_test)
            
            # Train and evaluate models
            models = {
                'Random Forest': RandomForestClassifier(
                    n_estimators=100, 
                    max_depth=10,
                    random_state=random_state
                ),
                'Neural Network': MLPClassifier(
                    hidden_layer_sizes=(100, 50),
                    max_iter=1000,
                    random_state=random_state
                )
            }
            
            results = {}
            for name, model in models.items():
                print(f"\nTraining {name}...")
                model.fit(X_train_scaled, y_train)
                print(f"{name} training completed")
                
                # Make predictions
                y_pred = model.predict(X_test_scaled)
                
                # Print classification report
                print(f"\nClassification Report for {name}:")
                report = classification_report(
                    y_test,
                    y_pred,
                    target_names=self.label_encoder.classes_
                )
                print(f"\n{report}")
                
                results[name] = {
                    'model': model,
                    'predictions': y_pred,
                    'report': report
                }
            
            return results
            
        except Exception as e:
            print(f"Error in training and evaluation: {str(e)}")
            raise

def main():
    """Main function to run the moonquake classification pipeline."""
    try:
        # Define paths
        data_dir = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\data\\S12_GradeA'
        catalog_path = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\catalogs\\apollo12_catalog_GradeA_final.csv'


        
        # Test paths before proceeding
        # print("\n=== Testing Data Paths ===")
        test_data_paths(data_dir, catalog_path)
        
        # Create instance of classifier
        # print("\n=== Creating Classifier Instance ===")
        classifier = MoonquakeClassifier(data_dir, catalog_path)
        
        # Train and evaluate
        # print("\n=== Starting Training and Evaluation ===")
        results = classifier.train_and_evaluate()
        
        # Print final results
        print("\n=== Final Results ===")
        for model_name, model_results in results.items():
            print(f"\nResults for {model_name}:")
            print(f"Model: {model_results['model']}")
            print(f"Classification Report:\n{model_results['report']}")
            
    except Exception as e:
        print(f"\nAn error occurred in main: {str(e)}")
        import traceback
        print(traceback.format_exc())

import gradio as gr
import pandas as pd
import os
import tempfile
from pathlib import Path



def process_files(data_dir_file, catalog_file):
    """
    Process the uploaded files and run the moonquake classification
    
    Args:
        data_dir_file: Uploaded ZIP file containing CSV data files
        catalog_file: Uploaded catalog CSV file
    
    Returns:
        str: Classification results as formatted text
    """
    try:
        # Create temporary directory for processing
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save catalog file
            catalog_path = os.path.join(temp_dir, "catalog.csv")
            catalog_file.save(catalog_path)
            
            # Create directory for data files
            data_dir = os.path.join(temp_dir, "data")
            os.makedirs(data_dir, exist_ok=True)
            
            # Extract data files
            if data_dir_file.name.endswith('.zip'):
                import zipfile
                with zipfile.ZipFile(data_dir_file.name, 'r') as zip_ref:
                    zip_ref.extractall(data_dir)
            else:
                return "Please upload a ZIP file containing the CSV data files"

            # Test paths
            test_data_paths(data_dir, catalog_path)
            
            # Initialize and run classifier
            classifier = MoonquakeClassifier(data_dir, catalog_path)
            results = classifier.train_and_evaluate()
            
            # Format results
            output_text = "=== Moonquake Classification Results ===\n\n"
            for model_name, model_results in results.items():
                output_text += f"\nResults for {model_name}:\n"
                output_text += f"Classification Report:\n{model_results['report']}\n"
                output_text += "-" * 50 + "\n"
            
            return output_text
            
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Moonquake Classifier") as interface:
        gr.Markdown("""
        # Moonquake Classification Interface
        
        Upload your data files to classify moonquake events.
        
        **Instructions:**
        1. Upload a ZIP file containing your CSV data files
        2. Upload your catalog CSV file
        3. Click 'Submit' to run the classification
        """)
        
        with gr.Row():
            data_dir_input = gr.File(
                label="Data Directory (ZIP file containing CSV files)",
                file_types=[".zip"]
            )
            catalog_input = gr.File(
                label="Catalog File (CSV)",
                file_types=[".csv"]
            )
            
        submit_btn = gr.Button("Submit")
        
        output = gr.Textbox(
            label="Classification Results",
            lines=20,
            max_lines=30
        )
        
        submit_btn.click(
            fn=process_files,
            inputs=[data_dir_input, catalog_input],
            outputs=output
        )
        
    return interface

# Launch the interface
if __name__ == "__main__":
    interface = create_interface()
    interface.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860
    )

* Running on local URL:  http://0.0.0.0:7860
* Running on public URL: https://dec0cff370893434bb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [19]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.3.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.3-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl.metadata (3.1 kB)
Collecting pydub (from gradio)
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Down



In [23]:
import pandas as pd
import numpy as np
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import os
from tempfile import TemporaryDirectory
import shutil

def test_data_paths(data_dir, catalog_path):
    """Test if the paths exist and contain data"""
    #print("\nTesting data paths:")
    #print(f"Data directory exists: {os.path.exists(data_dir)}")
    #print(f"Catalog file exists: {os.path.exists(catalog_path)}")
    
    if os.path.exists(data_dir):
        files = os.listdir(data_dir)
        #print(f"Number of files in data directory: {len(files)}")
        #print(f"First few files: {files[:5] if files else 'No files found'}")
    
    if os.path.exists(catalog_path):
        try:
            catalog = pd.read_csv(catalog_path)
            #print(f"Catalog shape: {catalog.shape}")
            #print(f"Catalog columns: {catalog.columns.tolist()}")
        except Exception as e:
            print(f"Error reading catalog: {str(e)}")

class MoonquakeClassifier:
    def __init__(self, data_dir, catalog_path):
        """Initialize the classifier with paths to data directory and catalog file."""
        #print("\n=== Initializing MoonquakeClassifier ===")
        self.data_dir = data_dir
        #print(f"Loading catalog from: {catalog_path}")
        self.catalog = pd.read_csv(catalog_path)
        #print(f"Catalog loaded successfully with shape: {self.catalog.shape}")
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        
    def extract_features(self, velocity_data, time_data):
        """Extract features from velocity time series data."""
        #print("Extracting features...")
        try:
            # Time domain features
            time_features = [
                np.mean(velocity_data),
                np.std(velocity_data),
                np.max(np.abs(velocity_data)),
                np.min(velocity_data),
                np.percentile(velocity_data, 75),
                np.percentile(velocity_data, 25),
                np.mean(np.abs(np.diff(velocity_data))),
                np.std(np.abs(np.diff(velocity_data)))
            ]
            
            # Basic feature check
            #print(f"Time features extracted: {len(time_features)} features")
            
            # Frequency domain features
            time_step = time_data[1] - time_data[0]
            fft_result = np.fft.fft(velocity_data)
            frequencies = np.fft.fftfreq(len(velocity_data), d=time_step)
            fft_magnitude = np.abs(fft_result)
            
            low_freq, high_freq = 0.1, 10
            fft_filtered = fft_magnitude.copy()
            fft_filtered[(frequencies < low_freq) | (frequencies > high_freq)] = 0
            
            freq_features = [
                np.mean(fft_filtered),
                np.std(fft_filtered),
                np.max(fft_filtered),
                np.sum(fft_filtered),
                np.median(fft_filtered)
            ]
            
            #print(f"Frequency features extracted: {len(freq_features)} features")
            
            # Spectral features
            f, t, Sxx = signal.spectrogram(velocity_data, fs=1/time_step)
            spectral_features = [
                np.mean(Sxx),
                np.std(Sxx),
                np.max(Sxx)
            ]
            
            #print(f"Spectral features extracted: {len(spectral_features)} features")
            
            all_features = time_features + freq_features + spectral_features
            #print(f"Total features extracted: {len(all_features)}")
            return all_features
            
        except Exception as e:
            print(f"Error in feature extraction: {str(e)}")
            raise
    
    def process_single_file(self, file_path):
        """Process a single CSV file and extract features."""
        #print(f"\nProcessing file: {os.path.basename(file_path)}")
        try:
            df = pd.read_csv(file_path)
            #print(f"File loaded successfully with shape: {df.shape}")
            
            velocity = df['velocity(m/s)'].values
            time_rel = df['time_rel(sec)'].values
            #print(f"Velocity data shape: {velocity.shape}")
            
            features = self.extract_features(velocity, time_rel)
            return features
            
        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            return None

    def prepare_dataset(self):
        """
        Prepare the complete dataset by processing all files and matching with catalog.
        Returns:
            tuple: (X, y, processed_files) - features, labels, and list of processed filenames
        """
        print("\n=== Preparing Dataset ===")
        X = []
        y = []
        processed_files = []
        
        # Get list of CSV files
        csv_files = [f for f in os.listdir(self.data_dir) if f.endswith('.csv')]
        #print(f"Found {len(csv_files)} CSV files in directory")
        
        # Debug: Print first few entries of the catalog
        #print("\nFirst few catalog entries:")
        #print(self.catalog['filename'].head())
        
        for filename in csv_files:
            # Normalize filename (remove extension)
            normalized_filename = filename.replace('.csv', '')
            #print(f"\nProcessing {filename} (normalized: {normalized_filename})")
            
            # Debug: Print exact matching condition
            matching_entries = self.catalog[self.catalog['filename'].str.contains(normalized_filename, regex=False)]
            #print(f"Number of matching entries found: {len(matching_entries)}")
            
            if len(matching_entries) > 0:
                #print(f"Found catalog entry for {filename}")
                file_path = os.path.join(self.data_dir, filename)
                features = self.process_single_file(file_path)
                
                if features is not None:
                    X.append(features)
                    y.append(matching_entries['mq_type'].iloc[0])
                    processed_files.append(filename)
                    #print(f"Successfully processed {filename}")
            else:
                print(f"No catalog entry found for {filename}")
        
        # Check if dataset is empty after processing all files
        if not X:
            print("No matching files found in the catalog. Dataset is empty.")
            return np.array([]), np.array([]), []
        
        # Convert to numpy arrays after processing all files
        X = np.array(X)
        y = np.array(y)
        
        return X, y, processed_files

    
    def train_and_evaluate(self, test_size=0.2, random_state=42):
        """Train and evaluate the classifier."""
        print("\n=== Training and Evaluation ===")
        try:
            # Prepare dataset
            X, y, processed_files = self.prepare_dataset()
            
            # Print dataset statistics
            print("\nDataset Statistics:")
            print(f"Total samples: {len(X)}")
            print("\nClass distribution:")
            unique_classes, counts = np.unique(y, return_counts=True)
            for cls, count in zip(unique_classes, counts):
                print(f"{cls}: {count}")
            
            # Encode labels
            y_encoded = self.label_encoder.fit_transform(y)
            print(f"Encoded {len(unique_classes)} classes")
            
            # Split dataset
            X_train, X_test, y_train, y_test = train_test_split(
                X, y_encoded, test_size=test_size, random_state=random_state, stratify=y_encoded
            )
            print(f"\nTrain set size: {len(X_train)}")
            print(f"Test set size: {len(X_test)}")
            
            # Scale features
            X_train_scaled = self.scaler.fit_transform(X_train)
            X_test_scaled = self.scaler.transform(X_test)
            
            # Train and evaluate models
            models = {
                'Random Forest': RandomForestClassifier(
                    n_estimators=100, 
                    max_depth=10,
                    random_state=random_state
                ),
                'Neural Network': MLPClassifier(
                    hidden_layer_sizes=(100, 50),
                    max_iter=1000,
                    random_state=random_state
                )
            }
            
            results = {}
            for name, model in models.items():
                print(f"\nTraining {name}...")
                model.fit(X_train_scaled, y_train)
                print(f"{name} training completed")
                
                # Make predictions
                y_pred = model.predict(X_test_scaled)
                
                # Print classification report
                print(f"\nClassification Report for {name}:")
                report = classification_report(
                    y_test,
                    y_pred,
                    target_names=self.label_encoder.classes_
                )
                print(f"\n{report}")
                
                results[name] = {
                    'model': model,
                    'predictions': y_pred,
                    'report': report
                }
            
            return results
            
        except Exception as e:
            print(f"Error in training and evaluation: {str(e)}")
            raise

def main():
    """Main function to run the moonquake classification pipeline."""
    try:
        # Define paths
        data_dir = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\data\\S12_GradeA'
        catalog_path = 'C:\\Users\\DELL\\Desktop\\New folder\\space_apps_2024_seismic_detection\\data\\lunar\\training\\catalogs\\apollo12_catalog_GradeA_final.csv'


        
        # Test paths before proceeding
        # print("\n=== Testing Data Paths ===")
        test_data_paths(data_dir, catalog_path)
        
        # Create instance of classifier
        # print("\n=== Creating Classifier Instance ===")
        classifier = MoonquakeClassifier(data_dir, catalog_path)
        
        # Train and evaluate
        # print("\n=== Starting Training and Evaluation ===")
        results = classifier.train_and_evaluate()
        
        # Print final results
        print("\n=== Final Results ===")
        for model_name, model_results in results.items():
            print(f"\nResults for {model_name}:")
            print(f"Model: {model_results['model']}")
            print(f"Classification Report:\n{model_results['report']}")
            
    except Exception as e:
        print(f"\nAn error occurred in main: {str(e)}")
        import traceback
        print(traceback.format_exc())

def process_uploads(catalog_file, data_folder):
    """
    Process uploaded files and return classification results
    
    Args:
        catalog_file: Uploaded catalog CSV file
        data_folder: Uploaded folder containing CSV data files
    
    Returns:
        str: Classification results as formatted string
    """
    try:
        # Create a temporary directory to store uploaded files
        with TemporaryDirectory() as temp_dir:
            # Save the catalog file
            catalog_path = os.path.join(temp_dir, "catalog.csv")
            shutil.copy2(catalog_file, catalog_path)
            
            # Create a directory for the data files
            data_dir = os.path.join(temp_dir, "data")
            os.makedirs(data_dir, exist_ok=True)
            
            # Extract and save all files from the uploaded folder
            for file_name in os.listdir(data_folder):
                if file_name.endswith('.csv'):
                    src_path = os.path.join(data_folder, file_name)
                    dst_path = os.path.join(data_dir, file_name)
                    shutil.copy2(src_path, dst_path)
            
            # Test paths
            test_data_paths(data_dir, catalog_path)
            
            # Initialize and run classifier
            classifier = MoonquakeClassifier(data_dir, catalog_path)
            results = classifier.train_and_evaluate()
            
            # Format results as string
            output = "=== Classification Results ===\n\n"
            for model_name, model_results in results.items():
                output += f"Results for {model_name}:\n"
                output += f"Classification Report:\n{model_results['report']}\n"
                output += "-" * 50 + "\n"
            
            return output
            
    except Exception as e:
        return f"An error occurred: {str(e)}\n{str(traceback.format_exc())}"

# Create Gradio interface
def create_gradio_interface():
    with gr.Blocks(title="Moonquake Classification Interface") as interface:
        gr.Markdown("""
        # Moonquake Classification Interface
        
        Upload your catalog file and data folder to classify moonquake data.
        
        **Instructions:**
        1. Upload a CSV catalog file
        2. Select a folder containing your moonquake data CSV files
        3. Click 'Submit' to process the data and view results
        """)
        
        with gr.Row():
            with gr.Column():
                catalog_input = gr.File(
                    label="Upload Catalog CSV",
                    file_types=[".csv"],
                    type="filepath"  # Changed from 'file' to 'filepath'
                )
                data_folder_input = gr.File(
                    label="Upload Data Folder",
                    file_count="directory",
                    type="filepath"  # Changed from 'file' to 'filepath'
                )
                submit_btn = gr.Button("Submit", variant="primary")
            
            with gr.Column():
                output_text = gr.TextArea(
                    label="Classification Results",
                    placeholder="Results will appear here...",
                    lines=20,
                    max_lines=30
                )
        
        submit_btn.click(
            fn=process_uploads,
            inputs=[catalog_input, data_folder_input],
            outputs=output_text
        )
    
    return interface

# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(share=True, server_name="0.0.0.0")

ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 7860): only one usage of each socket address (protocol/network address/port) is normally permitted


* Running on local URL:  http://0.0.0.0:7861
* Running on public URL: https://fb816b187c51b99093.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
