In [None]:
import numpy as np
import pandas as pd
import os
from google.colab import files

# Upload 1 to 3 V2c files manually.
# `uploaded` is iterated further down, where each item is used as the
# filename of one uploaded file to open and parse.
uploaded = files.upload()

# Function to parse .V2c files
def parse_v2c_data(contents):
    """Collect every numeric value found in the lines of a .V2c file.

    Each line is split on whitespace and every token is converted with
    ``float``. A line containing at least one non-numeric token is skipped
    as a whole, so it contributes no values at all. Every fully numeric
    line is accepted, which means numeric-only header rows (if the format
    has any) cannot be told apart from samples here.

    Parameters
    ----------
    contents : str or iterable of str
        The file's lines. A single string is split into lines first;
        iterating it directly would walk it character by character and
        turn ``"12 3"`` into the values 1, 2 and 3.

    Returns
    -------
    numpy.ndarray
        1-D float array of all parsed values, in file order. Empty when no
        line could be parsed.
    """
    if isinstance(contents, str):
        contents = contents.splitlines()

    acceleration = []
    for line in contents:
        try:
            values = [float(token) for token in line.split()]
        except ValueError:
            # At least one token is not a number: treat the line as text.
            continue
        acceleration.extend(values)
    return np.array(acceleration, dtype=float)

# Load and process uploaded files
if not uploaded:
    # Without this check the failure would surface later as an opaque
    # "min() arg is an empty sequence" error.
    raise ValueError("No files were uploaded; upload 1 to 3 V2c files.")

acc_arrays = []
for filename in uploaded:
    with open(filename, "r") as f:
        lines = f.readlines()
    acc = parse_v2c_data(lines)
    if acc.size == 0:
        # An empty array would truncate every axis to length 0 below and
        # produce rows that contain nothing but the label.
        raise ValueError(f"No numeric acceleration data found in {filename!r}.")
    acc_arrays.append(acc)

# Make sure all arrays are the same length (truncate to the shortest one)
min_length = min(len(arr) for arr in acc_arrays)
acc_arrays = [arr[:min_length] for arr in acc_arrays]

# Dynamically calculate magnitude: Euclidean norm across the uploaded files
# at each sample index (with a single file this is the absolute value).
acc_stack = np.vstack(acc_arrays)  # shape: (n_axes, n_points)
magnitude = np.sqrt(np.sum(acc_stack**2, axis=0))  # shape: (n_points,)

# Create a single row DataFrame with 'acc1', ..., 'accN', and 'earthquake?' = 1
def create_row_df(magnitude, count):
    """Build a one-row DataFrame holding the first ``count`` magnitude samples.

    Parameters
    ----------
    magnitude : sequence of float
        Magnitude samples; only the leading ``count`` values are used.
    count : int
        Number of samples (and therefore ``acc`` columns) requested.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``acc1`` ... ``accN`` followed by
        ``earthquake?`` set to 1. ``N`` is smaller than ``count`` when
        ``magnitude`` is too short.

    Warns
    -----
    UserWarning
        When fewer than ``count`` samples are available. Such a row has
        fewer columns than the rows it is later concatenated with, and the
        missing cells become NaN.
    """
    import warnings  # local import: not part of the notebook's import lines

    segment = magnitude[:count]
    if len(segment) < count:
        warnings.warn(
            f"Only {len(segment)} of the requested {count} samples are available; "
            "the row will have fewer 'acc' columns than requested.",
            stacklevel=2,
        )
    data = {f'acc{i+1}': val for i, val in enumerate(segment)}
    data['earthquake?'] = 1
    return pd.DataFrame([data])

# Append or create Excel files
def append_to_excel(filename, new_row_df):
    """Write ``new_row_df`` to ``filename``, keeping rows already stored there.

    If the workbook exists, its rows are read back and ``new_row_df`` is
    concatenated after them with a fresh index. Otherwise ``new_row_df``
    alone is written. The file is always rewritten without the index column.

    Note: a later cell of this notebook defines another ``append_to_excel``
    with a different signature; once that cell has run, the name refers to
    that version.

    Returns
    -------
    pandas.DataFrame
        The full table that was written to ``filename``.
    """
    if not os.path.exists(filename):
        # First write: the new rows are the entire workbook.
        combined_df = new_row_df
    else:
        previous_rows = pd.read_excel(filename)
        combined_df = pd.concat([previous_rows, new_row_df], ignore_index=True)

    combined_df.to_excel(filename, index=False)
    return combined_df

# Process all sizes: one labelled row per window length
df_200 = create_row_df(magnitude=magnitude, count=200)
df_500 = create_row_df(magnitude=magnitude, count=500)
df_1000 = create_row_df(magnitude=magnitude, count=1000)

# Save to Excel (each call returns the whole table that was written)
df_200_full = append_to_excel(filename="earthquake_200.xlsx", new_row_df=df_200)
df_500_full = append_to_excel(filename="earthquake_500.xlsx", new_row_df=df_500)
df_1000_full = append_to_excel(filename="earthquake_1000.xlsx", new_row_df=df_1000)

# Show results: the last rows of every updated workbook
for banner, updated_sheet in (
    ("✅ Updated earthquake_200.xlsx:", df_200_full),
    ("\n✅ Updated earthquake_500.xlsx:", df_500_full),
    ("\n✅ Updated earthquake_1000.xlsx:", df_1000_full),
):
    print(banner)
    display(updated_sheet.tail())


In [None]:
import numpy as np
import pandas as pd
import os

# Sampling setup
fs = 1000  # Hz
duration = 1  # seconds (enough for 1000 samples)
# Force an integer so a fractional duration still gives a valid sample count.
samples = int(round(fs * duration))
# endpoint=False keeps the spacing at exactly 1/fs. Including the endpoint
# would spread `samples` points over `samples - 1` intervals and make the
# effective sampling rate slightly different from `fs`.
t = np.linspace(0, duration, samples, endpoint=False)

# Signal generators
def generate_human_walk():
    """Return a noisy sine wave on the notebook-level time axis ``t``.

    The frequency is drawn uniformly from [1.5, 3.0), the amplitude
    uniformly from [0.1, 0.4), and zero-mean Gaussian noise with standard
    deviation 0.05 is added to each of the ``samples`` points.
    """
    step_freq = np.random.uniform(1.5, 3.0)
    step_amp = np.random.uniform(0.1, 0.4)
    jitter = np.random.normal(0, 0.05, samples)
    carrier = np.sin(2 * np.pi * step_freq * t)
    return step_amp * carrier + jitter

def generate_truck_driveby():
    """Return an exponentially decaying noisy sine wave on the time axis ``t``.

    The frequency is drawn uniformly from [2.5, 5.0) and the amplitude
    uniformly from [0.5, 1.5). The sine is multiplied by an envelope that
    falls from ``exp(0)`` to ``exp(-3)`` across the ``samples`` points, then
    zero-mean Gaussian noise with standard deviation 0.1 is added.
    """
    rumble_freq = np.random.uniform(2.5, 5.0)
    rumble_amp = np.random.uniform(0.5, 1.5)
    envelope = np.exp(-np.linspace(0, 3, samples))
    hiss = np.random.normal(0, 0.1, samples)
    wave = np.sin(2 * np.pi * rumble_freq * t)
    return rumble_amp * wave * envelope + hiss

def generate_random_noise():
    """Return smoothed Gaussian noise of length ``samples`` with a random gain.

    Standard-normal noise is averaged with a 20-point moving window
    (``mode='same'`` keeps the length), then scaled by a factor drawn
    uniformly from [0.1, 0.3).
    """
    raw = np.random.normal(0, 1, samples)
    window = np.ones(20) / 20
    averaged = np.convolve(raw, window, mode='same')
    gain = np.random.uniform(0.1, 0.3)
    return averaged * gain

# Combined generator
def generate_non_eq_signal():
    """Return one synthetic non-earthquake signal of a randomly chosen kind.

    One of 'human', 'truck' or 'noise' is picked with equal probability and
    the matching generator is called.
    """
    source = np.random.choice(['human', 'truck', 'noise'])
    if source == 'human':
        return generate_human_walk()
    if source == 'truck':
        return generate_truck_driveby()
    # Anything else (i.e. 'noise') falls through to the noise generator.
    return generate_random_noise()

# Append to Excel
def append_to_excel(filename, signals, sample_size):
    """Append ``signals`` to the workbook ``filename`` as non-earthquake rows.

    Each signal is truncated to its first ``sample_size`` values, stored in
    columns ``acc1`` ... ``accN`` and labelled ``earthquake?`` = 0. Rows
    already in the workbook are kept in front of the new ones; the file is
    rewritten without the index column and a confirmation line is printed.

    Note: this definition replaces the two-argument ``append_to_excel``
    defined in an earlier cell of this notebook.
    """
    df_new = pd.DataFrame([
        {
            **{f'acc{idx+1}': val for idx, val in enumerate(signal[:sample_size])},
            'earthquake?': 0,
        }
        for signal in signals
    ])

    if not os.path.exists(filename):
        df_combined = df_new
    else:
        df_combined = pd.concat([pd.read_excel(filename), df_new], ignore_index=True)

    df_combined.to_excel(filename, index=False)
    print(f"✅ Added {len(signals)} non-earthquake signals to {filename}")

# Generate and distribute to all Excel files.
# The same three synthetic signals go into every workbook, each truncated
# to that workbook's window length.
non_eq_signals = [generate_non_eq_signal() for _ in range(3)]
for workbook, window in (
    ('earthquake_200.xlsx', 200),
    ('earthquake_500.xlsx', 500),
    ('earthquake_1000.xlsx', 1000),
):
    append_to_excel(workbook, non_eq_signals, window)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score, confusion_matrix, precision_score, recall_score,
    f1_score, roc_curve, auc, roc_auc_score
)

def evaluate_naive_bayes(file_path, label):
    """Train a Gaussian Naive Bayes model on one workbook and report metrics.

    The workbook is split 65/35 into train and test rows (``random_state=42``),
    the model is fitted on the training rows, and its ROC curve is drawn on
    the current matplotlib axes with ``label`` in the legend entry.

    Parameters
    ----------
    file_path : str
        Excel file with feature columns and an ``earthquake?`` label column.
    label : str
        Legend text for this dataset's ROC curve.

    Returns
    -------
    dict
        Accuracy, precision, recall, F1, the four confusion-matrix rates and
        ROC AUC. ROC AUC is NaN when the test split contains only one class.
    """
    # Load the dataset
    df = pd.read_excel(file_path)
    X = df.drop(columns=['earthquake?'])
    y = df['earthquake?']

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)

    # Initialize and train the model
    model = GaussianNB()
    model.fit(X_train, y_train)

    # Predictions and probabilities
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    # ROC curve and AUC. roc_auc_score raises when y_test holds a single
    # class, which small datasets can produce; report NaN instead so the
    # remaining metrics are still returned.
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    if y_test.nunique() < 2:
        auc_score = float('nan')
    else:
        auc_score = roc_auc_score(y_test, y_proba)

    # Plot ROC
    plt.plot(fpr, tpr, label=f'{label} (AUC = {auc_score:.2f})')

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # labels=[0, 1] forces a 2x2 matrix so the four-way unpack works even
    # when only one class occurs in y_test and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    return {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall (True Positive Rate)": recall,
        "F1 Score": f1,
        "True Positive Rate": tp / (tp + fn) if (tp + fn) > 0 else 0,
        "False Positive Rate": fp / (fp + tn) if (fp + tn) > 0 else 0,
        "True Negative Rate": tn / (tn + fp) if (tn + fp) > 0 else 0,
        "False Negative Rate": fn / (fn + tp) if (fn + tp) > 0 else 0,
        "ROC AUC": auc_score
    }

# Evaluate each file (every call also adds its ROC curve to the current figure)
results_200 = evaluate_naive_bayes(file_path="earthquake_200.xlsx", label="200 Accels")
results_500 = evaluate_naive_bayes(file_path="earthquake_500.xlsx", label="500 Accels")
results_1000 = evaluate_naive_bayes(file_path="earthquake_1000.xlsx", label="1000 Accels")

# Display metrics
for heading, metrics in (
    ("Results for 200 accelerations:", results_200),
    ("\nResults for 500 accelerations:", results_500),
    ("\nResults for 1000 accelerations:", results_1000),
):
    print(heading)
    print(metrics)

# Finalize and show ROC plot
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal = chance level
plt.title('Naive Bayes ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    accuracy_score, confusion_matrix, precision_score, recall_score,
    f1_score, fbeta_score, roc_curve, auc
)

def evaluate_gradient_boosting(file_path, label):
    """Train a gradient-boosting classifier on one workbook and report metrics.

    The workbook is split 70/30 into train and test rows (``random_state=42``),
    a 50-estimator ``GradientBoostingClassifier`` is fitted on the training
    rows, and its ROC curve is drawn on the current matplotlib axes with
    ``label`` in the legend entry.

    Parameters
    ----------
    file_path : str
        Excel file with feature columns and an ``earthquake?`` label column.
    label : str
        Legend text for this dataset's ROC curve.

    Returns
    -------
    dict
        Accuracy, precision, recall, F1, F2, the four confusion-matrix rates,
        the 2x2 confusion matrix (rows/columns ordered 0, 1) and ROC AUC.
    """
    df = pd.read_excel(file_path)
    X = df.drop(columns=['earthquake?'])
    y = df['earthquake?']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model = GradientBoostingClassifier(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]  # Probabilities for ROC

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    f2 = fbeta_score(y_test, y_pred, beta=2)
    # Computed once and reused. labels=[0, 1] forces a 2x2 matrix so the
    # four-way unpack works even when only one class occurs in y_test and
    # y_pred.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    fpr, tpr, _ = roc_curve(y_test, y_prob)
    roc_auc = auc(fpr, tpr)

    # Plot ROC curve
    plt.plot(fpr, tpr, label=f'{label} (AUC = {roc_auc:.2f})')

    return {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "F2 Score": f2,
        "True Positive Rate": tp / (tp + fn) if (tp + fn) > 0 else 0,
        "False Positive Rate": fp / (fp + tn) if (fp + tn) > 0 else 0,
        "True Negative Rate": tn / (tn + fp) if (tn + fp) > 0 else 0,
        "False Negative Rate": fn / (fn + tp) if (fn + tp) > 0 else 0,
        "Confusion Matrix": cm,
        "ROC AUC": roc_auc
    }

# Run for each file and collect results (each call adds one ROC curve)
results_200 = evaluate_gradient_boosting(file_path="earthquake_200.xlsx", label="200 Accelerations")
results_500 = evaluate_gradient_boosting(file_path="earthquake_500.xlsx", label="500 Accelerations")
results_1000 = evaluate_gradient_boosting(file_path="earthquake_1000.xlsx", label="1000 Accelerations")

# Finalize ROC plot
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve - Gradient Boosting")
plt.grid()
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()

# Print results
for heading, metrics in (
    ("Results for 200 accelerations:", results_200),
    ("\nResults for 500 accelerations:", results_500),
    ("\nResults for 1000 accelerations:", results_1000),
):
    print(heading)
    print(metrics)


In [None]:
import pandas as pd
# Report how many rows of each class every workbook currently holds.
for file in ("earthquake_200.xlsx", "earthquake_500.xlsx", "earthquake_1000.xlsx"):
    label_counts = pd.read_excel(file)['earthquake?'].value_counts()
    # .get(..., 0) covers a class that does not occur in the workbook at all.
    n_quiet = label_counts.get(0, 0)
    n_quake = label_counts.get(1, 0)
    print(f"\n{file}:")
    print(f" - No earthquake (0): {n_quiet}")
    print(f" - Earthquake (1): {n_quake}")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    fbeta_score,
    roc_auc_score,
    roc_curve
)
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier

# Store curves for plotting later: evaluate_lightgbm() appends one
# (fpr, tpr, auc, label) tuple per call, and the plotting loop further down
# in this cell draws them on a single figure.
roc_data: list = []

def evaluate_lightgbm(file_path, label):
    """Train a LightGBM classifier on one workbook and report metrics.

    The workbook is split 70/30 into train and test rows (``random_state=42``)
    and a 50-estimator ``LGBMClassifier`` is fitted on the training rows.
    Nothing is plotted here: the ROC curve is appended to the notebook-level
    ``roc_data`` list as ``(fpr, tpr, auc, label)``.

    Parameters
    ----------
    file_path : str
        Excel file with feature columns and an ``earthquake?`` label column.
    label : str
        Legend text stored alongside this dataset's ROC curve.

    Returns
    -------
    dict
        Accuracy, precision, recall, F1, F2, ROC AUC, the four
        confusion-matrix rates and the 2x2 confusion matrix (rows/columns
        ordered 0, 1). ROC AUC is NaN when the test split contains only one
        class.
    """
    df = pd.read_excel(file_path)
    X = df.drop(columns=['earthquake?'])
    y = df['earthquake?']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model = LGBMClassifier(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    f2 = fbeta_score(y_test, y_pred, beta=2)
    # roc_auc_score raises when y_test holds a single class, which small
    # datasets can produce; report NaN instead so the other metrics survive.
    if y_test.nunique() < 2:
        roc_auc = float('nan')
    else:
        roc_auc = roc_auc_score(y_test, y_proba)
    # Computed once and reused. labels=[0, 1] forces a 2x2 matrix so the
    # four-way unpack works even when only one class occurs in y_test and
    # y_pred.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Save ROC data
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_data.append((fpr, tpr, roc_auc, label))

    return {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "F2 Score": f2,
        "ROC AUC": roc_auc,
        "True Positive Rate": tp / (tp + fn) if (tp + fn) > 0 else 0,
        "False Positive Rate": fp / (fp + tn) if (fp + tn) > 0 else 0,
        "True Negative Rate": tn / (tn + fp) if (tn + fp) > 0 else 0,
        "False Negative Rate": fn / (fn + tp) if (fn + tp) > 0 else 0,
        "Confusion Matrix": cm,
    }

# Run evaluations
results_200 = evaluate_lightgbm("earthquake_200.xlsx", label="200 Accelerations")
results_500 = evaluate_lightgbm("earthquake_500.xlsx", label="500 Accelerations")
results_1000 = evaluate_lightgbm("earthquake_1000.xlsx", label="1000 Accelerations")

# Plot all ROC curves together.
# The loop variables are deliberately not named `auc`: a module-level loop
# variable with that name would rebind the `auc` function that other cells
# import from sklearn.metrics, and later calls to evaluate_gradient_boosting()
# would then fail because `auc` is a float.
plt.figure(figsize=(8, 6))
for curve_fpr, curve_tpr, curve_auc, curve_label in roc_data:
    plt.plot(curve_fpr, curve_tpr, label=f'{curve_label} (AUC = {curve_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--', linewidth=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - LightGBM Models')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()
plt.show()

# Print results
print("\nResults for 200 accelerations:")
print(results_200)
print("\nResults for 500 accelerations:")
print(results_500)
print("\nResults for 1000 accelerations:")
print(results_1000)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, confusion_matrix, precision_score, recall_score,
    f1_score, roc_auc_score, roc_curve
)
import matplotlib.pyplot as plt

def evaluate_random_forest(file_path, label):
    """Train a Random Forest classifier on one workbook and report metrics.

    The workbook is split 65/35 into train and test rows (``random_state=42``),
    a ``RandomForestClassifier(random_state=42)`` is fitted on the training
    rows, and its ROC curve is drawn on the current matplotlib axes with
    ``label`` in the legend entry.

    Parameters
    ----------
    file_path : str
        Excel file with feature columns and an ``earthquake?`` label column.
    label : str
        Legend text for this dataset's ROC curve.

    Returns
    -------
    dict
        Accuracy, precision, recall, F1, ROC AUC and the four
        confusion-matrix rates. ROC AUC is NaN when the test split contains
        only one class.
    """
    # Load the dataset
    df = pd.read_excel(file_path)
    X = df.drop(columns=['earthquake?'])
    y = df['earthquake?']

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42)

    # Initialize the Random Forest model and fit it to the data
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predictions and probabilities for ROC curve
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # roc_auc_score raises when y_test holds a single class, which small
    # datasets can produce; report NaN instead so the other metrics survive.
    if y_test.nunique() < 2:
        roc_auc = float('nan')
    else:
        roc_auc = roc_auc_score(y_test, y_proba)
    # labels=[0, 1] forces a 2x2 matrix so the four-way unpack works even
    # when only one class occurs in y_test and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    # ROC curve data
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    plt.plot(fpr, tpr, label=f"{label} (AUC = {roc_auc:.2f})")

    return {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall (True Positive Rate)": recall,
        "F1 Score": f1,
        "ROC AUC": roc_auc,
        "True Positive Rate": tp / (tp + fn) if (tp + fn) > 0 else 0,
        "False Positive Rate": fp / (fp + tn) if (fp + tn) > 0 else 0,
        "True Negative Rate": tn / (tn + fp) if (tn + fp) > 0 else 0,
        "False Negative Rate": fn / (fn + tp) if (fn + tp) > 0 else 0,
    }

# Evaluate and collect results; the figure is opened first because every
# evaluation call draws its ROC curve on the current figure.
plt.figure(figsize=(8, 6))

results_200 = evaluate_random_forest(file_path="earthquake_200.xlsx", label="200 Accelerations")
results_500 = evaluate_random_forest(file_path="earthquake_500.xlsx", label="500 Accelerations")
results_1000 = evaluate_random_forest(file_path="earthquake_1000.xlsx", label="1000 Accelerations")

# Plot settings
plt.plot([0, 1], [0, 1], 'k--', linewidth=1)
plt.title('ROC Curve - Random Forest')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()

# Print results
for heading, metrics in (
    ("\nResults for 200 accelerations:", results_200),
    ("\nResults for 500 accelerations:", results_500),
    ("\nResults for 1000 accelerations:", results_1000),
):
    print(heading)
    print(metrics)


In [None]:
import pandas as pd

# Input and output file mappings. Every workbook maps to itself, so the files
# are rewritten in place. The dict is not called `files` because that name is
# already used for the `google.colab.files` module imported in the first cell.
abs_value_files = {
    "earthquake_200.xlsx": "earthquake_200.xlsx",
    "earthquake_500.xlsx": "earthquake_500.xlsx",
    "earthquake_1000.xlsx": "earthquake_1000.xlsx"
}

for input_file, output_file in abs_value_files.items():
    # Load the Excel file with headers
    df = pd.read_excel(input_file)

    # Select the acceleration features by name rather than by position, so
    # the label is never touched even if the column order changes.
    feature_columns = [col for col in df.columns if col != 'earthquake?']

    # Take absolute value of only the acceleration features.
    # Earthquake rows hold magnitudes (non-negative by construction) while the
    # synthetic non-earthquake signals are signed, so without this step the
    # sign alone could separate the classes — presumably the reason for this
    # cell; TODO confirm. The operation is idempotent, so re-running is safe.
    df[feature_columns] = df[feature_columns].abs()

    # Write the result back (overwrites the input workbook)
    df.to_excel(output_file, index=False)
    print(f"Saved: {output_file}")


In [2]:
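# Train a Random Forest on the 200-sample dataset and save it to disk.
# Run the absolute-value cell above first: that step is what ensures all acceleration features are positive.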
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

def save_random_forest_model(file_path, model_save_path):
    """Fit a Random Forest on the training split of a workbook and save it.

    The workbook is split with ``test_size=0.35`` and ``random_state=42``;
    only the training portion is used to fit
    ``RandomForestClassifier(random_state=42)``. The fitted model is written
    to ``model_save_path`` with ``joblib.dump``. Nothing is returned.
    """
    dataset = pd.read_excel(file_path)
    targets = dataset['earthquake?']
    features = dataset.drop(columns=['earthquake?'])

    # train_test_split returns (X_train, X_test, y_train, y_test); the test
    # parts are not needed here.
    split = train_test_split(features, targets, test_size=0.35, random_state=42)
    train_features, train_targets = split[0], split[2]

    forest = RandomForestClassifier(random_state=42)
    forest.fit(train_features, train_targets)

    joblib.dump(forest, model_save_path)

# Save the model trained on 200 acceleration readings
# (reads earthquake_200.xlsx and writes the fitted forest to random_forest_200.pkl).
save_random_forest_model("earthquake_200.xlsx", "random_forest_200.pkl")


In [None]:
import pandas as pd
import joblib

# Load the trained Random Forest model.
# NOTE: joblib files are pickle-based; only load model files you created yourself.
model = joblib.load("random_forest_200.pkl")

# Load the 200-acceleration dataset (used to train this model)
data = pd.read_excel("earthquake_200.xlsx")

# Filter to get only false alarms (earthquake? == 0)
false_alarms = data[data["earthquake?"] == 0]

# Check if there are any false alarms
if false_alarms.empty:
    print("No false alarm samples found in the dataset.")
else:
    # Select the first false alarm sample as a one-row DataFrame.
    # Dropping the label by name and slicing with iloc[[0]] keeps the
    # original feature column names, so the input matches the names the
    # model was fitted with (a DataFrame rebuilt from raw values would have
    # integer column names instead).
    new_data = false_alarms.drop(columns=["earthquake?"]).iloc[[0]]

    # Make prediction
    prediction = model.predict(new_data)[0]

    print(f"Prediction: {prediction} (0 = False Alarm, 1 = Earthquake)")
