<a href="https://www.kaggle.com/code/ouzhansalam/gunshot-audio-feature-extraction-classification?scriptVersionId=249426841" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [2]:
import os
import librosa
import numpy as np
import pandas as pd

def extract_features(file_path):
    """
    Ses dosyasından birçok özellik çıkarır.
    """
    try:
        audio, sample_rate = librosa.load(file_path, sr=None)
        zero_crossing = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sample_rate).T, axis=0)
        spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sample_rate).T, axis=0)
        
        spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sample_rate)
        contrast_mean = np.mean(spectral_contrast, axis=1)
        contrast_std = np.std(spectral_contrast, axis=1)
        
        chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
        chroma_stft_mean = np.mean(chroma_stft, axis=1)
        chroma_stft_std = np.std(chroma_stft, axis=1)
        
        rms_mean = np.mean(librosa.feature.rms(y=audio))
        
        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
        melspectrogram_mean = np.mean(mel_spectrogram)
        melspectrogram_std = np.std(mel_spectrogram)
        
        flatness_mean = np.mean(librosa.feature.spectral_flatness(y=audio))
        
        poly_features = librosa.feature.poly_features(y=audio, sr=sample_rate, order=1)
        poly_mean = np.mean(poly_features, axis=1)
        
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)
        
        energy = np.sum(audio ** 2)
        
        features = np.hstack([
            zero_crossing, spectral_centroid, spectral_rolloff, spectral_bandwidth,
            contrast_mean, contrast_std, chroma_stft_mean, chroma_stft_std,
            rms_mean, melspectrogram_mean, melspectrogram_std, flatness_mean,
            poly_mean, mfcc_mean, mfcc_std, energy
        ])
        return features
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return None

def process_gunshot_dataset(dataset_dir, output_csv):
    """
    Gunshot audio dataset üzerinde özellik çıkarımı yapar ve CSV'e kaydeder.
    """
    features_list = []
    for weapon_type in os.listdir(dataset_dir):
        weapon_path = os.path.join(dataset_dir, weapon_type)
        if os.path.isdir(weapon_path):
            for file_name in os.listdir(weapon_path):
                if file_name.lower().endswith((".wav", ".mp3", ".flac")):
                    file_path = os.path.join(weapon_path, file_name)
                    features = extract_features(file_path)
                    if features is not None:
                        features_list.append([*features, weapon_type])
    
    columns = (
        ['zero_crossing', 'centroid_mean', 'rolloff_mean', 'bandwidth_mean'] +
        [f'contrast_mean_{i}' for i in range(7)] +
        [f'contrast_std_{i}' for i in range(7)] +
        [f'chroma_stft_mean_{i}' for i in range(12)] +
        [f'chroma_stft_std_{i}' for i in range(12)] +
        ['rms_mean', 'melspectrogram_mean', 'melspectrogram_std', 'flatness_mean'] +
        [f'poly_mean_{i}' for i in range(2)] +
        [f'mfcc_mean_{i}' for i in range(40)] +
        [f'mfcc_std_{i}' for i in range(40)] +
        ['energy', 'weapon_type']
    )
    
    df = pd.DataFrame(features_list, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"✅ Özellik çıkarımı tamamlandı ve '{output_csv}' dosyasına kaydedildi.")

if __name__ == "__main__":
    dataset_dir = "/kaggle/input/gunshot-audio-dataset"
    process_gunshot_dataset(dataset_dir, "gunshot_features.csv")


✅ Özellik çıkarımı tamamlandı ve 'gunshot_features.csv' dosyasına kaydedildi.


In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, mutual_info_classif, RFE, SelectFromModel, f_classif, VarianceThreshold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mutual_info_score

# Information Gain (IG)
def information_gain(X, y):
    scores = []
    for i in range(X.shape[1]):
        mutual_info = mutual_info_score(y, X[:, i])
        scores.append(mutual_info)
    return np.array(scores)

# Özellikleri discretize et
def discretize_features(X):
    return np.apply_along_axis(
        lambda col: np.digitize(col, bins=np.histogram_bin_edges(col, bins='auto')), axis=0, arr=X
    )

# Veriyi yükleme
def load_and_preprocess_data(csv_path):
    df = pd.read_csv(csv_path)
    X = df.drop(columns=['weapon_type']).values
    y = df['weapon_type'].values

    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y_encoded, df.drop(columns=['weapon_type']).columns, df['weapon_type']

# Özellik seçim algoritmaları
def feature_selection_algorithms(X, y, feature_names, top_k):
    feature_ranks = {name: 0 for name in feature_names}
    X_binned = discretize_features(X)

    # IG
    ig_scores = information_gain(X_binned, y)
    ig_indices = np.argsort(ig_scores)[-top_k:]
    for idx, fi in enumerate(ig_indices):
        feature_ranks[feature_names[fi]] += (top_k - idx)

    # RFE
    rfe_model = LogisticRegression(max_iter=500, random_state=42)
    rfe_selector = RFE(estimator=rfe_model, n_features_to_select=top_k, step=1)
    rfe_selector.fit(X, y)
    rfe_features = rfe_selector.get_support(indices=True)
    for idx, fi in enumerate(rfe_features):
        feature_ranks[feature_names[fi]] += (top_k - idx)

    # SelectKBest
    skb_selector = SelectKBest(k=top_k)
    skb_selector.fit(X, y)
    skb_features = skb_selector.get_support(indices=True)
    for idx, fi in enumerate(skb_features):
        feature_ranks[feature_names[fi]] += (top_k - idx)

    # ANOVA
    anova_selector = SelectKBest(score_func=f_classif, k=top_k)
    anova_selector.fit(X, y)
    anova_features = anova_selector.get_support(indices=True)
    for idx, fi in enumerate(anova_features):
        feature_ranks[feature_names[fi]] += (top_k - idx)

    # VarianceThreshold
    vt_selector = VarianceThreshold(threshold=0.01)
    vt_selector.fit(X)
    vt_features = set(np.where(vt_selector.get_support())[0])

    # RandomForest
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X, y)
    sfm_selector = SelectFromModel(estimator=rf_model, prefit=True, threshold="mean")
    sfm_features = sfm_selector.get_support(indices=True)
    for idx, fi in enumerate(sfm_features):
        feature_ranks[feature_names[fi]] += (top_k - idx)

    # Sıralama
    sorted_features = sorted(feature_ranks.items(), key=lambda x: x[1], reverse=True)
    top_features = sorted_features[:top_k]
    top_feature_names = [name for name, score in top_features]
    top_feature_scores = [score for name, score in top_features]

    return top_feature_names, top_feature_scores

# Seçilen özellikleri kaydet
def save_selected_features(X, top_feature_names, feature_names, target_series):
    selected_indices = [i for i, name in enumerate(feature_names) if name in top_feature_names]
    X_selected = X[:, selected_indices]
    selected_df = pd.DataFrame(X_selected, columns=top_feature_names)
    selected_df['weapon_type'] = target_series.values
    selected_df.to_csv("selected_features.csv", index=False)
    print(f"✅ Seçilen {len(top_feature_names)} özellik başarıyla kaydedildi.")

# ======================================
if __name__ == "__main__":
    csv_path = "gunshot_features.csv"
    top_k = 100   # 🔥 BURAYI istediğin kadar dinamik değiştir
    X_scaled, y_encoded, feature_names, weapon_types = load_and_preprocess_data(csv_path)

    top_feature_names, top_feature_scores = feature_selection_algorithms(
        X_scaled, y_encoded, feature_names, top_k=top_k
    )

    print(f"\nSeçilen En İyi {top_k} Özellikler:")
#    for name, score in zip(top_feature_names, top_feature_scores):
#        print(f"{name}: {score}")

    save_selected_features(X_scaled, top_feature_names, feature_names, weapon_types)


Seçilen En İyi 100 Özellikler:
✅ Seçilen 100 özellik başarıyla kaydedildi.


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# 1. VERİYİ YÜKLEME
csv_path = "/kaggle/working/selected_features.csv"
df = pd.read_csv(csv_path)

# 2. ÖZELLİKLER ve LABEL
features = df.drop(columns=['weapon_type']).values
labels = df['weapon_type'].values

# 3. LABEL ENCODING
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# 4. ONE-HOT ENCODING
one_hot_encoder = OneHotEncoder(sparse_output=False)
labels_one_hot = one_hot_encoder.fit_transform(labels_encoded.reshape(-1, 1))

# 5. TRAIN-TEST SPLIT
X_train, X_test, y_train, y_test = train_test_split(features, labels_one_hot, test_size=0.2, random_state=42)

# 6. STANDARDIZATION
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

# MODEL
input_dim = X_train.shape[1]
num_classes = y_train.shape[1]

model = Sequential([
    Dense(1000, activation='relu', input_shape=(input_dim,)),
    Dense(750, activation='relu'),
    Dense(500, activation='relu'),
    Dense(250, activation='relu'),
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# DERLE
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy']
)

# CALLBACK
checkpoint = ModelCheckpoint(
    "best_gunshot_cnn.keras", monitor='val_accuracy',
    save_best_only=True, mode='max', verbose=1
)

# EĞİT
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=60,
    batch_size=64,
    verbose=1,
    callbacks=[checkpoint]
)

# TEST
best_model = load_model("best_gunshot_cnn.keras")
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)
print(f"\nPure Dense Model Test Accuracy: {test_accuracy:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1751982925.992888      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/60


I0000 00:00:1751982929.799954     111 service.cc:148] XLA service 0x7be4280121c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751982929.800434     111 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1751982930.140167     111 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/11[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m39s[0m 4s/step - accuracy: 0.1719 - loss: 2.1790

I0000 00:00:1751982931.246589     111 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.3510 - loss: 1.8770
Epoch 1: val_accuracy improved from -inf to 0.56725, saving model to best_gunshot_cnn.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 266ms/step - accuracy: 0.3604 - loss: 1.8550 - val_accuracy: 0.5673 - val_loss: 1.1510
Epoch 2/60
[1m 1/11[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - accuracy: 0.7344 - loss: 0.9219
Epoch 2: val_accuracy improved from 0.56725 to 0.63743, saving model to best_gunshot_cnn.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.6803 - loss: 0.9120 - val_accuracy: 0.6374 - val_loss: 0.9339
Epoch 3/60
[1m 1/11[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - accuracy: 0.7188 - loss: 0.7011
Epoch 3: val_accuracy improved from 0.63743 to 0.68421, saving model to best_gunshot_cnn.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/s

In [None]:
# 12. EN İYİ MODELİ YÜKLE
best_model = load_model("best_gunshot_cnn.keras")

# 13. TEST ÜZERİNDE DEĞERLENDİR
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ EBest Model Test Accuracy: {test_accuracy:.4f}")