## Imports

In [None]:
import os
from os import path

import json

import csv

from collections import defaultdict

import re
 
import librosa

import numpy as np

import pandas as pd

import soundfile as sf

from pydub import AudioSegment 

from scipy import signal
from scipy.io import wavfile
import scipy.io.wavfile as io  # TODO: fix in the code (the original note does not say what needs fixing)
from scipy.ndimage import label

from keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder





2024-11-12 20:39:34.448965: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Dataset

##### We used the paid Cornell bird sound dataset.


##### defining PATHs

In [None]:
dataset_PATH = "./data/cornell"  # Path to the directory containing all MP3 files
wav_files_PATH = "./data/cornell_wav"  # Path to the directory for WAV files converted from MP3
segmented_files_PATH = "./data/segmented_wav"  # Path to the directory for segmented files
mfcc_output_file = "./data/JSON/mfcc_data.json"  # Path for the JSON file storing MFCC data
labeled_mfcc_file = "./data/JSON/labeled_mfcc.json"  # Path for the JSON file with labeled MFCC data
# NOTE(review): generate_bird_statistics() joins its output file name onto the directory it
# scans, so verify that the statistics file really ends up at the path below.
bird_statistics_file = "./data/JSON/bird_statistics.json" # Path for the JSON file with bird_statistics
mapping_csv_path = "./data/CSV/bird-call-mapping.csv"  # Path to the CSV with Call_Type -> Closest_Call_Type rows
data_file = "./data/JSON/X_Y_data.json"  # Path for the JSON file storing the X (features) and Y (labels) data


### Converting MP3 to WAV

In [None]:
def convert_mp3_to_wav(input_directory, output_directory):
    """Convert every MP3 file in ``input_directory`` to a WAV file.

    Each ``<name>.mp3`` is decoded with pydub and exported as ``<name>.wav``
    into ``output_directory``. Only the directory itself is scanned (no
    recursion); files without an MP3 extension are ignored.

    Args:
        input_directory: Directory that contains the MP3 files.
        output_directory: Directory that receives the WAV files; it is
            created (including parents) when it does not exist yet.
    """
    # exist_ok avoids the race between a separate exists() check and makedirs()
    os.makedirs(output_directory, exist_ok=True)

    for filename in os.listdir(input_directory):
        # Match the extension case-insensitively so "X.MP3" is converted too,
        # the same way generate_bird_statistics() recognises MP3 files.
        if not filename.lower().endswith(".mp3"):
            continue

        mp3_path = os.path.join(input_directory, filename)
        wav_filename = os.path.splitext(filename)[0] + ".wav"
        wav_path = os.path.join(output_directory, wav_filename)

        # Decode the MP3 and re-encode it as WAV
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(wav_path, format="wav")


In [None]:
# Convert every MP3 file found in dataset_PATH to WAV and store the results in wav_files_PATH
convert_mp3_to_wav(dataset_PATH, wav_files_PATH)

## Filtering

##### Birds typically produce sounds within the range of 1kHz to 8kHz, so we applied a bandpass filter to allow only frequencies between 1kHz and 8kHz.


In [None]:
# You can access the documentation using the links provided

def sos_filter(sample, sr, frequency_range=(1000, 8000)):
    """Band-pass ``sample`` with a Butterworth filter in second-order sections.

    Args:
        sample: Audio samples to filter.
        sr: Sampling rate of ``sample`` in Hz (passed to scipy as ``fs``).
        frequency_range: ``(low, high)`` pass-band edges in Hz.

    Returns:
        The filtered samples as returned by ``scipy.signal.sosfilt``.
    """
    # Design step (N=4, band-pass, SOS output):
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html#scipy.signal.butter
    band_sections = signal.butter(4, frequency_range, btype='band', output='sos', fs=sr)

    # Filtering step:
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sosfilt.html#scipy.signal.sosfilt
    filtered = signal.sosfilt(band_sections, sample)
    return filtered


def ba_filter(sample, sr, frequency_range=(1000, 8000)):
    """Band-pass ``sample`` with a Butterworth filter in transfer-function form.

    Same design as ``sos_filter`` but expressed as numerator/denominator
    coefficients and applied with ``scipy.signal.lfilter``.

    Args:
        sample: Audio samples to filter.
        sr: Sampling rate of ``sample`` in Hz (passed to scipy as ``fs``).
        frequency_range: ``(low, high)`` pass-band edges in Hz.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    # Design step (N=4, band-pass, b/a output):
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html#scipy.signal.butter
    coefficients = signal.butter(4, frequency_range, btype='band', output='ba', fs=sr)
    numerator, denominator = coefficients

    # Filtering step:
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.lfilter.html#scipy.signal.lfilter
    filtered = signal.lfilter(numerator, denominator, sample)
    return filtered


## Segmenting Signal

##### In this section, we identified the time intervals within the audio signals where bird calls occur.
###### (Article: <https://www.sciencedirect.com/science/article/pii/S0003682X14000024?casa_token=cD-XCEDrfZIAAAAA:_7mb2IOFtkG5pkaW34PUyexd2iGUzvNeIusmmfX3eIiTXdS6ECFLeEhV_ZysSG3LvB9jUd0sF1qH>)


In [None]:
def segment_signal(signal, sr, silence_threshold=0.5):
    """Find the index ranges in which ``signal`` is active.

    A sample counts as active when it is greater than zero. A segment starts
    at the first active sample and is closed once ``silence_threshold``
    seconds of consecutive non-active samples have been seen; its end is the
    last active sample before that silence. A segment still open when the
    input ends is closed at the last index of ``signal``.

    Args:
        signal: Sequence of per-sample activity values.
        sr: Sampling rate in Hz, used to convert the threshold to samples.
        silence_threshold: Silence duration (seconds) that ends a segment.

    Returns:
        ``np.ndarray`` with one ``[start, end]`` row (inclusive indices) per
        segment; an empty array when no segment was found.
    """
    max_silence = int(silence_threshold * sr)  # threshold expressed in samples
    found = []
    start = None      # start index of the segment currently being tracked
    quiet_run = 0     # consecutive non-active samples inside that segment

    for index, value in enumerate(signal):
        if value > 0:
            # Activity: open a segment if necessary and forget any silence so far
            if start is None:
                start = index
            quiet_run = 0
            continue

        if start is None:
            # Silence outside of a segment is simply skipped
            continue

        quiet_run += 1
        if quiet_run >= max_silence:
            # The silent run began right after index - max_silence
            found.append((start, index - max_silence))
            start = None
            quiet_run = 0

    if start is not None:
        # The input ended while a segment was still open
        found.append((start, len(signal) - 1))

    return np.array(found)


## Standardizing Segments

##### We adjusted the segments by expanding or shrinking them on both sides to create 3-second regions.


In [None]:
def normalize_segments(segments, sr, target_length=3, signal_length=None):
    """Resize every segment to exactly ``target_length`` seconds.

    Shorter segments are expanded and longer ones trimmed, in both cases as
    evenly as possible on both sides. A resized window that would start
    before index 0 is shifted right so that it starts at 0: negative indices
    would otherwise be interpreted as "count from the end" when the window is
    later used to slice the signal. When ``signal_length`` is given, a window
    running past the end of the signal is shifted left in the same way (and
    clipped if the signal is shorter than the target length).

    Args:
        segments: Iterable of ``(start, end)`` pairs with inclusive indices.
        sr: Sampling rate in Hz, used to convert ``target_length`` to samples.
        target_length: Desired segment duration in seconds.
        signal_length: Optional number of samples in the underlying signal.

    Returns:
        ``np.ndarray`` with one ``(start, end)`` row per input segment.
    """
    target_length = int(target_length * sr)  # seconds -> samples

    normalized_segments = []

    for start, end in segments:
        current_length = end - start + 1

        if current_length <= target_length:
            # Grow (or keep) the segment; the extra sample of an odd difference goes to the end
            extra_length = target_length - current_length
            expand_start = extra_length // 2
            new_start = start - expand_start
            new_end = end + (extra_length - expand_start)
        else:
            # Shrink the segment; the extra sample of an odd difference is trimmed from the end
            extra_length = current_length - target_length
            trim_start = extra_length // 2
            new_start = start + trim_start
            new_end = end - (extra_length - trim_start)

        if new_start < 0:
            # Slide the whole window right so it begins at the first sample
            new_end -= new_start
            new_start = 0

        if signal_length is not None and new_end >= signal_length:
            # Slide the window left so it ends at the last sample
            overshoot = new_end - (signal_length - 1)
            new_start = max(0, new_start - overshoot)
            new_end = signal_length - 1

        normalized_segments.append((new_start, new_end))

    return np.array(normalized_segments)


## Saving Segments

#### We extracted the identified segments from the audio file and saved them as 3-second WAV files.


In [None]:
def save_segments_as_wav(signal, sr, output_directory, segments):
    """Write each segment of ``signal`` to its own WAV file.

    Files are named ``segment_1.wav``, ``segment_2.wav``, ... following the
    order of ``segments``.

    Args:
        signal: Audio samples the segments refer to.
        sr: Sampling rate in Hz written to the WAV files.
        output_directory: Destination directory; created if missing.
        segments: Iterable of ``(start, end)`` pairs with inclusive indices.
    """
    os.makedirs(output_directory, exist_ok=True)

    for i, (start, end) in enumerate(segments):
        # A negative start would be read by slicing as "count from the end" and
        # produce an empty or unrelated clip, so clamp it to the first sample.
        first = max(int(start), 0)
        clip = signal[first:int(end) + 1]
        segment_path = os.path.join(output_directory, f"segment_{i + 1}.wav")
        sf.write(segment_path, clip, sr)


## Complete Process in a Single Function

#### We loaded the given audio file, applied the specified filter, and performed activity detection using the Hilbert follower method. Then, we identified time intervals using `segment_signal` and `normalize_segments`, and finally saved the audio segments with `save_segments_as_wav`.


In [None]:
def segmentate(output_directory, sample, sr, theta=0.28, silence_threshold=0.5, target_length=3, filter=sos_filter):
    """Filter a recording, detect active regions and save them as WAV segments.

    Args:
        output_directory: Directory that receives the segment WAV files.
        sample: Audio samples of the recording.
        sr: Sampling rate of ``sample`` in Hz.
        theta: Envelope amplitude above which a sample counts as active.
        silence_threshold: Silence duration (seconds) that ends a segment.
        target_length: Duration (seconds) every saved segment is resized to.
        filter: Callable ``filter(sample, sr)`` applied before detection.
    """
    sample = filter(sample, sr)  # Apply the specified filter to the sample

    # Amplitude envelope = magnitude of the analytic signal
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.hilbert.html#scipy.signal.hilbert
    # np.abs keeps the envelope real-valued, so the threshold test below does
    # not depend on how complex numbers are ordered.
    envelope = np.abs(signal.hilbert(sample))

    # Detect activity based on threshold
    activity = envelope > theta
    segments = segment_signal(activity, sr, silence_threshold=silence_threshold)  # Segment based on activity
    std_segments = normalize_segments(segments, sr, target_length=target_length)  # Standardize segment lengths

    # Save the standardized segments (cut from the filtered signal) as WAV files
    save_segments_as_wav(sample, sr, output_directory, std_segments)


## Apply to All Files

#### We applied the same process to all audio files in the specified directory.


In [None]:
def segmentate_all(input_directory, output_directory, theta=0.28, silence_threshold=0.5, target_length=3):
    """Run ``segmentate`` on every WAV file of ``input_directory``.

    The segments of ``<name>.wav`` are written to
    ``<output_directory>/<name>/``. Each file is loaded as mono audio
    resampled to 32 kHz.

    Args:
        input_directory: Directory containing the WAV files.
        output_directory: Root directory for the per-recording segment folders.
        theta: Activity threshold forwarded to ``segmentate``.
        silence_threshold: Silence duration (seconds) forwarded to ``segmentate``.
        target_length: Segment duration (seconds) forwarded to ``segmentate``.
    """
    # Make sure the root output directory is there
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    wav_names = [name for name in os.listdir(input_directory) if name.endswith(".wav")]
    for wav_name in wav_names:
        recording_name, _extension = os.path.splitext(wav_name)
        recording_output = os.path.join(output_directory, recording_name)  # one folder per recording

        # Load the WAV file with the fixed sample rate, then segment and save
        sample, sr = librosa.load(os.path.join(input_directory, wav_name), mono=True, sr=32000)
        segmentate(recording_output, sample, sr, theta, silence_threshold, target_length)


In [None]:
# Segment every WAV file in wav_files_PATH; each recording gets its own folder under segmented_files_PATH
segmentate_all(wav_files_PATH, segmented_files_PATH)


## Feature Extraction

##### We extracted Mel-Frequency Cepstral Coefficients (MFCC) features for the 3-second bird call segments we obtained.


In [None]:
def segments_to_mfcc_data(input_directory, output_file):
    """Extract MFCC features for all saved segments and dump them to JSON.

    ``input_directory`` is expected to contain one sub-directory per
    recording, each holding ``segment_<i>.wav`` files. The resulting JSON is a
    list with one object per sub-directory::

        {"dir_name": <sub-directory name>,
         "mfcc": [{"segment": <i>, "mfcc": <nested list>}, ...]}

    Args:
        input_directory: Directory containing the per-recording folders.
        output_file: Path of the JSON file to write; missing parent
            directories are created.
    """
    # Create the destination folder up front: a missing folder would otherwise
    # only surface after every segment has already been processed.
    output_parent = os.path.dirname(output_file)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    segment_pattern = re.compile(r"segment_(\d+)")  # e.g. "segment_3.wav" -> 3

    all_mfcc_data = []

    for dir_name in os.listdir(input_directory):
        dir_path = os.path.join(input_directory, dir_name)
        if not os.path.isdir(dir_path):
            continue

        # MFCC data of all segments in this directory
        mfcc_list = []
        for file_name in os.listdir(dir_path):
            if not file_name.endswith(".wav"):
                continue
            match = segment_pattern.search(file_name)
            if match is None:
                continue

            file_path = os.path.join(dir_path, file_name)
            y, sr = librosa.load(file_path, mono=True, sr=16000)
            # With n_mfcc=16 the result has one row per coefficient and one
            # column per frame (about 94 frames for a 3-second segment,
            # according to the original note).
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=16, hop_length=512)

            mfcc_list.append({
                "segment": int(match.group(1)),
                "mfcc": mfcc.tolist()
            })

        all_mfcc_data.append({
            "dir_name": dir_name,
            "mfcc": mfcc_list
        })

    # Write all data to a single JSON file
    with open(output_file, 'w') as f:
        json.dump(all_mfcc_data, f)


In [None]:
# Extract MFCC features for every segment under segmented_files_PATH and store them in mfcc_output_file
segments_to_mfcc_data(segmented_files_PATH, mfcc_output_file)


## JSON to Pandas DataFrame

#### We loaded the MFCC data from the `mfcc_output_file`, which contains the MFCC features extracted for each audio segment. The JSON file is structured by directories, with each directory containing multiple segments and their respective MFCC data. 

#### The goal is to convert this JSON structure into a Pandas DataFrame to facilitate further analysis and data manipulation. We looped through each directory and segment in the JSON data, creating a structured DataFrame with columns for the directory name (`file_name`), segment number (`segment`), and MFCC features (`mfcc`). This format provides a tabular representation of the MFCC data, making it easier to analyze, visualize, and apply further processing if needed.


In [None]:
# Load the JSON file containing all MFCC data
with open(mfcc_output_file, "r") as f:
    data = json.load(f)

# Flatten the per-directory structure into one row per segment
rows = []
for directory in data:
    rows.extend(
        {
            "file_name": directory["dir_name"],  # Folder name
            "segment": segment["segment"],       # Segment number
            "mfcc": segment["mfcc"],             # MFCC data
        }
        for segment in directory["mfcc"]
    )

# Create the DataFrame. The column names are passed to the constructor so that
# an empty `rows` list still yields a frame with the expected columns instead of
# failing on a column-count mismatch.
sounds_mfcc_data = pd.DataFrame(rows, columns=["file_name", "segment", "mfcc"])

# Display the DataFrame
sounds_mfcc_data


Unnamed: 0,file_name,segment,mfcc
0,Short-billed Gull 01 Long calls US-AK,4,"[[-683.0958251953125, -666.9935913085938, -664..."
1,Short-billed Gull 01 Long calls US-AK,7,"[[-387.50823974609375, -255.884521484375, -237..."
2,Short-billed Gull 01 Long calls US-AK,6,"[[-354.79266357421875, -241.4017791748047, -21..."
3,Short-billed Gull 01 Long calls US-AK,3,"[[-199.59829711914062, -214.4839630126953, -28..."
4,Short-billed Gull 01 Long calls US-AK,2,"[[-231.86581420898438, -269.2934875488281, -28..."
...,...,...,...
31509,Pacific Wren 05 Calls (pacificus Group) US-CA,11,"[[-561.4692993164062, -552.6324462890625, -561..."
31510,Common Grackle 06 Calls US-NY,4,"[[-624.1295166015625, -594.8052978515625, -596..."
31511,Common Grackle 06 Calls US-NY,3,"[[-649.4216918945312, -631.2686767578125, -637..."
31512,Common Grackle 06 Calls US-NY,2,"[[-682.742919921875, -659.6217041015625, -676...."


## Extracting Metadata from File Names

#### We extracted the metadata embedded in the file names to label each recording. The file names follow the pattern `<name> <number> <type> <field code>` (for example, `Abert's Towhee 01 Song US-AZ`), from which we parse the species name and the sound type; the sound type is then mapped to its closest call type using the mapping CSV. This metadata is later merged with the MFCC DataFrame so that every segment carries its species and sound type.


In [None]:
def load_call_type_mapping(mapping_csv_path):
    """
    Loads call type mappings from a CSV file.

    The CSV must have the columns ``Call_Type`` and ``Closest_Call_Type``;
    surrounding whitespace is stripped from both values.

    Args:
        mapping_csv_path: Path to the mapping CSV file.

    Returns:
        dict mapping each call type to its closest call type. Loading is
        best-effort: problems are printed and whatever was read up to that
        point (possibly nothing) is returned.
    """
    mapping = {}
    try:
        # 'utf-8-sig' drops a leading byte-order mark, which would otherwise
        # become part of the first header name and break the 'Call_Type'
        # lookup. newline='' is what the csv module asks for when it is given
        # a file object.
        with open(mapping_csv_path, mode='r', encoding='utf-8-sig', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                call_type = row['Call_Type'].strip()
                closest_call_type = row['Closest_Call_Type'].strip()
                mapping[call_type] = closest_call_type
        print(f"Call type mappings successfully loaded from '{mapping_csv_path}'.")
    except FileNotFoundError:
        print(f"Mapping CSV file not found: {mapping_csv_path}")
    except Exception as e:
        # Deliberately broad: a malformed mapping file must not abort the run
        print(f"An error occurred while loading the mapping CSV file: {e}")
    return mapping

def generate_bird_statistics(directory_path, mapping_csv_path, output_filename="bird_statistics.json"):
    """Collect per-species and per-type file statistics and save them as JSON.

    Every ``.mp3`` file name in ``directory_path`` is parsed with the pattern
    ``<name> <number> <type> <field code>``. The parsed type is translated
    through the call type mapping (``"Other"`` when it has no entry). The
    JSON file is written to ``directory_path`` joined with ``output_filename``.

    Args:
        directory_path: Directory containing the MP3 files.
        mapping_csv_path: CSV file understood by ``load_call_type_mapping``.
        output_filename: Name of the JSON file to create.

    Returns:
        None. Problems while scanning or writing are printed, not raised.
    """
    call_type_mapping = load_call_type_mapping(mapping_csv_path)

    files_per_species = defaultdict(int)   # species name -> number of files
    files_per_type = defaultdict(int)      # mapped type -> number of files
    files_by_species_and_type = defaultdict(lambda: defaultdict(list))  # species -> type -> file names
    total_files = 0

    # Regular expression pattern: <name> <number> <type> <field code>
    pattern = re.compile(r"^(.+?)\s+(\d+)\s+(.+?)(?:\s+([\w-]+))?\.mp3$", re.IGNORECASE)

    try:
        for file_name in os.listdir(directory_path):
            if not file_name.lower().endswith(".mp3"):
                continue

            match = pattern.match(file_name)
            if not match:
                print(f"File does not match the regular expression pattern: {file_name}")
                continue

            total_files += 1
            species_name = match.group(1).strip()
            original_species_type = match.group(3).strip()

            # Unknown types are grouped under "Other"
            mapped_species_type = call_type_mapping.get(original_species_type, "Other")

            files_per_species[species_name] += 1
            files_per_type[mapped_species_type] += 1
            files_by_species_and_type[species_name][mapped_species_type].append(file_name)
    except FileNotFoundError:
        print(f"Directory not found: {directory_path}")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    # Per-species section: total file count plus the files of each type
    species_section = {
        species: {
            "total_files": count,
            "types": {
                species_type: {"count": len(files), "files": files}
                for species_type, files in files_by_species_and_type[species].items()
            },
        }
        for species, count in files_per_species.items()
    }

    json_data = {
        "total_files": total_files,
        "total_species": len(files_per_species),
        "total_types": len(files_per_type),
        "types": dict(files_per_type),  # File counts for each type
        "species": species_section,
    }

    # Save JSON file
    output_path = os.path.join(directory_path, output_filename)
    try:
        with open(output_path, "w", encoding="utf-8") as json_file:
            json.dump(json_data, json_file, ensure_ascii=False, indent=4)
        print(f"Statistics saved to '{output_path}'.")
    except Exception as e:
        print(f"Error writing to JSON file: {e}")


In [None]:
# generate_bird_statistics() joins output_filename onto the directory it scans. Passing an
# absolute path makes os.path.join ignore that directory, so the statistics are written to
# bird_statistics_file, which is the path the next cell reads them from.
os.makedirs(os.path.dirname(os.path.abspath(bird_statistics_file)), exist_ok=True)
generate_bird_statistics(dataset_PATH, mapping_csv_path, output_filename=os.path.abspath(bird_statistics_file))

In [None]:
# Load the statistics JSON; it is written as UTF-8, so it is read with the same encoding
with open(bird_statistics_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# One row per audio file: species, sound type and the file name without its extension
rows = []
for species_name, species_info in data["species"].items():
    for sound_type, type_info in species_info["types"].items():
        for file_name in type_info["files"]:
            rows.append({
                "species": species_name,    # Bird species name
                "sound_type": sound_type,   # Type of sound
                # splitext removes only the final extension (whatever its case), which is
                # how the WAV and segment folder names are derived from the MP3 names
                "file_name": os.path.splitext(file_name)[0],
            })

# Create the DataFrame (explicit columns keep the layout stable even without rows)
meta_data_sounds = pd.DataFrame(rows, columns=["species", "sound_type", "file_name"])

# Display the DataFrame
meta_data_sounds


Unnamed: 0,species,sound_type,file_name
0,Abert's Towhee,Song,Abert's Towhee 01 Song US-AZ
1,Abert's Towhee,Song,Abert's Towhee 02 Song US-AZ
2,Abert's Towhee,Song,Abert's Towhee 03 Song US-AZ
3,Abert's Towhee,Song,Abert's Towhee 04 Song US-AZ
4,Abert's Towhee,Calls,Abert's Towhee 05 Calls US-AZ
...,...,...,...
4949,Zenaida Dove,Song,Zenaida Dove 01 Song AI
4950,Zenaida Dove,Drum,Zenaida Dove 02 Wing sound DO-10
4951,Zone-tailed Hawk,Calls,Zone-tailed Hawk 01 Calls US-AZ
4952,Zone-tailed Hawk,Calls,Zone-tailed Hawk 02 Calls US-AZ


In [None]:
# Inner-join the MFCC rows with the metadata rows that share the same 'file_name'
mfcc_with_metadata = sounds_mfcc_data.merge(meta_data_sounds, how='inner', on='file_name')

# Keep only the MFCC features and the two label columns
label_columns = ["mfcc", "species", "sound_type"]
mfcc_with_label = mfcc_with_metadata[label_columns]

# Display the resulting DataFrame
mfcc_with_label


In [None]:
# Serialise the DataFrame with one JSON record per row
json_data = mfcc_with_label.to_json(orient="records")

# Parse the string again so the file can be written with indentation for readability
records = json.loads(json_data)
with open(labeled_mfcc_file, 'w') as json_file:
    json_file.write(json.dumps(records, indent=2))


In [None]:
# Reload the labeled MFCC records from labeled_mfcc_file, so the following cells can start from the saved file
mfcc_with_label = pd.read_json(labeled_mfcc_file, orient="records")


In [None]:
# Count the rows of every species and order the species from most to fewest MFCC records
species_counts = mfcc_with_label.groupby("species").count()
sorted_species = species_counts.sort_values(by="mfcc", ascending=False)

# Keep the species that have more than 100 MFCC records
has_enough_records = sorted_species["mfcc"] > 100
filtered_species = sorted_species[has_enough_records]

# Names of the species that passed the filter
species_to_extract = list(filtered_species.index)

# Restrict the original DataFrame to those species
is_selected_species = mfcc_with_label["species"].isin(species_to_extract)
mfcc_with_label_filtered = mfcc_with_label[is_selected_species]

# Display the resulting filtered DataFrame
mfcc_with_label_filtered


Unnamed: 0,mfcc,species,sound_type
9,"[[-537.6096191406, -493.4859619141, -474.05200...",Fox Sparrow,Song
10,"[[-553.4559936523, -527.3350830078, -529.01446...",Fox Sparrow,Song
11,"[[-578.8944091797, -550.6073608398, -545.75347...",Fox Sparrow,Song
12,"[[-548.2173461914, -527.176940918, -523.036621...",Fox Sparrow,Song
13,"[[-566.0667724609, -536.0628662109, -529.69946...",Fox Sparrow,Song
...,...,...,...
31381,"[[-388.7108459473, -414.0784606934, -476.01495...",House Wren,Song
31382,"[[-345.2535095215, -360.5129699707, -386.92645...",House Wren,Song
31383,"[[-429.8677368164, -383.9814758301, -391.42626...",House Wren,Song
31384,"[[-362.9503479004, -386.7874145508, -480.89138...",House Wren,Song


In [None]:
# Labels: the species column; features: the MFCC values of the same rows
y_labels = mfcc_with_label_filtered['species']
X = mfcc_with_label_filtered['mfcc'].tolist()

# Learn the species -> integer mapping, then encode the labels with it
label_encoder = LabelEncoder()
label_encoder.fit(y_labels)
Y = label_encoder.transform(y_labels)  # Y is the encoded labels array

# Species names, positioned so that index i is the name of numeric label i
class_names = label_encoder.classes_

class_names


array(['American Robin', 'Blue Jay', 'Blue-gray Gnatcatcher',
       'Cackling Goose', 'Dark-eyed Junco', 'Evening Grosbeak',
       'Fox Sparrow', 'Gray Catbird', 'House Sparrow', 'House Wren',
       "Hutton's Vireo", 'Long-billed Thrasher', 'Marsh Wren',
       'Purple Finch', 'Purple Martin', 'Red Crossbill', 'Red-eyed Vireo',
       'Red-winged Blackbird', 'Song Sparrow', 'Summer Tanager',
       'White-breasted Nuthatch', 'Willow Flycatcher', 'Wood Duck'],
      dtype=object)

In [None]:
# Step 2: Adjust the Shape of X and Normalize the Data
mfcc_arrays = [np.array(mfcc) for mfcc in X]  # Convert each MFCC list to a NumPy array
common_shape = 94  # Set a fixed number of columns

# Give every MFCC array exactly `common_shape` columns:
# zero-pad on the right when it has fewer, cut off the surplus when it has more
X = []
for arr in mfcc_arrays:
    missing_columns = max(0, common_shape - arr.shape[1])
    padded = np.pad(arr, ((0, 0), (0, missing_columns)), mode='constant')
    X.append(padded[:, :common_shape])


## Saving Data

#### We save the X and Y lists together in a single JSON file (`data_file`) for future use.


In [None]:
# Create a DataFrame with one row per sample: "X" holds the MFCC array, "Y" the encoded label
XY = pd.DataFrame({"X": X, "Y": Y})

# Save the DataFrame to data_file as indented JSON records for future use
XY.to_json(data_file, orient="records", indent=2)

#### NOTE: If you’d like to run the code, we can also provide the `data_file` JSON separately.

In [None]:
# Read the saved records back into a DataFrame
XY = pd.read_json(data_file, orient="records")

# Pull the feature and label columns out as plain Python lists
X = XY["X"].to_list()
Y = XY["Y"].to_list()

In [14]:
# Initialize the scaler and normalize each MFCC array
# fit_transform is called once per sample, so every sample is standardized with its own
# statistics rather than with statistics shared across the dataset.
scaler = StandardScaler()
X_normalized = [scaler.fit_transform(mfcc) for mfcc in X]

# Pad or truncate each array to a fixed time-step length
# NOTE(review): pad_sequences works along the first axis of each sequence. If the arrays
# are (coefficients, frames) = (16, 94) as the earlier cells suggest, this pads the
# coefficient axis from 16 to 100 with zeros instead of the time axis — confirm intent.
X_normalized = pad_sequences(X_normalized, maxlen=100, dtype="float32", padding="post", truncating="post")

# Add an extra dimension for the channel (needed for many neural network input shapes)
X_normalized = np.expand_dims(X_normalized, -1)

# Split the data into training and testing sets
# NOTE(review): the split is made per segment, so segments cut from the same recording
# can presumably end up in both sets — verify that this is acceptable for the evaluation.
X_train, X_test, y_train, y_test = train_test_split(X_normalized, Y, test_size=0.2, random_state=42)


## Training the Model

#### We proceed to train a model using the prepared and normalized MFCC features (`X`) and the encoded labels (`Y`). After splitting the data into training and test sets, the model will be trained on the training set and evaluated on the test set to assess its performance.


In [15]:
# Step 2: Flatten the Data
# The classifiers below expect one feature vector per sample, so every sample is
# collapsed into a single row (samples x features)
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)


#### Random Forest without class weighting, n_estimators=100

In [16]:
# Model 1: Train a Random Forest Model
# 100 trees, fixed seed, fitted on the flattened training data
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X_train_flat, y_train)

# Predict the labels of the flattened test data
rf_predictions = rf_model.predict(X_test_flat)

# Report the share of correctly classified test samples
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")


Random Forest Accuracy: 0.8025


#### Random Forest with class_weight="balanced" and n_estimators=200

In [17]:
# Model 2: Train a Random Forest Model with Balanced Class Weights
# 200 trees, fixed seed, balanced class weights; replaces the rf_* variables of Model 1
rf_model = RandomForestClassifier(class_weight="balanced", random_state=42, n_estimators=200)
rf_model.fit(X_train_flat, y_train)

# Predict the labels of the flattened test data
rf_predictions = rf_model.predict(X_test_flat)

# Report the share of correctly classified test samples
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")


Random Forest Accuracy: 0.8219


#### K-Nearest Neighbors Model with n_neighbors=5

In [18]:
# Model 3: K-Nearest Neighbors Model
# 5 neighbours, fitted on the flattened training data
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_flat, y_train)

# Predict the labels of the flattened test data
knn_predictions = knn_model.predict(X_test_flat)

# Report the share of correctly classified test samples
knn_accuracy = accuracy_score(y_true=y_test, y_pred=knn_predictions)
print(f"K-Nearest Neighbors Accuracy: {knn_accuracy:.4f}")


K-Nearest Neighbors Accuracy: 0.6437


#### Support Vector Machine (SVM) Model with linear kernel

In [19]:
# Model 4: Support Vector Machine (SVM) Model
# Linear kernel, fitted on the flattened training data
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_flat, y_train)

# Predict the labels of the flattened test data
svm_predictions = svm_model.predict(X_test_flat)

# Report the share of correctly classified test samples
svm_accuracy = accuracy_score(y_true=y_test, y_pred=svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")


SVM Accuracy: 0.7707


In [None]:
def calculate_class_metrics(y_true, predictions, target_names=None):
    """
    Calculates class-based accuracy, precision, recall, and F1 scores for true labels and predictions.

    Labels are expected to be the integers ``0 .. len(target_names) - 1``,
    with label ``i`` belonging to ``target_names[i]``.

    Args:
    y_true (list or array): True labels.
    predictions (list or array): Predictions made by the model.
    target_names (sequence of str, optional): Class names; defaults to the
        module-level ``class_names``.

    Returns:
    DataFrame: One row per class with the columns precision, recall, f1-score
    and accuracy. The accuracy is one-vs-rest: (TP + TN) / all samples.
    """
    names = list(class_names if target_names is None else target_names)

    # Passing the full label set keeps one row/column per class even when a
    # class does not occur in y_true or predictions.
    labels = np.arange(len(names))
    conf_matrix = confusion_matrix(y_true, predictions, labels=labels)
    class_report = classification_report(
        y_true, predictions, labels=labels, target_names=names, output_dict=True
    )

    # Pick the per-class entries by name (the report also holds summary rows);
    # copy so that adding a column below does not write into a slice.
    class_metrics = pd.DataFrame(
        [class_report[name] for name in names], index=names
    )[["precision", "recall", "f1-score"]].copy()

    # For each class: TP + TN = all samples - false negatives - false positives
    total = conf_matrix.sum()
    true_positives = np.diag(conf_matrix)
    false_negatives = conf_matrix.sum(axis=1) - true_positives
    false_positives = conf_matrix.sum(axis=0) - true_positives
    class_metrics['accuracy'] = (total - false_negatives - false_positives) / total

    return class_metrics


In [53]:
# Per-class metrics for the Random Forest. rf_predictions is assigned by both Random Forest
# cells above, so this reflects whichever of them ran last.
calculate_class_metrics(y_test,rf_predictions)

Unnamed: 0,precision,recall,f1-score,accuracy
American Robin,0.866667,0.928571,0.896552,0.989418
Blue Jay,1.0,0.785714,0.88,0.989418
Blue-gray Gnatcatcher,0.842105,0.941176,0.888889,0.992945
Cackling Goose,0.863636,0.95,0.904762,0.992945
Dark-eyed Junco,0.903226,0.965517,0.933333,0.992945
Evening Grosbeak,0.888889,1.0,0.941176,0.994709
Fox Sparrow,0.92,0.821429,0.867925,0.987654
Gray Catbird,1.0,0.62963,0.772727,0.982363
House Sparrow,0.785714,0.52381,0.628571,0.977072
House Wren,0.677419,0.677419,0.677419,0.964727


In [58]:
# Per-class metrics for the K-Nearest Neighbors predictions
calculate_class_metrics(y_test,knn_predictions)

Unnamed: 0,precision,recall,f1-score,accuracy
American Robin,0.666667,0.571429,0.615385,0.964727
Blue Jay,0.85,0.607143,0.708333,0.975309
Blue-gray Gnatcatcher,0.555556,0.588235,0.571429,0.973545
Cackling Goose,0.933333,0.7,0.8,0.987654
Dark-eyed Junco,0.574468,0.931034,0.710526,0.961199
Evening Grosbeak,0.431373,0.916667,0.586667,0.945326
Fox Sparrow,0.611111,0.785714,0.6875,0.964727
Gray Catbird,0.692308,0.333333,0.45,0.961199
House Sparrow,0.909091,0.47619,0.625,0.978836
House Wren,0.736842,0.451613,0.56,0.961199


In [59]:
# Per-class metrics for the SVM predictions
calculate_class_metrics(y_test,svm_predictions)

Unnamed: 0,precision,recall,f1-score,accuracy
American Robin,0.735294,0.892857,0.806452,0.978836
Blue Jay,0.791667,0.678571,0.730769,0.975309
Blue-gray Gnatcatcher,1.0,0.882353,0.9375,0.996473
Cackling Goose,1.0,0.95,0.974359,0.998236
Dark-eyed Junco,0.692308,0.931034,0.794118,0.975309
Evening Grosbeak,0.92,0.958333,0.938776,0.994709
Fox Sparrow,0.888889,0.857143,0.872727,0.987654
Gray Catbird,0.782609,0.666667,0.72,0.975309
House Sparrow,0.866667,0.619048,0.722222,0.982363
House Wren,0.612903,0.612903,0.612903,0.957672
