<a href="https://colab.research.google.com/github/sayandeepmaity/vseven_lumi/blob/main/standardized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem; the data paths used by the
# later cells all live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [4]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# ---------------------------------------------------------------------------
# Standardize the tabular features: label-encode the categorical columns,
# z-score every input feature, and write the result (plus the 'Gunshot'
# label) to a new CSV file.
# ---------------------------------------------------------------------------

# Input / output locations
csv_file = "/content/drive/MyDrive/sevenlumi_data/features.csv"
output_csv_file = "/content/drive/MyDrive/sevenlumi_data/standardized_features.csv"  # Define output path for the standardized CSV

# Columns that are left out of the standardized feature matrix.
excluded_columns = ['Gunshot', 'Gun_Type', 'Distance', 'X', 'Y', 'Z']
# String-valued columns that are turned into integer codes.
categorical_columns = ['Gun_Type', 'Mic']

# Load the CSV data
df = pd.read_csv(csv_file)

# Check non-numeric columns
non_numeric_columns = df.select_dtypes(exclude=[np.number]).columns.tolist()
print(f"Non-numeric columns: {non_numeric_columns}")

# Encode each categorical column with its OWN LabelEncoder. Re-fitting a single
# encoder on a second column overwrites `classes_`, which makes the codes of the
# first column impossible to map back to their original labels.
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Backward compatibility: `encoder` used to end up fitted on 'Mic' (the last
# column encoded), so keep that name pointing at the 'Mic' encoder.
encoder = label_encoders['Mic']

# Convert 'Distance' to numeric if it's in string format.
# Values that cannot be parsed become NaN instead of raising.
df['Distance'] = pd.to_numeric(df['Distance'], errors='coerce')

# Now let's check the transformed data
print(f"Data types after conversion:\n{df.dtypes}")

# Extract the input features once, so the values and the column names used
# below are guaranteed to come from the same selection.
# NOTE(review): 'Mic' stays in the feature matrix as an integer code and is
# standardized like a continuous value -- confirm that this is intended.
features_df = df.drop(columns=excluded_columns)
feature_columns = features_df.columns

# Normalize CSV features (zero mean, unit variance per column)
scaler = StandardScaler()
X_csv = scaler.fit_transform(features_df.values)

# Check the final data shape and types
print(f"Shape of CSV data after transformation: {X_csv.shape}")

# Convert the normalized features back into a DataFrame. Reusing df's index
# keeps the rows aligned when the label column is attached below.
normalized_df = pd.DataFrame(X_csv, columns=feature_columns, index=df.index)

# Add the 'Gunshot' column back to the DataFrame (since it was excluded earlier)
normalized_df['Gunshot'] = df['Gunshot']

# Save the standardized data into a new CSV file
normalized_df.to_csv(output_csv_file, index=False)

print(f"Standardized CSV saved to: {output_csv_file}")


Non-numeric columns: ['Gun_Type', 'Distance', 'Mic']
Data types after conversion:
Zero_Crossing_Rate         float64
Short_Time_Energy          float64
RMS_Energy                 float64
Spectral_Centroid          float64
Spectral_RollOff           float64
Spectral_Flux              float64
MFCC_1                     float64
MFCC_2                     float64
MFCC_3                     float64
MFCC_4                     float64
MFCC_5                     float64
Chroma_1                   float64
Chroma_2                   float64
Chroma_3                   float64
Chroma_4                   float64
Peak_Amplitude             float64
Gunshot                      int64
Gun_Type                     int64
Distance                   float64
X                            int64
Y                            int64
Z                            int64
Mic                          int64
Arrival_Time_Difference    float64
dtype: object
Shape of CSV data after transformation: (4880, 18)
Standardized CSV saved to: /content/drive/MyDrive/sevenlumi_data/standardized_features.csv

In [None]:
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array, save_img
import numpy as np
from tqdm import tqdm  # Import tqdm for progress bar

# ---------------------------------------------------------------------------
# Standardize the feature images: resize every PNG to a fixed size, scale the
# pixel values to [0, 1], keep the arrays in memory (X_images) and write a
# copy of each processed image under output_dir, mirroring the folder layout
# of image_dir.
# ---------------------------------------------------------------------------

# Define directories and parameters
image_dir = "/content/drive/MyDrive/sevenlumi_data/feature_images"
output_dir = "/content/drive/MyDrive/sevenlumi_data/standardized"  # New folder for saving images
image_size = (370, 370)  # Resize images to 370x370
folder_depth = 4  # Number of directory levels between image_dir and the PNG files

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Gather all image paths first so tqdm knows the total count.
# Walk exactly `folder_depth` directory levels down. Entries are sorted because
# os.listdir returns them in arbitrary order, which would make the row order of
# X_images differ between runs. Non-directories are ignored at every level, so
# a stray file next to the leaf folders no longer raises.
leaf_dirs = [image_dir]
for _ in range(folder_depth):
    leaf_dirs = [
        os.path.join(parent, entry)
        for parent in leaf_dirs
        for entry in sorted(os.listdir(parent))
        if os.path.isdir(os.path.join(parent, entry))
    ]

all_image_paths = [
    os.path.join(leaf, file_name)
    for leaf in leaf_dirs
    for file_name in sorted(os.listdir(leaf))
    if file_name.endswith(".png")
]

# image_paths[i] is the source file of X_images[i]; len(image_paths) is the
# number of rows filled so far (useful if the loop is interrupted).
image_paths = []

# X_images is allocated once, as soon as the first image reveals the array
# shape and dtype. Collecting the arrays in a list and converting with
# np.array() afterwards would hold the whole dataset in memory twice.
X_images = None

# Use tqdm to track the progress of image loading and standardization
for row, image_path in enumerate(
    tqdm(all_image_paths, desc="Standardizing and Saving Images", unit="image")
):
    # Load the image and resize to target size
    img = load_img(image_path, target_size=image_size)

    # Convert the image to an array and normalize the pixel values
    img_array = img_to_array(img) / 255.0  # Normalize to [0, 1]

    if X_images is None:
        X_images = np.zeros((len(all_image_paths),) + img_array.shape, dtype=img_array.dtype)
    X_images[row] = img_array
    image_paths.append(image_path)

    # Keep the same folder structure inside the output directory
    relative_path = os.path.relpath(image_path, image_dir)
    output_image_path = os.path.join(output_dir, relative_path)
    os.makedirs(os.path.dirname(output_image_path), exist_ok=True)  # Create subdirectories if needed

    # Save the image.
    # NOTE(review): save_img is called with its default `scale` setting, which
    # rescales the array by its own min/max before writing -- confirm that a
    # per-image contrast stretch is acceptable for the saved files.
    save_img(output_image_path, img_array)

# No image found: still expose an (empty) array with the expected trailing dims.
if X_images is None:
    X_images = np.zeros((0,) + image_size + (3,), dtype=np.float32)

# Check the shape of the image data
print(f"Shape of Image Data: {X_images.shape}")


Standardizing and Saving Images: 100%|██████████| 4880/4880 [28:16<00:00,  2.88image/s]


In [None]:
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array, save_img
import numpy as np
from tqdm import tqdm  # Import tqdm for progress bar
import json  # Import json for saving and loading the checkpoint

# ---------------------------------------------------------------------------
# Resumable image standardization: resize every PNG to a fixed size, scale the
# pixel values to [0, 1], keep the arrays in memory (X_images) and write a
# copy of each processed image under output_dir. A JSON checkpoint lists the
# source images whose output file has already been written, so an interrupted
# run does not have to write them again.
# ---------------------------------------------------------------------------

# Define directories and parameters
image_dir = "/content/drive/MyDrive/sevenlumi_data/feature_images"
output_dir = "/content/drive/MyDrive/sevenlumi_data/standardized"  # New folder for saving images
checkpoint_file = "/content/drive/MyDrive/sevenlumi_data/processed_images_checkpoint.json"  # File to save processed images
image_size = (370, 370)  # Resize images to 370x370
folder_depth = 4  # Number of directory levels between image_dir and the PNG files
checkpoint_every = 100  # Flush the checkpoint after this many newly saved images

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Gather all image paths first so tqdm knows the total count.
# Walk exactly `folder_depth` directory levels down. Entries are sorted because
# os.listdir returns them in arbitrary order, which would make the row order of
# X_images differ between runs. Non-directories are ignored at every level, so
# a stray file next to the leaf folders no longer raises.
leaf_dirs = [image_dir]
for _ in range(folder_depth):
    leaf_dirs = [
        os.path.join(parent, entry)
        for parent in leaf_dirs
        for entry in sorted(os.listdir(parent))
        if os.path.isdir(os.path.join(parent, entry))
    ]

all_image_paths = [
    os.path.join(leaf, file_name)
    for leaf in leaf_dirs
    for file_name in sorted(os.listdir(leaf))
    if file_name.endswith(".png")
]

# Load checkpoint if it exists. A checkpoint truncated by an interrupted write
# is reported and ignored instead of aborting the whole cell.
processed_images = []
if os.path.exists(checkpoint_file):
    try:
        with open(checkpoint_file, "r") as f:
            processed_images = json.load(f)  # Source paths whose output was already written
    except json.JSONDecodeError:
        print(f"Checkpoint {checkpoint_file} is not valid JSON; starting without it.")

# Set for constant-time membership tests; `processed_images` stays a list
# because that is what gets serialized to JSON.
already_saved = set(processed_images)


def write_checkpoint():
    """Persist the list of processed source paths to `checkpoint_file`."""
    with open(checkpoint_file, "w") as f:
        json.dump(processed_images, f)


# image_paths[i] is the source file of X_images[i]; len(image_paths) is the
# number of rows filled so far (useful if the loop is interrupted).
image_paths = []

# X_images is allocated once, as soon as the first image reveals the array
# shape and dtype. Collecting the arrays in a list and converting with
# np.array() afterwards would hold the whole dataset in memory twice.
X_images = None

pending_checkpoint_entries = 0  # Newly saved images not yet flushed to disk

try:
    # Use tqdm to track the progress of image loading and standardization
    for row, image_path in enumerate(
        tqdm(all_image_paths, desc="Standardizing and Saving Images", unit="image")
    ):
        # Every image is loaded, even if it was checkpointed: skipping the load
        # would leave X_images without the images handled by an earlier run.
        img = load_img(image_path, target_size=image_size)

        # Convert the image to an array and normalize the pixel values
        img_array = img_to_array(img) / 255.0  # Normalize to [0, 1]

        if X_images is None:
            X_images = np.zeros((len(all_image_paths),) + img_array.shape, dtype=img_array.dtype)
        X_images[row] = img_array
        image_paths.append(image_path)

        # Keep the same folder structure inside the output directory
        relative_path = os.path.relpath(image_path, image_dir)
        output_image_path = os.path.join(output_dir, relative_path)

        # Skip only the write, and only when the output file really exists; a
        # checkpoint entry without its file is written again.
        if image_path in already_saved and os.path.exists(output_image_path):
            continue

        os.makedirs(os.path.dirname(output_image_path), exist_ok=True)  # Create subdirectories if needed

        # Save the image.
        # NOTE(review): save_img is called with its default `scale` setting,
        # which rescales the array by its own min/max before writing -- confirm
        # that a per-image contrast stretch is acceptable for the saved files.
        save_img(output_image_path, img_array)

        # Mark this image as processed
        if image_path not in already_saved:
            already_saved.add(image_path)
            processed_images.append(image_path)
            pending_checkpoint_entries += 1

        # Flush periodically: rewriting the full JSON after every single image
        # makes the total amount of checkpoint I/O grow quadratically.
        if pending_checkpoint_entries >= checkpoint_every:
            write_checkpoint()
            pending_checkpoint_entries = 0
finally:
    # Also runs on interruption or error, so finished work is never lost.
    if pending_checkpoint_entries:
        write_checkpoint()

# No image found: still expose an (empty) array with the expected trailing dims.
if X_images is None:
    X_images = np.zeros((0,) + image_size + (3,), dtype=np.float32)

# Check the shape of the image data
print(f"Shape of Image Data: {X_images.shape}")


Standardizing and Saving Images: 100%|██████████| 4880/4880 [04:28<00:00, 18.18image/s]
