# Chest X-Ray Model 1

Date: 8/18/2024

Author: Sylas Chacko

In [3]:
import csv
import os
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical


## Data Preprocessing Labels

In [1]:
def list_files_in_folder(folder_path, output_csv):
    """Write the names of the regular files in a folder to a one-column CSV.

    Only direct children of ``folder_path`` are considered; entries that are
    not files (e.g. sub-directories) are skipped and not descended into.
    The CSV starts with a ``File_Name`` header row, followed by one file name
    per row in sorted order.

    Args:
        folder_path: Directory whose files should be listed.
        output_csv: Path of the CSV file to create (overwritten if it exists).

    Returns:
        None. The result is the CSV written to ``output_csv``.

    Raises:
        OSError: If ``folder_path`` cannot be listed or ``output_csv`` cannot
            be opened for writing.
    """
    # os.listdir returns entries in arbitrary order; sort so that re-running
    # the notebook always produces the same CSV.
    file_names = sorted(
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )

    # newline='' is what the csv module expects (avoids blank rows on Windows).
    # UTF-8 is explicit so the file reads back with pandas' default encoding
    # regardless of the platform's locale encoding.
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['File_Name'])
        writer.writerows([name] for name in file_names)

# Example usage: write the names of the files in the folder below to a CSV.
# NOTE(review): 'file_names.csv' is a relative path, so it is created in the
# kernel's current working directory -- confirm that this is the same location
# the label-matching cell reads file_names.csv from.
folder_path = r'C:\Users\sylas\OneDrive\Projects\medical_images\data\sample\s_train_pipeline\s_train_final'
output_csv = 'file_names.csv'
list_files_in_folder(folder_path=folder_path, output_csv=output_csv)


In [6]:
# Define paths to the CSV files and directory
labels_file_path = r'C:\Users\sylas\OneDrive\Projects\medical_images\data\sample_labels.csv'
file_names_file = r'C:\Users\sylas\OneDrive\Projects\medical_images\file_names.csv'
output_csv = r'C:\Users\sylas\OneDrive\Projects\medical_images\Image_Labels.csv'

# Number of leading characters used as the join key between a label row's
# 'Image Index' and a file name on disk.
# NOTE(review): presumably this is the part of the name an original image
# shares with its augmented copies -- confirm against the naming scheme.
PREFIX_LENGTH = 12

# Read the CSV files
labels_df = pd.read_csv(labels_file_path)
file_names_df = pd.read_csv(file_names_file)

# Truncated key for every file name, in the same order as file_names_df
file_names = [f[:PREFIX_LENGTH] for f in file_names_df['File_Name'].tolist()]

# Truncated key for every label row
labels_df['Truncated_Image_Index'] = labels_df['Image Index'].str[:PREFIX_LENGTH]

# Index the file names by their truncated key once, so each label row is a
# single dict lookup instead of a scan over every file name. Lists keep the
# order in which the files appear in file_names.csv.
files_by_prefix = {}
for prefix, file_name in zip(file_names, file_names_df['File_Name']):
    files_by_prefix.setdefault(prefix, []).append(file_name)

# One output record per (label row, matching file), in label-row order.
# A label row whose key matches no file contributes nothing. Matching is on
# the exact truncated key, so an 'Image Index' shorter than PREFIX_LENGTH can
# only match a file with exactly that name, never a longer name that merely
# starts with it.
data = []
for truncated_file_name, disease in zip(
    labels_df['Truncated_Image_Index'], labels_df['Finding Labels']
):
    for match in files_by_prefix.get(truncated_file_name, []):
        data.append({'File_Name': match, 'Disease': disease})

# Check if data was found and create a DataFrame
if data:
    df = pd.DataFrame(data)
    df.to_csv(output_csv, index=False)
    print('Image Labels CSV created at:', output_csv)
else:
    print('No matching data processed. Check file paths and file availability.')


Image Labels CSV created at: C:\Users\sylas\OneDrive\Projects\medical_images\Image_Labels.csv
