In [34]:
import pandas as pd

# read the TSV file into a DataFrame
df = pd.read_csv('task_damage_train.tsv', sep='\t')

# display the first few rows of the DataFrame
print(df.head())


             Event_name              Image_id  \
0      hurricane_harvey  905960092822003712_0   
1  california_wildfires  918008272363368448_0   
2        hurricane_irma  909396901254090752_0   
3       hurricane_maria  912097936200355841_0   
4       hurricane_maria  922610290281402368_1   

                                               Image                Label  
0  data_image/hurricane_harvey/8_9_2017/905960092...        severe_damage  
1  data_image/california_wildfires/11_10_2017/918...        severe_damage  
2  data_image/hurricane_irma/17_9_2017/9093969012...        severe_damage  
3  data_image/hurricane_maria/24_9_2017/912097936...        severe_damage  
4  data_image/hurricane_maria/23_10_2017/92261029...  little_or_no_damage  


In [35]:
# read the TSV file into a DataFrame
ds = pd.read_csv('task_damage_test.tsv', sep='\t')

# display the first few rows of the DataFrame
print(ds.head())

             Event_name              Image_id  \
0       hurricane_maria  912065374929264640_1   
1      hurricane_harvey  905930890735439873_1   
2        hurricane_irma  910225185176997895_0   
3  california_wildfires  917804456422473734_0   
4       hurricane_maria  913116029156339713_0   

                                               Image                Label  
0  data_image/hurricane_maria/24_9_2017/912065374...  little_or_no_damage  
1  data_image/hurricane_harvey/7_9_2017/905930890...          mild_damage  
2  data_image/hurricane_irma/19_9_2017/9102251851...        severe_damage  
3  data_image/california_wildfires/10_10_2017/917...        severe_damage  
4  data_image/hurricane_maria/27_9_2017/913116029...        severe_damage  


In [36]:
df = pd.concat([df,ds])
df.count()

Event_name    2997
Image_id      2997
Image         2997
Label         2997
dtype: int64

In [37]:
df = df.drop(['Event_name', 'Image_id'] , axis=1)

# display the updated DataFrame
print(df.head())

                                               Image                Label
0  data_image/hurricane_harvey/8_9_2017/905960092...        severe_damage
1  data_image/california_wildfires/11_10_2017/918...        severe_damage
2  data_image/hurricane_irma/17_9_2017/9093969012...        severe_damage
3  data_image/hurricane_maria/24_9_2017/912097936...        severe_damage
4  data_image/hurricane_maria/23_10_2017/92261029...  little_or_no_damage


In [38]:
# define the string to concatenate
prefix = 'C:/Users/kaust/Downloads/'

# apply the lambda function to concatenate the prefix with every entry in the 'Name' column
df['Image'] = df['Image'].apply(lambda x: prefix + x)

# display the updated DataFrame
print(df)

                                                 Image                Label
0    C:/Users/kaust/Downloads/data_image/hurricane_...        severe_damage
1    C:/Users/kaust/Downloads/data_image/california...        severe_damage
2    C:/Users/kaust/Downloads/data_image/hurricane_...        severe_damage
3    C:/Users/kaust/Downloads/data_image/hurricane_...        severe_damage
4    C:/Users/kaust/Downloads/data_image/hurricane_...  little_or_no_damage
..                                                 ...                  ...
524  C:/Users/kaust/Downloads/data_image/hurricane_...        severe_damage
525  C:/Users/kaust/Downloads/data_image/hurricane_...          mild_damage
526  C:/Users/kaust/Downloads/data_image/srilanka_f...          mild_damage
527  C:/Users/kaust/Downloads/data_image/hurricane_...        severe_damage
528  C:/Users/kaust/Downloads/data_image/hurricane_...          mild_damage

[2997 rows x 2 columns]


In [39]:
df.dropna()

Unnamed: 0,Image,Label
0,C:/Users/kaust/Downloads/data_image/hurricane_...,severe_damage
1,C:/Users/kaust/Downloads/data_image/california...,severe_damage
2,C:/Users/kaust/Downloads/data_image/hurricane_...,severe_damage
3,C:/Users/kaust/Downloads/data_image/hurricane_...,severe_damage
4,C:/Users/kaust/Downloads/data_image/hurricane_...,little_or_no_damage
...,...,...
524,C:/Users/kaust/Downloads/data_image/hurricane_...,severe_damage
525,C:/Users/kaust/Downloads/data_image/hurricane_...,mild_damage
526,C:/Users/kaust/Downloads/data_image/srilanka_f...,mild_damage
527,C:/Users/kaust/Downloads/data_image/hurricane_...,severe_damage


In [40]:
from IPython.display import Image

# specify the file path of your image with forward slashes
file_path = df.loc[1585, 'Image']

# display the image
Image(filename=file_path)

<IPython.core.display.Image object>

In [8]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from torchvision.models import resnet50

# Define a custom dataset to load images and labels
class CustomDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path = self.df.iloc[index, 0]
        label = self.df.iloc[index, 1]
        img = Image.open(img_path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

In [9]:
# Define the transformations to apply to the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [10]:
# Create a custom dataset using the dataframe and transformations
dataset = CustomDataset(df, transform)

# Create a data loader to batch and shuffle the data
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Load the ResNet-50 model
model = resnet50(pretrained=True)

# Freeze the parameters of the convolutional layers
for param in model.parameters():
    param.requires_grad = False

# Replace the fully connected layer with a new one with the appropriate number of classes
num_classes = len(set(df['Label']))
model.fc = torch.nn.Linear(in_features=2048, out_features=num_classes)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\kaust/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|█████████████████████████████████████████████████████████████| 97.8M/97.8M [00:07<00:00, 13.2MB/s]


In [25]:
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for epoch in range(3):
    label_dict = {'little_or_no_damage':0, 'mild_damage': 0, 'moderate_damage': 1, 'severe_damage': 2}
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        label_tensors = [torch.tensor(label_dict[label]).to(device) for label in labels]
        labels = torch.stack(label_tensors)


        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch + 1} Loss: {running_loss / len(dataloader):.4f}')

Epoch 1 Loss: 262.3670
Epoch 2 Loss: 337.3029
Epoch 3 Loss: 367.0699


In [27]:
# Evaluate the model on the test data
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        label_tensors = [torch.tensor(label_dict[label]).to(device) for label in labels]
        labels = torch.stack(label_tensors)
        
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.4014


In [42]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

# Convert the images to arrays and pad them to a fixed size
max_length = 1000
X = []
for image_path in df['Image']:
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.pad(img_array, ((0, max_length - img_array.shape[0]), (0, 0), (0, 0)), 'constant')
    X.append(img_array)
X = np.array(X)

# Convert the labels to one-hot encoding
y = pd.get_dummies(df['Label']).values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [44]:
# Define the maximum sequence length
max_length = 1000

# Define the RNN model
model = tf.keras.Sequential([
    tf.keras.layers.Reshape((max_length, -1), input_shape=(max_length, 224, 3)),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(y.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [45]:
# Train the model
model.fit(X, y, batch_size=32, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x22597fb7d30>

In [46]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, batch_size=32)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

Test loss: 0.9739558696746826, Test accuracy: 0.6083333492279053
