In [1]:
import tensorflow as tf
import tensorflow_hub as hub


from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorboard
"""data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
])"""
from sklearn.model_selection import train_test_split

In [2]:
# Load the label tables from the working directory. Later cells read a
# `filename` column from both frames and a `city` column from the training one.
train_labels=pd.read_csv("train_data.csv")
test_labels=pd.read_csv("test.csv")

In [3]:
train_labels["city"].value_counts()

city
Ankara      2360
Izmir       2322
Istanbul    2318
Name: count, dtype: int64

In [4]:
# City label of every training row, as a NumPy array in row order.
cities = train_labels["city"].to_numpy()
cities

array(['Istanbul', 'Istanbul', 'Ankara', ..., 'Ankara', 'Izmir', 'Ankara'],
      dtype=object)

In [5]:
# Relative path of every training image, in the same order as `train_labels`.
filenames_train = [f"train/train/{name}" for name in train_labels["filename"]]

In [6]:
#filenames_train

In [7]:
# Distinct city names. The position of a name in this array is its class index:
# the one-hot labels are built by comparing against it, and predictions are
# decoded by indexing into it.
unique_cities=np.unique(cities)
unique_cities

array(['Ankara', 'Istanbul', 'Izmir'], dtype=object)

In [8]:
# One boolean one-hot row per training sample: entry j is True when the
# sample's city equals unique_cities[j]. A single broadcast comparison builds
# the whole table; list() splits it back into per-sample rows.
boolean_cities = list(cities[:, None] == unique_cities)

In [9]:
#boolean_cities

In [10]:
# Features (image paths) and targets (boolean one-hot city rows) for the split.
x=filenames_train
y=boolean_cities

In [11]:
#x

In [12]:
# Side length (pixels) that images are resized to before entering the model.
IMG_SIZE = 224


class main_app_process:
    """Helpers that turn image file paths into batched ``tf.data`` pipelines.

    The class keeps no instance state; the methods are grouped here for
    convenience only.
    """

    def __init__(self) -> None:
        pass

    def image_process(self, image_path, img_size=IMG_SIZE):
        """Read a JPEG file and return it as a float32 image tensor.

        Args:
            image_path: Path (string or string tensor) of the image file.
            img_size: Side length of the square output image.

        Returns:
            A float32 tensor of shape ``(img_size, img_size, 3)``.
        """
        # Raw file bytes.
        image = tf.io.read_file(image_path)
        # Decode as JPEG, forcing 3 colour channels (RGB).
        image = tf.image.decode_jpeg(image, channels=3)
        # Convert to float32; this rescales the 0-255 values into 0-1.
        image = tf.image.convert_image_dtype(image, tf.float32)
        # Honour the requested size instead of always using the global IMG_SIZE.
        image = tf.image.resize(image, size=[img_size, img_size])
        return image

    def image_for_train_and_valid(self, path, label):
        """Map a ``(path, label)`` pair to ``(image tensor, label)``."""
        return self.image_process(path), label

    def create_data_batches(self, x, y=None, batch_size=32, valid_data=False, test_data=False):
        """Build a batched ``tf.data.Dataset`` from image paths.

        Args:
            x: Sequence of image file paths.
            y: Sequence of labels aligned with ``x``. Required unless
                ``test_data`` is True.
            batch_size: Number of samples per batch.
            valid_data: Build an unshuffled, labelled validation pipeline.
            test_data: Build an unshuffled, unlabelled test pipeline. Takes
                precedence over ``valid_data``.

        Returns:
            A batched dataset of images (test) or ``(images, labels)`` pairs.
            Only the training pipeline is shuffled.

        Raises:
            ValueError: If labels are missing for a train/validation pipeline.
        """
        # Fail early with a clear message rather than a tensor-conversion error.
        if not test_data and y is None:
            raise ValueError("y must be provided unless test_data=True")

        if test_data:
            print("Creating test set batches")
            data = tf.data.Dataset.from_tensor_slices(tf.constant(x))
            return data.map(self.image_process).batch(batch_size)

        data = tf.data.Dataset.from_tensor_slices((tf.constant(x), tf.constant(y)))

        if valid_data:
            print("Creating valid set batches")
            return data.map(self.image_for_train_and_valid).batch(batch_size)

        print("Creating train set batches")
        # Shuffle the (path, label) pairs before decoding, with a buffer that
        # covers the whole dataset.
        data = data.shuffle(buffer_size=len(x))
        data = data.map(self.image_for_train_and_valid)
        return data.batch(batch_size)

    def imager(self, image, label):
        """Plot up to 25 images in a 5x5 grid, each titled with its city.

        Args:
            image: Indexable batch of images accepted by ``plt.imshow``.
            label: Indexable batch of label rows; the argmax of each row is
                used as an index into the module-level ``unique_cities``.
        """
        plt.figure(figsize=[10, 10])
        # A batch may hold fewer than 25 images (e.g. the last batch).
        for i in range(min(25, len(image))):
            plt.subplot(5, 5, i + 1)
            plt.imshow(image[i])
            plt.title(unique_cities[label[i].argmax()])
            plt.axis("off")
    
   

In [13]:
# Hold out 20% of the training images for validation.
# A fixed random_state makes the split (and every metric computed on it)
# reproducible across kernel restarts; stratifying on the city names keeps the
# class proportions the same in both parts.
x_train, x_valid, y_train, y_valid = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=cities,
)

In [14]:
# Shuffled training pipeline and unshuffled validation pipeline.
train_data = main_app_process().create_data_batches(x=x_train, y=y_train)
valid_data = main_app_process().create_data_batches(
    x=x_valid,
    y=y_valid,
    valid_data=True,
)

Creating train set batches
Creating valid set batches


In [15]:
# Pull a single batch out of the training pipeline as NumPy arrays, for
# inspection (images first, their label rows second).
train_images,train_labelssss=next(train_data.as_numpy_iterator())

In [16]:
len(train_images)

32

In [17]:
#main_app_process().imager(train_images,train_labelssss)

In [18]:
from tensorflow.keras import backend as K

def macro_f1_score(y_true, y_pred):
    """Macro-averaged F1 over the classes for one batch.

    Args:
        y_true: One-hot (or boolean one-hot) labels, shape ``(batch, classes)``.
        y_pred: Predicted class scores, shape ``(batch, classes)``.

    Returns:
        A scalar float32 tensor: the unweighted mean of the per-class F1
        scores. A class with no true and no predicted samples in the batch
        contributes an F1 of 0.
    """
    num_classes = tf.shape(y_pred)[-1]

    # Cast first: the labels may arrive as booleans, which argmax cannot take.
    true_idx = tf.argmax(tf.cast(y_true, tf.float32), axis=-1)
    pred_idx = tf.argmax(y_pred, axis=-1)

    # Re-encode the class indices as 0/1 indicator rows. Multiplying the raw
    # indices (0, 1, 2, ...) would not count true/false positives.
    true_onehot = tf.one_hot(true_idx, depth=num_classes, dtype=tf.float32)
    pred_onehot = tf.one_hot(pred_idx, depth=num_classes, dtype=tf.float32)

    # Per-class counts: reduce over the batch axis, keep the class axis.
    tp = tf.reduce_sum(true_onehot * pred_onehot, axis=0)
    fp = tf.reduce_sum((1.0 - true_onehot) * pred_onehot, axis=0)
    fn = tf.reduce_sum(true_onehot * (1.0 - pred_onehot), axis=0)

    # epsilon guards against 0/0 for classes absent from the batch.
    f1 = 2.0 * tp / (2.0 * tp + fp + fn + K.epsilon())
    return tf.reduce_mean(f1)  # Macro average

In [19]:
# Side length of the square model input (same value as used for preprocessing).
IMG_SIZE=224
# Input shape of the model
INPUT_SHAPE = [None, IMG_SIZE, IMG_SIZE, 3] # batch, height, width, colour channels

# Number of output units: one per distinct city
OUTPUT_SHAPE = len(unique_cities)

# Model handle passed to hub.KerasLayer below
MODEL_URL = "https://www.kaggle.com/models/google/mobilenet-v2/tensorFlow2/140-224-classification/2"
# Alternative handles (commented out):
#"https://www.kaggle.com/models/google/mobilenet-v2/TensorFlow2/tf2-preview-classification/4"
#
#"https://kaggle.com/models/google/mobilenet-v2/TensorFlow2/130-224-classification/1"


# Hub layer created once at module level; trainable=True marks its weights as
# trainable (fine-tuning) rather than frozen.
base_model = hub.KerasLayer(MODEL_URL, trainable=True)
# Create a function which builds a Keras model
def create_model(input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE, model_url=MODEL_URL):
    """Build and compile a classifier on top of a TensorFlow Hub layer.

    Args:
        input_shape: Shape passed to ``model.build`` (batch, height, width,
            channels).
        output_shape: Number of units in the final softmax layer.
        model_url: Hub handle of the pretrained layer to fine-tune.

    Returns:
        A compiled and built ``tf.keras.Sequential`` model.

    NOTE(review): the default handle ends in ``-classification``, so the hub
    layer presumably emits ImageNet class logits rather than pooled features;
    confirm whether a feature-vector handle was intended.
    """
    print("Building model with:", model_url)

    # Create a fresh hub layer per call so that repeated calls do not share
    # (already fine-tuned) weights through a module-level layer, and so that
    # the model_url argument is actually honoured.
    pretrained_layer = hub.KerasLayer(model_url, trainable=True)

    # Setup the model layers
    model = tf.keras.Sequential([
        pretrained_layer,
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(units=output_shape, activation="softmax"),
    ])

    # Compile the model; a small learning rate because the hub layer is
    # trainable.
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(1e-4),
        metrics=["accuracy"],
    )

    # Build the model with the requested input shape
    model.build(input_shape)

    return model

In [20]:


# Stop training once validation accuracy has not improved for 10 epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model that gets evaluated and saved is the one from 10 epochs after the
# best validation score.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=10,
    restore_best_weights=True,
)

In [21]:
# Upper bound on training epochs; early stopping may end training sooner.
NUM_EPOCHS = 200


def train_model():
    """Create a new model, fit it on the training pipeline and return it.

    Reads the module-level ``train_data``, ``valid_data``, ``NUM_EPOCHS`` and
    ``early_stopping``; validation runs after every epoch.
    """
    net = create_model()
    fit_options = dict(
        epochs=NUM_EPOCHS,
        validation_data=valid_data,
        validation_freq=1,
        callbacks=[early_stopping],
    )
    net.fit(train_data, **fit_options)
    return net

In [22]:
# Build and fit the model (long-running cell).
model=train_model()

Building model with: https://www.kaggle.com/models/google/mobilenet-v2/tensorFlow2/140-224-classification/2
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200


In [23]:
# Softmax outputs for the validation set. The validation pipeline is not
# shuffled, so row i corresponds to y_valid[i].
model_pred_y=model.predict(valid_data,verbose=2)

44/44 - 32s - 32s/epoch - 723ms/step


In [24]:
# Collapse the per-class scores and the one-hot labels to class indices.
y_pred_classes = np.argmax(model_pred_y, axis=1)
y_true = np.argmax(y_valid, axis=1)
# Only the last expression of a cell is displayed, so the stray bare
# `y_pred_classes` line that used to sit here did nothing and was removed.
y_true

array([2, 1, 0, ..., 1, 0, 2], dtype=int64)

In [25]:
from sklearn.metrics import f1_score

In [26]:
# Macro-averaged F1 on the validation split.
f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average="macro")
f1

0.8788582126068221

In [30]:
#y_train

In [31]:
#y_valid

In [32]:
# Persist the trained model to an ".h5" file in the working directory.
# NOTE(review): the model contains a hub.KerasLayer; reloading it with
# tf.keras.models.load_model presumably needs
# custom_objects={"KerasLayer": hub.KerasLayer} — confirm.
model.save("trained_model2.h5")

  saving_api.save_model(


In [33]:
# Relative path of every test image, in the same order as `test_labels`.
filenames_test = [f"test/test/{name}" for name in test_labels["filename"]]

In [34]:
# Unlabelled, unshuffled pipeline over the test images.
test_data = main_app_process().create_data_batches(
    x=filenames_test,
    test_data=True,
)

Creating test set batches


In [35]:
# Softmax outputs for the test set, in the same order as filenames_test
# (the test pipeline is not shuffled).
model_pred_test=model.predict(test_data,verbose=2)

63/63 - 50s - 50s/epoch - 800ms/step


In [36]:
# Index of the highest-scoring class for every test image.
t_pred_classes = model_pred_test.argmax(axis=1)
t_pred_classes

array([2, 2, 1, ..., 0, 1, 0], dtype=int64)

In [37]:
# Translate class indices back to city names with a single fancy-index lookup.
predicted_city_names = unique_cities[t_pred_classes].tolist()

In [38]:
# Submission table: one row per test image with its predicted city.
submission = pd.DataFrame({
    "filename": test_labels["filename"],  # IDs from the test set
    "city": predicted_city_names
})

In [39]:
submission.head()

Unnamed: 0,filename,city
0,image_17000.jpg,Izmir
1,image_17001.jpg,Izmir
2,image_17002.jpg,Istanbul
3,image_17003.jpg,Istanbul
4,image_17004.jpg,Izmir


In [40]:
submission[25:26]

Unnamed: 0,filename,city
25,image_17025.jpg,Izmir


In [41]:
# Write the submission file without the DataFrame index column.
submission.to_csv("submission2.csv", index=False)

In [44]:
submission[150:200]

Unnamed: 0,filename,city
150,image_17150.jpg,Istanbul
151,image_17151.jpg,Izmir
152,image_17152.jpg,Istanbul
153,image_17153.jpg,Istanbul
154,image_17154.jpg,Istanbul
155,image_17155.jpg,Izmir
156,image_17156.jpg,Ankara
157,image_17157.jpg,Istanbul
158,image_17158.jpg,Ankara
159,image_17159.jpg,Izmir
