# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(style="whitegrid")
import matplotlib.pyplot as plt
%matplotlib inline

import os
import glob as gb
import cv2
import tensorflow as tf
import keras

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Use the explicit %pip magic so the package is installed into the environment of
# the running kernel without relying on IPython's automagic resolution of `pip`.
%pip install mlflow

In [None]:
# Use the explicit %pip magic so the package is installed into the environment of
# the running kernel without relying on IPython's automagic resolution of `pip`.
%pip install codecarbon

In [None]:
from codecarbon import EmissionsTracker
import mlflow
# Remote MLflow tracking server (hosted on DagsHub) that receives this notebook's runs.
MLFLOW_TRACKING_URI = "https://dagshub.com/martinaalba21/TAED2_DataExplorers.mlflow"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [None]:
import os

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

# Fetch the MLflow credentials from Kaggle Secrets (username first, then password)
# so they never appear in the notebook source.
mlflow_username, mlflow_password = (
    user_secrets.get_secret(secret_name)
    for secret_name in ("MLFLOW_TRACKING_USERNAME", "MLFLOW_TRACKING_PASSWORD")
)

In [None]:

# Expose the credentials through the environment variables that MLflow reads
# when authenticating against the tracking server.
os.environ.update(
    MLFLOW_TRACKING_USERNAME=mlflow_username,
    MLFLOW_TRACKING_PASSWORD=mlflow_password,
)

# Explore Data

In [None]:
# Group every run logged from this notebook under a single MLflow experiment.
mlflow.set_experiment("original-code")
# NOTE(review): this enables autologging for the scikit-learn flavor only, while the
# model trained in this notebook is a Keras model. Presumably nothing from
# KerasModel.fit is auto-captured and only the explicit log_params/log_metrics calls
# reach MLflow — confirm whether a Keras/TensorFlow autolog was intended.
mlflow.sklearn.autolog(log_model_signatures=False, log_datasets=False)

In [None]:
# Root of the dataset input directory; each split lives in its own sub-folder.
_DATASET_ROOT = '../input/intel-image-classification/'
train_path, test_path, predict_path = (
    _DATASET_ROOT + split_dir
    for split_dir in ('seg_train/', 'seg_test/', 'seg_pred/')
)

In [None]:
# Report how many JPEG files each sub-folder of the training split contains.
train_root = train_path + 'seg_train'
for folder in os.listdir(train_root):
    pattern = f'{train_path}seg_train//{folder}/*.jpg'
    files = gb.glob(pathname=pattern)
    print(f'For training data , found {len(files)} in folder {folder}')

In [None]:
# Report how many JPEG files each sub-folder of the test split contains.
test_root = test_path + 'seg_test'
for folder in os.listdir(test_root):
    pattern = f'{test_path}seg_test//{folder}/*.jpg'
    files = gb.glob(pathname=pattern)
    print(f'For testing data , found {len(files)} in folder {folder}')

In [None]:
# Count the JPEG files available in the prediction split (a single flat folder).
pred_pattern = f'{predict_path}seg_pred/*.jpg'
files = gb.glob(pathname=pred_pattern)
print(f'For Prediction data , found {len(files)}')

# Preprocessing

In [None]:
# Class name -> integer label; the folder names of the dataset are the keys.
code = {
    'buildings': 0,
    'forest': 1,
    'glacier': 2,
    'mountain': 3,
    'sea': 4,
    'street': 5,
}


def getcode(n):
    """Return the class name whose integer label equals ``n``.

    The lookup scans ``code`` in insertion order and compares with ``==``.
    Returns ``None`` when no class carries that label.
    """
    return next((name for name, label in code.items() if n == label), None)

In [None]:
# Tally the array shapes of all training images to see how uniform the sizes are.
size = [
    plt.imread(file).shape
    for folder in os.listdir(train_path + 'seg_train')
    for file in gb.glob(pathname=str(train_path + 'seg_train//' + folder + '/*.jpg'))
]
pd.Series(size).value_counts()

In [None]:
# Tally the array shapes of all test images to see how uniform the sizes are.
size = [
    plt.imread(file).shape
    for folder in os.listdir(test_path + 'seg_test')
    for file in gb.glob(pathname=str(test_path + 'seg_test//' + folder + '/*.jpg'))
]
pd.Series(size).value_counts()

In [None]:
# Tally the array shapes of all prediction images to see how uniform the sizes are.
files = gb.glob(pathname=str(predict_path + 'seg_pred/*.jpg'))
size = [plt.imread(file).shape for file in files]
pd.Series(size).value_counts()

In [None]:
# Target side length: images are resized to (s, s) on load and the network's
# input shape is (s, s, 3).
s = 100

In [None]:
# Load the training split: every image is resized to (s, s) and paired with the
# integer label of the folder it was found in.
x_train = []
y_train = []
for folder in os.listdir(train_path + 'seg_train'):
    files = gb.glob(pathname=str(train_path + 'seg_train//' + folder + '/*.jpg'))
    for file in files:
        image = cv2.imread(file)
        # cv2.imread reports an unreadable/corrupt file by returning None rather than
        # raising; fail here with the offending path instead of inside cv2.resize.
        if image is None:
            raise ValueError(f'Could not read image file: {file}')
        image_array = cv2.resize(image, (s, s))
        # Keep the ndarray as-is: wrapping it in list() only split it into per-row
        # arrays that np.array() has to stitch back together later.
        x_train.append(image_array)
        y_train.append(code[folder])

In [None]:
# Show a 6x6 grid of randomly chosen training images titled with their class name.
plt.figure(figsize=(25,25))
for n, i in enumerate(list(np.random.randint(0, len(x_train), 36))):
    plt.subplot(6, 6, n + 1)
    # The images were read with cv2.imread, which stores channels as BGR, while
    # plt.imshow interprets the last axis as RGB. Reverse the channel axis for
    # display only, so colors are not swapped; the stored data is left untouched.
    plt.imshow(np.asarray(x_train[i])[..., ::-1])
    plt.axis('off')
    plt.title(getcode(y_train[i]))

In [None]:
# Load the test split: every image is resized to (s, s) and paired with the
# integer label of the folder it was found in.
x_test = []
y_test = []
for folder in os.listdir(test_path + 'seg_test'):
    files = gb.glob(pathname=str(test_path + 'seg_test//' + folder + '/*.jpg'))
    for file in files:
        image = cv2.imread(file)
        # cv2.imread reports an unreadable/corrupt file by returning None rather than
        # raising; fail here with the offending path instead of inside cv2.resize.
        if image is None:
            raise ValueError(f'Could not read image file: {file}')
        image_array = cv2.resize(image, (s, s))
        # Keep the ndarray as-is: wrapping it in list() only split it into per-row
        # arrays that np.array() has to stitch back together later.
        x_test.append(image_array)
        y_test.append(code[folder])

In [None]:
# Show a 6x6 grid of randomly chosen test images titled with their class name.
plt.figure(figsize=(25,25))
for n, i in enumerate(list(np.random.randint(0, len(x_test), 36))):
    plt.subplot(6, 6, n + 1)
    # The images were read with cv2.imread, which stores channels as BGR, while
    # plt.imshow interprets the last axis as RGB. Reverse the channel axis for
    # display only, so colors are not swapped; the stored data is left untouched.
    plt.imshow(np.asarray(x_test[i])[..., ::-1])
    plt.axis('off')
    plt.title(getcode(y_test[i]))

In [None]:
# Load the prediction split (no labels): every image is resized to (s, s).
x_pred = []
files = gb.glob(pathname=str(predict_path + 'seg_pred/*.jpg'))
for file in files:
    image = cv2.imread(file)
    # cv2.imread reports an unreadable/corrupt file by returning None rather than
    # raising; fail here with the offending path instead of inside cv2.resize.
    if image is None:
        raise ValueError(f'Could not read image file: {file}')
    image_array = cv2.resize(image, (s, s))
    # Keep the ndarray as-is: wrapping it in list() only split it into per-row
    # arrays that np.array() has to stitch back together later.
    x_pred.append(image_array)

In [None]:
# Show a 6x6 grid of randomly chosen images from the prediction split.
plt.figure(figsize=(25,25))
for n, i in enumerate(list(np.random.randint(0, len(x_pred), 36))):
    plt.subplot(6, 6, n + 1)
    # The images were read with cv2.imread, which stores channels as BGR, while
    # plt.imshow interprets the last axis as RGB. Reverse the channel axis for
    # display only, so colors are not swapped; the stored data is left untouched.
    plt.imshow(np.asarray(x_pred[i])[..., ::-1])
    plt.axis('off')

# Build Model

In [None]:
# Turn the accumulated Python lists into NumPy arrays. x_pred keeps its list form
# and gets a separate array name because the plotting cells index the list directly.
x_train, x_test = np.array(x_train), np.array(x_test)
x_pred_array = np.array(x_pred)
y_train, y_test = np.array(y_train), np.array(y_test)

# Print the resulting shapes as a quick sanity check.
for label, array in (
    ('X_train', x_train),
    ('X_test', x_test),
    ('X_pred', x_pred_array),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(f'{label} shape  is {array.shape}')

In [None]:
# Convolutional classifier: two blocks of 3x3 ReLU convolutions, each closed by a
# 4x4 max-pool, followed by a dense head with dropout and a 6-way softmax output.
_layers = keras.layers
_conv_options = dict(kernel_size=(3, 3), activation='relu')

_model_layers = [
    _layers.Conv2D(200, input_shape=(s, s, 3), **_conv_options),
    _layers.Conv2D(150, **_conv_options),
    _layers.MaxPool2D(pool_size=4, strides=4),
    _layers.Conv2D(120, **_conv_options),
    _layers.Conv2D(80, **_conv_options),
    _layers.Conv2D(50, **_conv_options),
    _layers.MaxPool2D(pool_size=4, strides=4),
    _layers.Flatten(),
    _layers.Dense(120, activation='relu'),
    _layers.Dense(100, activation='relu'),
    _layers.Dense(50, activation='relu'),
    _layers.Dropout(rate=0.5),
    # One output unit per entry of the class-label mapping.
    _layers.Dense(6, activation='softmax'),
]
KerasModel = keras.models.Sequential(_model_layers)

In [None]:
# Labels are plain integers (not one-hot), hence the sparse categorical loss.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
KerasModel.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Track the CO2 emissions of training the model.
# The tracker is configured to append its results to emissions.csv in the output
# folder, so the file can hold rows from earlier executions of this cell.
emissions_output_folder = "/kaggle/working/"
with EmissionsTracker(
    project_name="image-classification",
    measure_power_secs=1,
    tracking_mode="process",
    output_dir=emissions_output_folder,
    output_file="emissions.csv",
    on_csv_write="append",
    default_cpu_power=45,
):
    # Fit the model to the training data inside the tracked context so that the
    # training itself is what gets measured.
    KerasModel.fit(x_train, y_train, epochs=20,batch_size=64,verbose=1)

# Log the CO2 emissions to MLflow.
# Only the last CSV row is used — presumably the session that just finished, given
# the append mode above.
# NOTE(review): columns 4:13 (logged as metrics) and 13: (logged as params) are
# selected by position; this presumably matches the column layout written by the
# installed codecarbon version — confirm, or select the columns by name.
# NOTE(review): no MLflow run is started explicitly before these calls; presumably
# MLflow opens an implicit active run here, which the later nested run attaches to
# — confirm this is the intended run structure.
emissions = pd.read_csv(emissions_output_folder + "emissions.csv")
emissions_metrics = emissions.iloc[-1, 4:13].to_dict()
emissions_params = emissions.iloc[-1, 13:].to_dict()
mlflow.log_params(emissions_params)
mlflow.log_metrics(emissions_metrics)

In [None]:
# Model.summary() prints the layer table itself and returns None, so wrapping it
# in print() only added a stray "None" line after the table.
KerasModel.summary()

In [None]:
# Evaluate on the test split; the result unpacks into the loss followed by the
# single metric the model was compiled with (accuracy).
loss, accuracy = KerasModel.evaluate(x_test, y_test)

In [None]:
# Report the test accuracy as a percentage with two decimals.
accuracy_percent = accuracy * 100
print(f"Accuracy: {accuracy_percent:.2f}%")

In [None]:
# Train inside an MLflow run and log the hyper-parameters together with the final
# train/test accuracy of that run.
# NOTE: KerasModel was already fitted in the emissions-tracking cell, so this fit
# continues from those weights rather than starting from scratch.
with mlflow.start_run(nested=True):
    ThisModel = KerasModel.fit(x_train, y_train, epochs=20, batch_size=64, verbose=1)
    # Re-evaluate AFTER this fit: the notebook-level `accuracy` was measured before
    # these extra epochs, so logging it would pair a fresh train accuracy with a
    # stale test accuracy from a different set of weights.
    test_loss, test_accuracy = KerasModel.evaluate(x_test, y_test, verbose=0)
    mlflow.log_params({'Epochs': 20, 'Batch size': 64, 'Learning rate': 0.0001})
    mlflow.log_metrics({
        'Train Accuracy': ThisModel.history['accuracy'][-1],
        'Test Accuracy': test_accuracy,
    })

In [None]:
# Softmax outputs of the model for the test split. The result is not stored; as
# the last expression of the cell it is only displayed.
KerasModel.predict(x_test)

In [None]:
# Softmax outputs for the prediction images; the plotting cell takes the argmax
# of each row to obtain the predicted class label.
y_result=KerasModel.predict(x_pred_array)

In [None]:
# Show a 6x6 grid of randomly chosen prediction images titled with the class the
# model assigns to each (argmax over the softmax outputs).
plt.figure(figsize=(25,25))
for n, i in enumerate(list(np.random.randint(0, len(x_pred), 36))):
    plt.subplot(6, 6, n + 1)
    # The images were read with cv2.imread, which stores channels as BGR, while
    # plt.imshow interprets the last axis as RGB. Reverse the channel axis for
    # display only, so colors are not swapped; the stored data is left untouched.
    plt.imshow(np.asarray(x_pred[i])[..., ::-1])
    plt.axis('off')
    plt.title(getcode(np.argmax(y_result[i])))