In [5]:
import os 
import pandas as pd
import numpy as np
import rasterio
from rasterio.plot import show
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import warnings

# Silence rasterio's NotGeoreferencedWarning so that opening rasters does not
# flood the output with one warning per file.
warnings.filterwarnings(
    action='ignore',
    category=rasterio.errors.NotGeoreferencedWarning,
)

def create_df_from_tif(path):
    """Build a DataFrame of flattened image bands from class-labelled .tif files.

    ``path`` is scanned for sub-directories; each sub-directory name is used
    as a class label, and every file inside it whose name ends in ``.tif`` is
    opened with rasterio and contributes one row.

    Args:
        path: Root directory containing one folder per class.

    Returns:
        A DataFrame with the columns ``class`` (integer code produced by
        ``LabelEncoder``), ``red_channel``, ``green_channel`` and
        ``blue_channel`` (the flattened bands 0, 1 and 2 of each image).
        When no ``.tif`` file is found, an empty DataFrame with those four
        columns is returned.

    Side effects:
        Prints the class-name -> integer-code mapping.
    """
    columns = ['class', 'red_channel', 'green_channel', 'blue_channel']
    data = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order reproducible between runs and machines.
    for class_folder in sorted(os.listdir(path)):
        class_path = os.path.join(path, class_folder)

        # Skip stray files in the root (e.g. '.DS_Store'); listing them as if
        # they were class folders would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            continue

        for filename in sorted(os.listdir(class_path)):
            if not filename.endswith('.tif'):   # only '.tif' files are loaded
                continue
            file_path = os.path.join(class_path, filename)

            with rasterio.open(file_path) as src:
                image = src.read()

            # NOTE(review): assumes the first three bands are ordered
            # red, green, blue -- confirm against the dataset.
            data.append({
                'class': class_folder,
                'red_channel': image[0].flatten(),
                'green_channel': image[1].flatten(),
                'blue_channel': image[2].flatten(),
            })

    # Passing the column names guarantees the expected schema even when no
    # image was found, instead of a column-less frame and a later KeyError.
    df = pd.DataFrame(data, columns=columns)

    class_mapping = {}
    if not df.empty:
        le = LabelEncoder()
        df['class'] = le.fit_transform(df['class'])
        # Cast to built-in str/int so the printed mapping stays readable
        # regardless of how NumPy renders its scalar types.
        class_mapping = {
            str(name): int(code)
            for name, code in zip(le.classes_, le.transform(le.classes_))
        }
    print(f'--- CLASS MAPPING --- \n{class_mapping}')

    return df

# Root folder of the USTC_SmokeRS images (one sub-folder per class).
path = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS Dataset/data/USTC_SmokeRS'

# Destination of the Parquet export of the assembled DataFrame.
parquet_target = '/Users/pimpijnenburg/Desktop/Thesis/USTC_SmokeRS Dataset/data/created_data'

create_df_from_tif(path).to_parquet(parquet_target)

--- CLASS MAPPING --- 
{'Cloud': 0, 'Dust': 1, 'Haze': 2, 'Land': 3, 'Seaside': 4, 'Smoke': 5}
