In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [4]:
feature_list = [
    'fire_mask',
    'fire_mask_next_day', 
    'elevation', 
    'wind_direction', 
    'wind_speed', 
    'energy_release_component', 
    'burn_index', 
    'precipitation', 
    'tempature_min', 
    'tempature_max', 
    'drought_index', 
    'vegetation', 
    'population_density'
]
kernel = 64

columns = [
  tf.io.FixedLenFeature(shape=[kernel, kernel], dtype=tf.float32) for _ in feature_list
]
features_dict = dict(zip(feature_list, columns))


def _parse_tfrecordset(proto):
    return tf.io.parse_single_example(proto, features_dict)


In [5]:
raw_dataset = tf.data.TFRecordDataset("data/FireEyeData.tfrecord.gz", compression_type="GZIP")
parsed_dataset = raw_dataset.map(_parse_tfrecordset)

In [6]:


def compute_statistics(dataset):
    # Initialize statistics
    feature_sums = {feature: 0 for feature in feature_list}
    feature_sumsq = {feature: 0 for feature in feature_list}
    feature_min = {feature: np.inf for feature in feature_list}
    feature_max = {feature: -np.inf for feature in feature_list}
    count = 0
    
    for record in dataset:
        count += 1
        for feature in feature_list:
            feature_data = record[feature].numpy()
            feature_sums[feature] += np.sum(feature_data)
            feature_sumsq[feature] += np.sum(feature_data**2)
            feature_min[feature] = np.minimum(feature_min[feature], np.amin(feature_data))
            feature_max[feature] = np.maximum(feature_max[feature], np.amax(feature_data))
    


    return feature_min, feature_max
f_min, f_max = compute_statistics(parsed_dataset)
print(f_min)
print(f_max)

{'fire_mask': 0.0, 'fire_mask_next_day': 0.0, 'elevation': 1093.0, 'wind_direction': -52.614295959472656, 'wind_speed': 0.9025954008102417, 'energy_release_component': 7.9000349044799805, 'burn_index': -17.64985466003418, 'precipitation': -0.45260024070739746, 'tempature_min': 254.54405212402344, 'tempature_max': 269.9469909667969, 'drought_index': -7.221144199371338, 'vegetation': -4443.0, 'population_density': 0.0}
{'fire_mask': 2.0, 'fire_mask_next_day': 2.0, 'elevation': 4236.0, 'wind_direction': 414.0893249511719, 'wind_speed': 10.726812362670898, 'energy_release_component': 105.19528198242188, 'burn_index': 120.07875061035156, 'precipitation': 87.22443389892578, 'tempature_min': 297.5557556152344, 'tempature_max': 312.6605529785156, 'drought_index': 10.659252166748047, 'vegetation': 9461.0, 'population_density': 7890.939453125}


In [45]:
#Prepare Final Dataset



x_list = [feature_list[0]] + feature_list[2:]
regression_list = []


for i in range(3):
    for j in range(3):
        regression_list.extend([f"{feature}_{i}_{j}" for feature in x_list])
regression_list.extend([feature_list[1], 'fire_mask_next_day'])
df = pd.DataFrame()

for record in parsed_dataset:
    print(".")
    for i in range(1, 63):
        for j in range(1, 63):
            #Don't want uncertainty
            if (not np.isclose(record['fire_mask_next_day'].numpy()[i, j], 1)) and (not np.any(np.isclose(record['fire_mask'].numpy()[i-1:i+2, j-1: j+2], 1))):
                if (np.isclose(record['fire_mask_next_day'].numpy()[i, j], 2)) or (np.random.rand() < 1/400):
                    data = {}
                    for k in range(3):
                        for m in range(3):
                            for f in x_list[1:]:
                                data[f"{f}_{k}_{m}"] = [record[f].numpy()[i + k - 1, j + m - 1]]

                            data[f"fire_mask_{k}_{m}"] = [np.isclose(record['fire_mask'].numpy()[i + k - 1, j + m -1], 2)]
                    data['fire_mask_next_day'] = [np.isclose(record['fire_mask_next_day'].numpy()[i, j], 2)]
                    

                    df = pd.concat([df, pd.DataFrame.from_dict(data)]).reset_index(drop=True)


print(df)

        

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
      elevation_0_0  wind_direction_0_0  wind_speed_0_0  \
0            1693.0          303.287994        1.992800   
1            1825.0          297.924316        1.998562   
2            1695.0          304.008575        1.899787   
3            1807.0          295.143890        2.000338   
4            1940.0          287.805725        1.968183   
...         

In [47]:
(df['fire_mask_next_day'] == True).sum()
(df['fire_mask_next_day'] == False).sum()

2757

In [48]:
df.to_csv('fire_eye.csv')

In [49]:
X = df.drop(columns=['fire_mask_next_day'])
y = df['fire_mask_next_day']

In [51]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [54]:
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [57]:
from sklearn.linear_model import LogisticRegression

# Create a logistic regression model
model = LogisticRegression(max_iter=10000)

# Train the model
model.fit(X_train_scaled, y_train)

In [58]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Make predictions on the testing data
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Confusion matrix
confusion = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{confusion}")

# Classification report
report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{report}")


Accuracy: 0.8593886462882097
Confusion Matrix:
[[468  94]
 [ 67 516]]
Classification Report:
              precision    recall  f1-score   support

       False       0.87      0.83      0.85       562
        True       0.85      0.89      0.87       583

    accuracy                           0.86      1145
   macro avg       0.86      0.86      0.86      1145
weighted avg       0.86      0.86      0.86      1145

