In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset
from DBEngine import db_engine
from datetime import datetime
import math

In [2]:
def get_annotated(cursor):
  query = """
    select ST_X(point) as lat, ST_Y(point) as lon, date, brightness, brightness_t31, radiative_power, dist_road, dist_rail, avg_population, intent 
    from geo_firms where intent is not NULL and source != 6
  """
  cursor.execute(query)
  return np.array(cursor.fetchall())

def get_raw(cursor):
  query = """
    select id, ST_X(point) as lat, ST_Y(point) as lon, date, brightness, brightness_t31, radiative_power, dist_road, dist_rail, avg_population 
    from geo_firms where intent is NULL
  """
  cursor.execute(query)
  return np.array(cursor.fetchall())

def get_extent(cursor):
  query = """
    with ukraine as (select ST_Union(geom) as geom from ukraine_regions)
    select ST_XMin(geom), ST_XMax(geom), ST_YMin(geom), ST_YMax(geom) from ukraine
  """
  cursor.execute(query)
  return cursor.fetchone()
  
def get_min_max(cursor):
  query = """
    select min(brightness), max(brightness), min(brightness_t31), max(brightness_t31), min(radiative_power), max(radiative_power), min(avg_population), max(avg_population) from geo_firms
  """
  cursor.execute(query)
  return cursor.fetchone()

In [3]:
data = db_engine.execute(get_annotated)

df = pd.DataFrame(data, columns=["lat", "lon", "date", "brightness", "brightness_t31", "radiative_power", "dist_road", "dist_rail", "avg_population", "intent"])
df.head(10)

Unnamed: 0,lat,lon,date,brightness,brightness_t31,radiative_power,dist_road,dist_rail,avg_population,intent
0,31.8882,51.6665,2021-03-27,305.6,282.4,9.2,,,5.741589,1
1,32.2776,51.2285,2021-04-30,301.8,278.7,5.7,,,7.310967,1
2,28.1198,50.9087,2021-05-12,306.3,293.1,5.4,,,3.15715,1
3,32.238,46.9239,2021-08-22,319.8,301.7,7.4,,,12.057454,1
4,31.311,51.5166,2022-02-26,301.9,281.0,7.5,9.481799,,556.319607,2
5,30.2457,50.5856,2022-02-26,302.7,278.6,7.9,34.25642,,149.800956,2
6,30.1982,50.584,2022-02-26,351.8,281.9,73.4,180.934105,629.070528,135.22653,2
7,30.1873,50.5955,2022-02-26,343.8,282.6,56.1,328.244816,,84.257003,2
8,30.2019,50.593,2022-02-26,320.7,280.8,21.5,98.55449,791.546167,84.635519,2
9,30.31,50.2298,2022-02-27,329.2,270.0,47.8,726.09937,1233.507614,23.282222,2


In [4]:
extent = db_engine.execute(get_extent)
min_max = db_engine.execute(get_min_max)
min_date = datetime.timestamp(datetime(2021, 1, 1, 0, 0, 0))
max_date = datetime.timestamp(datetime(2022, 12, 31, 0, 0, 0))

print(extent)
print(min_date, max_date)
print(min_max)

(22.140447616577227, 40.21807098388693, 44.385971069336094, 52.37503433227544)
1609470000.0 1672455600.0
(295.0, 453.83, 238.29, 375.53, 0.0, 1416.37, 0.3848654873565471, 5597.442598401175)


In [5]:
def normalize(df: pd.DataFrame):
  df['lat'] = (df['lat'] - extent[0]) / (extent[1] - extent[0])
  df['lon'] = (df['lon'] - extent[2]) / (extent[3] - extent[2])
  df['date'] = df['date'].apply(lambda x: datetime.timestamp(datetime.strptime(str(x), "%Y-%m-%d")))
  df['date'] = (df['date'] - min_date) / (max_date - min_date)

  df['brightness'] = (df['brightness'] - min_max[0])/(min_max[1] - min_max[0])
  df['brightness_t31'] = (df['brightness_t31'] - min_max[2])/(min_max[3] - min_max[2])
  df['radiative_power'] = (df['radiative_power'] - min_max[4])/(min_max[5] - min_max[4])

  df['dist_road'] = np.where(np.isnan(df['dist_road'].astype('float32')), 0, 1/math.e ** (df['dist_road'] / 100))
  df['dist_rail'] = np.where(np.isnan(df['dist_rail'].astype('float32')), 0, 1/math.e ** (df['dist_rail'] / 100))

  df['avg_population'] = (df['avg_population'] - min_max[6])/(min_max[7] - min_max[6])

  if('intent' in df):
    df['accident'] = np.where(df['intent'] == 1, 1, 0)
    df['attack'] = np.where(df['intent'] == 2, 1, 0)

    df = df.drop(columns=['intent'])

  return df

In [6]:
df = normalize(df)
df.head(10)

Unnamed: 0,lat,lon,date,brightness,brightness_t31,radiative_power,dist_road,dist_rail,avg_population,accident,attack
0,0.539216,0.911312,0.116598,0.066738,0.321408,0.006495,0.0,0.0,0.000957,1,0
1,0.560757,0.856487,0.163237,0.042813,0.294448,0.004024,0.0,0.0,0.001237,1,0
2,0.33076,0.816457,0.179698,0.071145,0.399373,0.003813,0.0,0.0,0.000495,1,0
3,0.558566,0.317675,0.319616,0.156142,0.462037,0.005225,0.0,0.0,0.002085,1,0
4,0.507288,0.892549,0.577503,0.043443,0.311207,0.005295,0.909538,0.0,0.099326,0,1
5,0.448358,0.776014,0.577503,0.04848,0.293719,0.005578,0.709948,0.0,0.026695,0,1
6,0.445731,0.775814,0.577503,0.357615,0.317765,0.051823,0.163762,0.001853,0.024092,0,1
7,0.445128,0.777254,0.577503,0.307247,0.322865,0.039608,0.037536,0.0,0.014985,0,1
8,0.445935,0.776941,0.577503,0.161808,0.309749,0.01518,0.373236,0.000365,0.015053,0,1
9,0.451915,0.731479,0.578875,0.215325,0.231055,0.033748,0.000702,4e-06,0.004091,0,1


In [7]:
test_df = df.sample(frac = 0.2)

df = df.drop(test_df.index)

print("Test data: " + str(len(test_df)))
print("Train data: " + str(len(df)))

Test data: 2917
Train data: 11670


In [8]:
X_train_np = df.to_numpy()[:, :-2]
y_train_np = df.to_numpy()[:, -2:]

X_train_np.shape, y_train_np.shape

((11670, 9), (11670, 2))

In [9]:
X_test_np = test_df.to_numpy()[:, :-2]
y_test_np = test_df.to_numpy()[:, -2:]

X_test_np.shape, y_test_np.shape

((2917, 9), (2917, 2))

In [10]:
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_dataset = TensorDataset(
  torch.tensor(X_train_np.astype('float64'), dtype=torch.float),
  torch.tensor(y_train_np.astype('float64'), dtype=torch.float)
)

train_dataloader = DataLoader(train_dataset, batch_size=128)

for X, y in train_dataloader:
  print(X.shape, y.shape)
  print(X[0])
  break

torch.Size([128, 9]) torch.Size([128, 2])
tensor([0.5392, 0.9113, 0.1166, 0.0667, 0.3214, 0.0065, 0.0000, 0.0000, 0.0010])


In [11]:
test_dataset = TensorDataset(
  torch.tensor(X_test_np.astype('float64'), dtype=torch.float), 
  torch.tensor(y_test_np.astype('float64'), dtype=torch.float)
)

test_dataloader = DataLoader(test_dataset, batch_size=64)

for X, y in test_dataloader:
  print(X.shape, y.shape)
  print(X[0])
  break

torch.Size([64, 9]) torch.Size([64, 2])
tensor([0.8991, 0.5803, 0.5967, 0.2003, 0.2915, 0.0246, 0.7178, 0.1438, 0.0283])


In [12]:
from torch import nn
import torch.nn.functional as F

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

class NeuralNet(nn.Module):
  def __init__(self):
    super(NeuralNet, self).__init__()

    self.f_connected1 = nn.Linear(9, 64)
    self.out = nn.Linear(64, 2)
  
  def forward(self, x):
    x = F.relu(self.f_connected1(x))
    x = self.out(x)
    return x

  def save(self, model_path):
    torch.save(model.state_dict(), model_path)

  def load(self, model_path):
    self.load_state_dict(torch.load(model_path))
    self.eval()


model = NeuralNet().to(device)
print(model)

Using cpu device
NeuralNet(
  (f_connected1): Linear(in_features=9, out_features=64, bias=True)
  (out): Linear(in_features=64, out_features=2, bias=True)
)


In [13]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [14]:
def train(dataloader, model, loss_fn, optimizer):
  model.train()
  train_loss = 0

  for i, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    y_hat = model.forward(X)
    ce = loss_fn(y_hat, y)
    train_loss += ce.item()

    optimizer.zero_grad()
    ce.backward()
    optimizer.step()
  
  num_batches = len(dataloader)
  train_ce = train_loss / num_batches
  print(f'Train Loss: {train_ce}')

In [15]:
def test(dataloader, model, loss_fn):
  model.eval()
  test_loss = 0

  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      y_hat = model(X)
      test_loss += loss_fn(y_hat, y).item()
  
  num_batches = len(dataloader)
  test_ce = test_loss / num_batches

  print(f'Test Loss: {test_ce}\n')

epochs = 20
curr_epoch = 0

In [66]:
for x in range(epochs):
  curr_epoch += 1
  print(f"Epoch {curr_epoch}:")

  train(train_dataloader, model, loss_fn, optimizer)
  test(test_dataloader, model, loss_fn)

Epoch 101:
Train Loss: 0.044659286774929016
Test Loss: 0.024323055806630495

Epoch 102:
Train Loss: 0.044102432965977954
Test Loss: 0.023907995029889127

Epoch 103:
Train Loss: 0.0435161466347302
Test Loss: 0.02354871015363049

Epoch 104:
Train Loss: 0.04301835136596349
Test Loss: 0.023192677687367668

Epoch 105:
Train Loss: 0.04256001526508281
Test Loss: 0.02283105711103417

Epoch 106:
Train Loss: 0.042064625182221156
Test Loss: 0.02244256030889633

Epoch 107:
Train Loss: 0.04148406482337795
Test Loss: 0.022098505839138576

Epoch 108:
Train Loss: 0.04097644568171587
Test Loss: 0.021740416000323858

Epoch 109:
Train Loss: 0.040430241661278676
Test Loss: 0.02136669663773121

Epoch 110:
Train Loss: 0.03984405003197737
Test Loss: 0.02098368576844223

Epoch 111:
Train Loss: 0.03920252241689171
Test Loss: 0.020667987108817728

Epoch 112:
Train Loss: 0.038679977987078965
Test Loss: 0.020294501343691398

Epoch 113:
Train Loss: 0.03802374758814277
Test Loss: 0.019961945902373966

Epoch 114:
Tr

In [67]:
def predict(dataset: TensorDataset):
  model.eval()
  output = []
  for tensor in dataset.tensors:
    output.append(model(tensor))

  return output

In [68]:
data = db_engine.execute(get_raw)

predict_df = pd.DataFrame(data, columns=["id", "lat", "lon", "date", "brightness", "brightness_t31", "radiative_power", "dist_road", "dist_rail", "avg_population"])
print("Available data: " + str(len(predict_df)))

Available data: 121555


In [69]:
predict_df = normalize(predict_df)
predict_np = predict_df.to_numpy()[:, 1:]

prediction_dataset = TensorDataset(
  torch.tensor(predict_np.astype('float64'), dtype=torch.float)
)

predict_df.head(10)

Unnamed: 0,id,lat,lon,date,brightness,brightness_t31,radiative_power,dist_road,dist_rail,avg_population
0,115,0.578967,0.263414,0.09465,0.09507,0.338895,0.008614,1e-06,0.0,0.005248
1,125,0.751628,0.485217,0.096022,0.040924,0.209196,0.00939,0.344029,0.0,0.000361
2,127,0.851072,0.346114,0.096022,0.103255,0.30392,0.014615,0.148732,0.00302,0.102545
3,128,0.895812,0.41248,0.096022,0.154253,0.321408,0.018639,0.053424,0.0,0.00073
4,129,0.959277,0.487608,0.096022,0.348801,0.312664,0.054223,0.0,0.0,0.002151
5,130,0.958475,0.487295,0.096022,0.159919,0.294448,0.016733,0.0,0.0,0.00117
6,131,0.959615,0.488434,0.096022,0.102625,0.288618,0.011367,0.000226,0.0,0.002423
7,132,0.958813,0.488121,0.096022,0.237361,0.307563,0.029653,4.2e-05,0.0,0.002222
8,136,0.851376,0.34644,0.098765,0.093811,0.243442,0.010379,0.255028,0.48285,0.101121
9,137,0.943407,0.458793,0.100137,0.036517,0.282061,0.008119,0.000358,0.0,0.002672


In [70]:
predicted = predict(prediction_dataset)

classes = predicted[0].detach().numpy()

In [74]:
predict_df['accident'] = classes[:, 0]
predict_df['attack'] = classes[:, 1]

predict_df.sort_values(by=['attack'], ascending=False).head(10)

Unnamed: 0,id,lat,lon,date,brightness,brightness_t31,radiative_power,dist_road,dist_rail,avg_population,accident,attack
68704,79673,0.719839,0.437306,0.898491,0.453315,0.273827,0.008437,0.555189,0.018591,0.19395,-11.042339,10.470775
121132,135721,0.691862,0.518767,0.902606,0.453315,0.281696,0.004483,0.684382,0.558943,0.075674,-10.84326,10.281022
68342,79307,0.620654,0.978635,0.893004,0.248631,0.3319,0.002415,0.705438,0.0,0.000608,-10.760063,10.201723
69194,80164,0.53483,0.953665,0.916324,0.291947,0.30239,0.00401,0.0,0.0,0.000676,-10.706186,10.15037
120950,135539,0.76915,0.741605,0.898491,0.453315,0.413218,0.033939,0.880704,0.0,0.002526,-10.697997,10.142565
121464,136053,0.882699,0.592605,0.919067,0.194107,0.253279,0.001878,0.814718,0.0,0.000822,-10.688504,10.133517
119073,133626,0.775194,0.696528,0.844993,0.28137,0.384509,0.0018,0.711238,0.01275,0.315585,-10.687486,10.132546
121134,135723,0.691896,0.518734,0.902606,0.453315,0.277762,0.007039,0.409386,0.323652,0.075374,-10.638066,10.085442
69320,80290,0.882793,0.592654,0.919067,0.196248,0.256995,0.002528,0.739776,0.0,0.000827,-10.62018,10.068394
17336,24588,0.470287,0.76975,0.887517,0.043254,0.346328,0.016705,0.732836,6e-06,0.489226,-10.594839,10.035484


In [76]:
def update(cursor):
  query = "update geo_firms set source=6, accident=%(accident)s, attack=%(attack)s where id = %(id)s"
  cursor.executemany(query, predict_df.to_dict('records'))

db_engine.execute(update)
db_engine.commit()