In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import reverse_geocoder as rg
import psycopg2 
from sqlalchemy import create_engine
import json
from pymongo import MongoClient
import warnings
warnings.filterwarnings('ignore')

In [2]:
def create_geoframe(dataset):
    lat = dataset['lat'][:]
    lon = dataset['lon'][:]
    lons, lats = np.meshgrid(lon, lat)
    lons_flatt = lons.flatten()
    lats_flatt = lats.flatten()
    whole_world = {
    'lon': lons_flatt, 
    'lat': lats_flatt, }
    geoframe = pd.DataFrame(whole_world)
    return geoframe

In [3]:
def create_column(dataset,name_variable):
    df_temp = pd.DataFrame()
    variable = dataset[name_variable][:]
    variable = variable[0]
    variable_flatt = variable.flatten()
    df_temp[name_variable] = variable_flatt
    return df_temp

In [4]:
def add_country(dataframe):
    coords = tuple(zip(dataframe['lat'], dataframe['lon']))
    results_rg = rg.search(coords)
    results_admin2 = [x.get('cc') for x in results_rg]
    dataframe['country'] = results_admin2
    return dataframe

In [5]:
def create_vegetation_class_breakdown(dataset):
    burned_area_in_vegetation_class= dataset['burned_area_in_vegetation_class'][:][0]
    i_vegetation_class = 10 
    df_temp = pd.DataFrame()
    for burned_area_per_veg_class in burned_area_in_vegetation_class:
        s_vegetation_class= f"{i_vegetation_class}_burned_area"
        burned_area_per_veg_class_flatt= burned_area_per_veg_class.flatten()
        df_temp[s_vegetation_class] = burned_area_per_veg_class_flatt
        i_vegetation_class= i_vegetation_class + 10
    df_temp['cropland_burned_area'] = df_temp['20_burned_area'] + df_temp['30_burned_area'] + df_temp['40_burned_area']
    df_temp['mosaic_tree_grass_burned_area'] = df_temp['100_burned_area'] + df_temp['100_burned_area']
    df_temp['other_vegetation_burned_area']= df_temp['120_burned_area'] + df_temp['130_burned_area'] + df_temp['140_burned_area']+ df_temp['150_burned_area']
    df_temp['flooded_area_burned_area']= df_temp['160_burned_area'] + df_temp['170_burned_area'] + df_temp['180_burned_area']
    df_temp.drop(columns=['20_burned_area','30_burned_area','40_burned_area','100_burned_area','110_burned_area','120_burned_area', '130_burned_area', '140_burned_area', '150_burned_area','160_burned_area','170_burned_area','180_burned_area'], inplace=True)
    replace_names={ '10_burned_area': 'no_data' + "_burned_area",
            '50_burned_area': 'tree_cover_broadleaved_evergreen' + "_burned_area",
            '60_burned_area': 'tree_cover_broadleaved_deciduous' + "_burned_area", 
            '70_burned_area': 'tree_cover_needleleaved_evergreen' + "_burned_area", 
            '80_burned_area': 'tree_cover_needleleaved_deciduous' + "_burned_area", 
            '90_burned_area': 'tree_cover_mixed_leave' + "_burned_area", }
    df_temp.rename(columns = replace_names, inplace = True)
    return df_temp

In [6]:
def add_date(filename, dataframe ):
    string_date = filename[-11:-7] + "-" + filename[-7:-5] + "-" + filename[-5:-3]
    dataframe['date'] = string_date
    return dataframe

In [7]:
import os
import re
#C:\dev\eda_esa_fire\data
path = "C://dev//eda_esa_fire//data"
dir_list = os.listdir(path)
dir_list

['MODIS_20010101.nc',
 'MODIS_20010201.nc',
 'MODIS_20010301.nc',
 'MODIS_20010401.nc',
 'MODIS_20010501.nc',
 'MODIS_20010601.nc',
 'MODIS_20010701.nc',
 'MODIS_20010801.nc',
 'MODIS_20010901.nc',
 'MODIS_20011001.nc',
 'MODIS_20011101.nc',
 'MODIS_20011201.nc',
 'MODIS_20020101.nc',
 'MODIS_20020201.nc',
 'MODIS_20020301.nc',
 'MODIS_20020401.nc',
 'MODIS_20020501.nc',
 'MODIS_20020601.nc',
 'MODIS_20020701.nc',
 'MODIS_20020801.nc',
 'MODIS_20020901.nc',
 'MODIS_20021001.nc',
 'MODIS_20021101.nc',
 'MODIS_20021201.nc',
 'MODIS_20030101.nc',
 'MODIS_20030201.nc',
 'MODIS_20030301.nc',
 'MODIS_20030401.nc',
 'MODIS_20030501.nc',
 'MODIS_20030601.nc',
 'MODIS_20030701.nc',
 'MODIS_20030801.nc',
 'MODIS_20030901.nc',
 'MODIS_20031001.nc',
 'MODIS_20031101.nc',
 'MODIS_20031201.nc',
 'MODIS_20040101.nc',
 'MODIS_20040201.nc',
 'MODIS_20040301.nc',
 'MODIS_20040401.nc',
 'MODIS_20040501.nc',
 'MODIS_20040601.nc',
 'MODIS_20040701.nc',
 'MODIS_20040801.nc',
 'MODIS_20040901.nc',
 'MODIS_20

In [8]:
filtered = list() 
for files in dir_list:
    if re.search("(2001)", files):
        filtered.append(files)
filtered

['MODIS_20010101.nc',
 'MODIS_20010201.nc',
 'MODIS_20010301.nc',
 'MODIS_20010401.nc',
 'MODIS_20010501.nc',
 'MODIS_20010601.nc',
 'MODIS_20010701.nc',
 'MODIS_20010801.nc',
 'MODIS_20010901.nc',
 'MODIS_20011001.nc',
 'MODIS_20011101.nc',
 'MODIS_20011201.nc',
 'MODIS_20200101.nc']

In [46]:
path  = "data/MODIS_20010101.nc"
ds = nc.Dataset(path)
df_data = create_geoframe(ds)
df_data = add_country(df_data)
df_data = add_date(path, df_data)

df_all_data = pd.concat([df_data,
            create_column(ds, 'burned_area'),
            create_column(ds, 'standard_error'),
            create_column(ds,'fraction_of_burnable_area'),
            create_column(ds,'fraction_of_observed_area'),
            create_column(ds, 'number_of_patches'),
            create_vegetation_class_breakdown(ds)], axis = 1, sort = False) 

df_all_data = df_all_data[((df_all_data['fraction_of_observed_area'] > 0.8) & (df_all_data['burned_area']>0))]

In [47]:
df_all_data

Unnamed: 0,lon,lat,country,date,burned_area,standard_error,fraction_of_burnable_area,fraction_of_observed_area,number_of_patches,no_data_burned_area,tree_cover_broadleaved_evergreen_burned_area,tree_cover_broadleaved_deciduous_burned_area,tree_cover_needleleaved_evergreen_burned_area,tree_cover_needleleaved_deciduous_burned_area,tree_cover_mixed_leave_burned_area,cropland_burned_area,mosaic_tree_grass_burned_area,other_vegetation_burned_area,flooded_area_burned_area
195105,-3.625,56.125,GB,2001-01-01,804970.0,545465.0,0.829803,0.959910,3.0,751305.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53664.0,0.0
196545,-3.625,55.875,GB,2001-01-01,2039257.0,707836.0,0.938498,0.977980,1.0,1502610.0,0.0,0.0,0.0,0.0,0.0,375652.0,0.0,160994.0,0.0
212491,22.875,53.125,PL,2001-01-01,3005221.0,838988.0,0.996411,0.981294,1.0,0.0,0.0,590311.0,0.0,0.0,0.0,0.0,0.0,53664.0,2361245.0
219698,24.625,51.875,UA,2001-01-01,1609940.0,720975.0,0.974907,0.963874,3.0,965964.0,0.0,429317.0,0.0,0.0,0.0,0.0,0.0,214658.0,0.0
219704,26.125,51.875,UA,2001-01-01,21948848.0,929124.0,0.991455,0.826491,2.0,1287952.0,0.0,268323.0,0.0,0.0,0.0,697640.0,0.0,1985592.0,17709340.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
758740,145.125,-41.625,AU,2001-01-01,8157029.0,862988.0,0.992195,0.996348,5.0,0.0,8157029.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
760187,146.875,-41.875,AU,2001-01-01,8479017.0,655052.0,0.789258,0.968100,1.0,0.0,0.0,8479017.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
763060,145.125,-42.375,AU,2001-01-01,1931928.0,281529.0,0.087729,0.878885,2.0,0.0,1931928.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
765101,-64.625,-42.875,AR,2001-01-01,107329.0,471463.0,0.357684,0.987550,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,107329.0,0.0


In [48]:
#Load to Database

In [49]:
#!"C:\Program Files\MongoDB\Server\6.0\bin\mongod.exe" --dbpath "C:\mongodb_data"

In [9]:
client = MongoClient()
client = MongoClient('mongodb://localhost:27017/')
mydb = client["esa_fire"]
mycol = mydb["esa_fire_2001"]
print(mydb.name)

esa_fire


In [10]:
print(client.address)
print(client.list_database_names)

('localhost', 27017)
<bound method MongoClient.list_database_names of MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)>


In [11]:
mycol.drop() 

In [12]:
mycol.find().count()

0

In [13]:
for modis_file in dir_list:
    #print(modis_file)
    path  = "data/" + modis_file
    ds = nc.Dataset(path)
    df_data = create_geoframe(ds)
    df_data = add_country(df_data)
    df_data = add_date(path, df_data)

    df_all_data = pd.concat([df_data,
            create_column(ds, 'burned_area'),
            create_column(ds, 'standard_error'),
            create_column(ds,'fraction_of_burnable_area'),
            create_column(ds,'fraction_of_observed_area'),
            create_column(ds, 'number_of_patches'),
            create_vegetation_class_breakdown(ds)], axis = 1, sort = False) 

    df_all_data = df_all_data[((df_all_data['fraction_of_observed_area'] > 0.8) & (df_all_data['burned_area']>0))]
    print(f"Filname: {modis_file} Size: {str(df_all_data.shape[0])}")
    data_dict = df_all_data.to_dict("records")# Insert collection
    mycol.insert_many(data_dict)


Loading formatted geocoded file...
Filname: MODIS_20010101.nc Size: 9550
Filname: MODIS_20010201.nc Size: 8887
Filname: MODIS_20010301.nc Size: 7111
Filname: MODIS_20010401.nc Size: 9889
Filname: MODIS_20010501.nc Size: 10082
Filname: MODIS_20010601.nc Size: 5337
Filname: MODIS_20010701.nc Size: 14470
Filname: MODIS_20010801.nc Size: 20956
Filname: MODIS_20010901.nc Size: 19023
Filname: MODIS_20011001.nc Size: 13997
Filname: MODIS_20011101.nc Size: 9686
Filname: MODIS_20011201.nc Size: 8846
Filname: MODIS_20020101.nc Size: 9632
Filname: MODIS_20020201.nc Size: 9045
Filname: MODIS_20020301.nc Size: 7938
Filname: MODIS_20020401.nc Size: 9900
Filname: MODIS_20020501.nc Size: 11424
Filname: MODIS_20020601.nc Size: 10574
Filname: MODIS_20020701.nc Size: 18934
Filname: MODIS_20020801.nc Size: 23391
Filname: MODIS_20020901.nc Size: 24199
Filname: MODIS_20021001.nc Size: 18986
Filname: MODIS_20021101.nc Size: 13008
Filname: MODIS_20021201.nc Size: 10250
Filname: MODIS_20030101.nc Size: 10944
F

In [14]:
mycol.find().count()

3204039

In [15]:
mycol.count_documents({ 'date': '2001-01-01'})

9550

In [17]:
#bawue =mycol.find({'df_all_data' : 'DE' })
print("Anzahl an Städten in Baden-Württemberg: ",mycol.count_documents({'country' : 'DE' }))

Anzahl an Städten in Baden-Württemberg:  96


In [18]:
query={
     '$and': [
            {'country' : 'DE' },
          ]
}
print("Number of found products: ",mycol.count_documents(query))

Number of found products:  96


In [19]:
cursor = mycol.find(query)
df =  pd.DataFrame(list(cursor))
df

Unnamed: 0,_id,lon,lat,country,date,burned_area,standard_error,fraction_of_burnable_area,fraction_of_observed_area,number_of_patches,no_data_burned_area,tree_cover_broadleaved_evergreen_burned_area,tree_cover_broadleaved_deciduous_burned_area,tree_cover_needleleaved_evergreen_burned_area,tree_cover_needleleaved_deciduous_burned_area,tree_cover_mixed_leave_burned_area,cropland_burned_area,mosaic_tree_grass_burned_area,other_vegetation_burned_area,flooded_area_burned_area
0,6306310a28dd00e55a294f9d,11.625,51.875,DE,2002-10-01,160994.0,482436.0,0.925664,0.973029,3.0,160994.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,6306310f28dd00e55a299414,11.625,51.875,DE,2002-11-01,697640.0,711347.0,0.925664,0.974973,2.0,697640.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6306310f28dd00e55a299415,11.875,51.875,DE,2002-11-01,53664.0,720374.0,0.934856,0.974486,1.0,53664.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,6306310f28dd00e55a29943a,11.625,51.625,DE,2002-11-01,536646.0,818735.0,0.940242,0.979529,1.0,536646.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,6306310f28dd00e55a29945b,6.625,51.375,DE,2002-11-01,751305.0,526805.0,0.595106,0.895701,5.0,375652.0,0.0,0.0,160994.0,0.0,53664.0,53664.0,0.0,107329.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,6306348428dd00e55a530aa7,11.875,51.875,DE,2019-07-01,160994.0,560935.0,0.922367,1.000000,3.0,160994.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,6306348428dd00e55a530c02,9.875,50.125,DE,2019-07-01,1073293.0,694338.0,0.971613,1.000000,1.0,375652.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,590311.0,0.0
93,6306348828dd00e55a5347e3,11.875,51.875,DE,2019-08-01,1609940.0,482329.0,0.922367,0.996097,1.0,1609940.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94,6306348828dd00e55a534bce,12.875,48.125,DE,2019-08-01,214658.0,767514.0,0.949443,1.000000,3.0,53664.0,0.0,0.0,107329.0,0.0,0.0,53664.0,0.0,0.0,0.0


In [20]:
df_groupby = df.groupby(by='country').sum()
df_groupby = df_groupby.drop(columns=['lon', 'lat', 'burned_area', 'standard_error', 'fraction_of_burnable_area','fraction_of_observed_area', 'number_of_patches' ])
df_groupby = df_groupby.transpose()
df_groupby = df_groupby.reset_index()

In [21]:
df_groupby.columns

Index(['index', 'DE'], dtype='object', name='country')

In [23]:
import plotly.express as px
fig = px.bar(df_groupby, x='index', y='DE')
fig.show()