In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import reverse_geocoder as rg
import psycopg2 
from sqlalchemy import create_engine
import json
from pymongo import MongoClient
import warnings
warnings.filterwarnings('ignore')

In [2]:
def create_geoframe(dataset):
    lat = dataset['lat'][:]
    lon = dataset['lon'][:]
    lons, lats = np.meshgrid(lon, lat)
    lons_flatt = lons.flatten()
    lats_flatt = lats.flatten()
    whole_world = {
    'lon': lons_flatt, 
    'lat': lats_flatt, }
    geoframe = pd.DataFrame(whole_world)
    return geoframe

In [3]:
def create_column(dataset,name_variable):
    df_temp = pd.DataFrame()
    variable = dataset[name_variable][:]
    variable = variable[0]
    variable_flatt = variable.flatten()
    df_temp[name_variable] = variable_flatt
    return df_temp

In [4]:
def add_country(dataframe):
    coords = tuple(zip(dataframe['lat'], dataframe['lon']))
    results_rg = rg.search(coords)
    results_admin2 = [x.get('cc') for x in results_rg]
    dataframe['country'] = results_admin2
    return dataframe

In [5]:
def create_vegetation_class_breakdown(dataset):
    burned_area_in_vegetation_class= dataset['burned_area_in_vegetation_class'][:][0]
    i_vegetation_class = 10 
    df_temp = pd.DataFrame()
    for burned_area_per_veg_class in burned_area_in_vegetation_class:
        s_vegetation_class= f"{i_vegetation_class}_burned_area"
        burned_area_per_veg_class_flatt= burned_area_per_veg_class.flatten()
        df_temp[s_vegetation_class] = burned_area_per_veg_class_flatt
        i_vegetation_class= i_vegetation_class + 10
    df_temp['cropland_burned_area'] = df_temp['20_burned_area'] + df_temp['30_burned_area'] + df_temp['40_burned_area']
    df_temp['mosaic_tree_grass_burned_area'] = df_temp['100_burned_area'] + df_temp['100_burned_area']
    df_temp['other_vegetation_burned_area']= df_temp['120_burned_area'] + df_temp['130_burned_area'] + df_temp['140_burned_area']+ df_temp['150_burned_area']
    df_temp['flooded_area_burned_area']= df_temp['160_burned_area'] + df_temp['170_burned_area'] + df_temp['180_burned_area']
    df_temp.drop(columns=['20_burned_area','30_burned_area','40_burned_area','100_burned_area','110_burned_area','120_burned_area', '130_burned_area', '140_burned_area', '150_burned_area','160_burned_area','170_burned_area','180_burned_area'], inplace=True)
    replace_names={ '10_burned_area': 'no_data' + "_burned_area",
            '50_burned_area': 'tree_cover_broadleaved_evergreen' + "_burned_area",
            '60_burned_area': 'tree_cover_broadleaved_deciduous' + "_burned_area", 
            '70_burned_area': 'tree_cover_needleleaved_evergreen' + "_burned_area", 
            '80_burned_area': 'tree_cover_needleleaved_deciduous' + "_burned_area", 
            '90_burned_area': 'tree_cover_mixed_leave' + "_burned_area", }
    df_temp.rename(columns = replace_names, inplace = True)
    return df_temp

In [6]:
file = "data/MODIS_20010101.nc"
ds = nc.Dataset(file)
df_data = create_geoframe(ds)
df_data = add_country(df_data)
df_data

Loading formatted geocoded file...


Unnamed: 0,lon,lat,country
0,-179.875,89.875,RU
1,-179.625,89.875,RU
2,-179.375,89.875,RU
3,-179.125,89.875,RU
4,-178.875,89.875,RU
...,...,...,...
1036795,178.875,-89.875,AQ
1036796,179.125,-89.875,AQ
1036797,179.375,-89.875,AQ
1036798,179.625,-89.875,AQ


In [7]:
df_all_data = pd.concat([df_data,
            create_column(ds, 'burned_area'),
            create_column(ds, 'standard_error'),
            create_column(ds,'fraction_of_burnable_area'),
            create_column(ds,'fraction_of_observed_area'),
            create_column(ds, 'number_of_patches'),
            create_vegetation_class_breakdown(ds)], axis = 1, sort = False) 

In [8]:
date = file[-11:-7] + "-" + file[-7:-5] + "-" + file[-5:-3]
df_all_data['date'] = date

In [9]:
df_all_data

Unnamed: 0,lon,lat,country,burned_area,standard_error,fraction_of_burnable_area,fraction_of_observed_area,number_of_patches,no_data_burned_area,tree_cover_broadleaved_evergreen_burned_area,tree_cover_broadleaved_deciduous_burned_area,tree_cover_needleleaved_evergreen_burned_area,tree_cover_needleleaved_deciduous_burned_area,tree_cover_mixed_leave_burned_area,cropland_burned_area,mosaic_tree_grass_burned_area,other_vegetation_burned_area,flooded_area_burned_area,date
0,-179.875,89.875,RU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
1,-179.625,89.875,RU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
2,-179.375,89.875,RU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
3,-179.125,89.875,RU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
4,-178.875,89.875,RU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036795,178.875,-89.875,AQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
1036796,179.125,-89.875,AQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
1036797,179.375,-89.875,AQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01
1036798,179.625,-89.875,AQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001-01-01


In [10]:
#Load to Database

In [11]:
#"C:\Program Files\MongoDB\Server\6.0\bin\mongod.exe" --dbpath "C:\mongodb_data"

In [12]:
client = MongoClient()
client = MongoClient('mongodb://localhost:27017/')
mydb = client["esa_fire"]
mycol = mydb["esa_fire_2001"]
print(mydb.name)

esa_fire


In [13]:
print(client.address)
print(client.list_database_names)

('localhost', 27017)
<bound method MongoClient.list_database_names of MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)>


In [14]:
#data.reset_index(inplace=True)
data_dict = df_all_data.to_dict("records")# Insert collection


In [15]:
mycol.insert_many(data_dict)

<pymongo.results.InsertManyResult at 0x22ff402fc88>

In [18]:
#bawue =mycol.find({'df_all_data' : 'DE' })
print("Anzahl an Städten in Baden-Württemberg: ",mycol.count_documents({'country' : 'DE' }))

Anzahl an Städten in Baden-Württemberg:  840


In [47]:
query={
     '$and': [
            {'country' : 'US' },
            { 'burned_area'    : { '$gt':0 }},
            { 'fraction_of_observed_area'    : { '$gt':0.8 }, }
          ]
}
print("Number of found products: ",mycol.count_documents(query))

Number of found products:  43


In [48]:
cursor = mycol.find(query)
df =  pd.DataFrame(list(cursor))
df

Unnamed: 0,_id,lon,lat,country,burned_area,standard_error,fraction_of_burnable_area,fraction_of_observed_area,number_of_patches,no_data_burned_area,tree_cover_broadleaved_evergreen_burned_area,tree_cover_broadleaved_deciduous_burned_area,tree_cover_needleleaved_evergreen_burned_area,tree_cover_needleleaved_deciduous_burned_area,tree_cover_mixed_leave_burned_area,cropland_burned_area,mosaic_tree_grass_burned_area,other_vegetation_burned_area,flooded_area_burned_area,date
0,630247bc43db4d1db824735e,-103.375,43.875,US,697640.0,709895.0,0.993549,0.998353,3.0,0.0,0.0,0.0,214658.0,0.0,0.0,214658.0,0.0,268323.0,0.0,2001-01-01
1,630247bc43db4d1db824735f,-103.125,43.875,US,1126958.0,598482.0,0.997977,0.999228,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1126958.0,0.0,2001-01-01
2,630247bc43db4d1db824b0ed,-123.625,41.125,US,1073293.0,1112645.0,0.996682,0.997318,4.0,0.0,0.0,0.0,1019628.0,0.0,53664.0,0.0,0.0,0.0,0.0,2001-01-01
3,630247bc43db4d1db8256520,-110.875,33.125,US,536646.0,720629.0,0.975452,0.996769,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,536646.0,0.0,2001-01-01
4,630247bc43db4d1db8256aa8,-116.875,32.875,US,18889964.0,1055429.0,0.844838,0.971037,3.0,0.0,0.0,0.0,0.0,0.0,0.0,268323.0,0.0,18621640.0,0.0,2001-01-01
5,630247bc43db4d1db8256aa9,-116.625,32.875,US,24793076.0,1180796.0,0.999835,0.999669,5.0,0.0,0.0,0.0,804970.0,53664.0,1717269.0,375652.0,107328.0,21787854.0,0.0,2001-01-01
6,630247bc43db4d1db825ae6b,-100.125,29.875,US,214658.0,1079506.0,1.0,1.0,1.0,0.0,0.0,0.0,160994.0,0.0,0.0,0.0,0.0,53664.0,0.0,2001-01-01
7,630247bc43db4d1db825ae87,-93.125,29.875,US,7459388.0,1205241.0,0.813116,0.972427,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53664.0,7405724.0,2001-01-01
8,630247bc43db4d1db825ae91,-90.625,29.875,US,107329.0,561354.0,0.880516,0.991633,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,107329.0,2001-01-01
9,630247bc43db4d1db825aea7,-85.125,29.875,US,107329.0,585425.0,0.962518,0.99351,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,107329.0,2001-01-01


In [49]:
df_groupby = df.groupby(by='country').sum()
df_groupby = df_groupby.drop(columns=['lon', 'lat', 'burned_area', 'standard_error', 'fraction_of_burnable_area','fraction_of_observed_area', 'number_of_patches' ])
df_groupby = df_groupby.transpose()
df_groupby = df_groupby.reset_index()

In [50]:
df_groupby.columns

Index(['index', 'US'], dtype='object', name='country')

In [51]:
import plotly.express as px
fig = px.bar(df_groupby, x='index', y='US')
fig.show()