# Creating the Dataset
## Data Fetching

In [27]:
import numpy as np
import pandas as pd

from avalancheutils.bulletins import fetch_bulletins
from avalancheutils.lawis import fetch_event_overview, fetch_events_details
from avalancheutils.weather import fetch_weather_data

### Fetching the Avalanche Data
To fetch detailed data about the incidents, we first need to fetch the general overview of all events in the LAWIS database.

In [28]:
# Fetch the overview table of all incident events via the project's LAWIS helper.
# NOTE(review): `save=False` presumably skips writing the overview to disk — confirm in avalancheutils.lawis.
incident_overview = fetch_event_overview(api='incident', save=False)
# Bare last expression: rendered as the cell output.
incident_overview

Fetched an event overview containing 4254 events.


Unnamed: 0,incident_id,datum,country_id,region_id,subregion_id,ort,elevation,latitude,longitude,incline,aspect_id,danger_id,n_injured,n_dead,n_uninjured,involved_sum,involved,valid_time,revision
0,9349,2018-02-17 10:49:00,1,5,147,Kreuzkogel,,47.41434,14.50995,,3.0,2.0,0.0,0.0,1.0,1.0,yes,True,1
1,9350,2018-02-17 14:30:00,1,6,90,"Obertauern, Zehnerkar, Breiter Hang",1950.0,47.24274,13.54301,30.0,1.0,2.0,1.0,0.0,3.0,4.0,yes,True,1
2,9379,2018-02-24 14:00:00,2,9,18,Couloir de la Tsa,3200.0,46.02297,7.51830,,7.0,2.0,3.0,1.0,0.0,4.0,yes,True,1
3,9353,2018-02-15 13:04:00,12,38,58,"Nizke Tatry, Veľký Gapel",1700.0,48.91818,19.63015,37.0,8.0,2.0,0.0,1.0,2.0,3.0,yes,True,1
4,9354,2018-02-17 11:34:00,1,1,175,Katzenkopf,,47.27433,11.98533,,9.0,3.0,0.0,0.0,0.0,0.0,yes,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4249,10951,2024-05-01 11:00:00,1,6,273,Schareck,2340.0,47.03804,13.03561,,3.0,2.0,1.0,0.0,,,yes,True,1
4250,10936,2024-04-25 11:00:00,1,4,294,Wurtenkees / Mölltaler Gletscher,2750.0,47.03577,13.01013,35.0,8.0,2.0,0.0,1.0,1.0,2.0,yes,True,4
4251,10953,2024-03-24 15:30:00,1,5,254,Steir. Bodensee,,47.36550,13.81707,,2.0,2.0,0.0,0.0,0.0,0.0,none,True,3
4252,10921,2024-03-26 10:05:00,1,2,86,Piz Buin / Grüne Kuppe,2410.0,46.86359,10.11382,30.0,1.0,3.0,1.0,1.0,1.0,3.0,yes,True,9


In [29]:
# Summarise column names, non-null counts and dtypes of the overview table.
incident_overview.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4254 entries, 0 to 4253
Data columns (total 19 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   incident_id   4254 non-null   int64  
 1   datum         4254 non-null   object 
 2   country_id    4254 non-null   int64  
 3   region_id     4254 non-null   int64  
 4   subregion_id  4254 non-null   int64  
 5   ort           4254 non-null   object 
 6   elevation     3416 non-null   float64
 7   latitude      4254 non-null   float64
 8   longitude     4254 non-null   float64
 9   incline       2746 non-null   float64
 10  aspect_id     3380 non-null   float64
 11  danger_id     3409 non-null   float64
 12  n_injured     3778 non-null   float64
 13  n_dead        3969 non-null   float64
 14  n_uninjured   1741 non-null   float64
 15  involved_sum  1665 non-null   float64
 16  involved      4254 non-null   object 
 17  valid_time    4254 non-null   bool   
 18  revision      4254 non-null 

To prevent unnecessary API calls and streamline further data analysis, all avalanche incidents are stored as JSON files in the `data/cache/incidents` folder. Only new and missing incidents will be downloaded. If you need to re-fetch incident details, simply delete all incident files or change the output folder.

In [30]:
# Fetch the detailed record of every incident id listed in the overview.
# `output_dir` is the folder the notebook text describes as the JSON cache of already downloaded incidents.
incidents = fetch_events_details(event_ids=incident_overview['incident_id'], api='incident',
                                 output_dir='data/cache/incidents')
# Bare last expression: rendered as the cell output.
incidents

Unnamed: 0,id,valid_time,date,reported_date,reported_name,reported_email,involved_dead,involved_injured,involved_uninjured,involved_sweeped,...,involved_equipment_lvs_text,involved_equipment_airbag_id,involved_equipment_airbag_text,involved_ascent_descent_id,involved_ascent_descent_text,avalanche_release_id,avalanche_release_text,avalanche_humidity_id,avalanche_humidity_text,not_buried
0,9349,True,2018-02-17T10:49:00+01:00,2018-02-17T15:02:44+01:00,lwd,,0.0,0.0,1.0,0.0,...,,,,,,,,,,
1,9350,True,2018-02-17T14:30:00+01:00,2018-02-17T18:20:50+01:00,Beobachter Obertauern,,0.0,1.0,3.0,0.0,...,,,,,,,,,,
2,9379,True,2018-02-24T14:00:00+01:00,2018-02-26T08:01:55+01:00,SLF LWD Davos,,1.0,3.0,0.0,4.0,...,,,,,,,,,,
3,9353,True,2018-02-15T13:04:00+01:00,2018-02-18T11:20:21+01:00,SLP HZS,,1.0,0.0,2.0,1.0,...,,,,,,,,,,
4,9354,True,2018-02-17T11:34:00+01:00,2018-02-19T08:00:14+01:00,LWD Tirol,,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4249,10951,True,2024-05-01T11:00:00+02:00,2024-05-03T09:33:12+02:00,LWD Salzburg,lawine@salzburg.gv.at,0.0,1.0,,1.0,...,unknown,0.0,unknown,1.0,ascent,2.0,spontaneous,3.0,wet,1.0
4250,10936,True,2024-04-25T11:00:00+02:00,2024-04-26T07:49:53+02:00,AEG Spittal / LWD,,1.0,0.0,1.0,1.0,...,yes,2.0,some,2.0,descent,1.0,artificial,1.0,dry,0.0
4251,10953,True,2024-03-24T15:30:00+01:00,2024-05-06T13:29:25+02:00,LWD Steiermark,lawine.steiermark@geosphere.at,0.0,0.0,0.0,0.0,...,,,,,,2.0,spontaneous,3.0,wet,0.0
4252,10921,True,2024-03-26T10:05:00+01:00,2024-03-26T18:08:19+01:00,A. Pecl / LWD Vlbg,lawinenwarndienst@lwz-vorarlberg.at,1.0,1.0,1.0,2.0,...,yes,0.0,unknown,2.0,descent,1.0,artificial,0.0,unknown,


In [31]:
# Summarise column names, non-null counts and dtypes of the detailed incident records.
incidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4254 entries, 0 to 4253
Data columns (total 54 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   id                                4254 non-null   int64  
 1   valid_time                        4254 non-null   bool   
 2   date                              4254 non-null   object 
 3   reported_date                     4254 non-null   object 
 4   reported_name                     4254 non-null   object 
 5   reported_email                    3811 non-null   object 
 6   involved_dead                     3969 non-null   float64
 7   involved_injured                  3778 non-null   float64
 8   involved_uninjured                1741 non-null   float64
 9   involved_sweeped                  3682 non-null   float64
 10  involved_buried_partial           3666 non-null   float64
 11  involved_buried_total             3664 non-null   float64
 12  involv

### Converting the Date Column
The date column also contains timezone information, but the timezone is used incorrectly, as the time is always stored in the Central European timezone. We can therefore drop the timezone offset and convert the column to the datetime format.

In [32]:
# Cut off the trailing UTC offset (everything from the first '+', e.g. '+01:00') and parse
# the remaining local wall-clock time as a naive datetime.
# The vectorized `.str` accessor replaces the per-row lambda: it yields the same result for
# strings, but propagates missing values (-> NaT) instead of raising AttributeError on them.
# `regex=False` makes explicit that '+' is a literal separator, not a regex quantifier.
incidents['date'] = pd.to_datetime(incidents['date'].str.split('+', regex=False).str[0])

In [33]:
# Cut off the trailing UTC offset (everything from the first '+', e.g. '+02:00') and parse
# the remaining local wall-clock time as a naive datetime.
# The vectorized `.str` accessor replaces the per-row lambda: it yields the same result for
# strings, but propagates missing values (-> NaT) instead of raising AttributeError on them.
# `regex=False` makes explicit that '+' is a literal separator, not a regex quantifier.
incidents['reported_date'] = pd.to_datetime(incidents['reported_date'].str.split('+', regex=False).str[0])

### Unifying null values
Besides the missing values, some records also contain the value `unknown`, which can be replaced by `NaN` to make the labelling of missing values more consistent.

In [34]:
# Treat the literal string "unknown" as a missing value in every column.
# The result is rebound instead of using `inplace=True`, so the cell does not mutate a frame
# that earlier cells have already displayed and the call stays chainable.
incidents = incidents.replace("unknown", np.nan)

### EAWS Name Convention Compatibility
In further analysis, the dataset will be compared with other data from EAWS sources. Therefore, it is necessary to standardize the features to match those used in other EAWS services.

#### Names of the Avalanche Problems

In [35]:
# List the distinct avalanche problem names present in the data (missing values excluded).
incidents['danger_problem_text'].dropna().unique()

array(['wet snow', 'wind-drifted snow', 'old snow', 'fresh snow',
       'gliding snow'], dtype=object)

The names of the avalanche problem categories have evolved over the years, and the LAWIS service continues to use the former names, which are no longer compatible with other EAWS systems (such as the avalanche bulletins). Let's update them.

In [36]:
# Mapping from the avalanche problem names still used by LAWIS to the names used here for
# compatibility with other EAWS sources.
corrections = dict([
    ("fresh snow", "new_snow"),
    ("wind-drifted snow", "wind_slab"),
    ("old snow", "persistent_weak_layers"),
    ("wet snow", "wet_snow"),
    ("gliding snow", "gliding_snow"),
])
# Values that are not keys of the mapping (including missing values) are left untouched.
renamed_problems = incidents['danger_problem_text'].replace(corrections)
incidents['danger_problem_text'] = renamed_problems.infer_objects(copy=False)

### Adding the Involved Sum
The overview fetched in the [Fetching the Avalanche Data](#Fetching-the-Avalanche-Data) section contains one important feature that is missing from the detailed records: `involved_sum`. Let's add it to the dataset.

In [37]:
# Take `involved_sum` from the overview table and key it by `id`, so that it can be joined
# directly onto the detailed records without a temporary `incident_id` column.
overview_involved_sum = incident_overview[['incident_id', 'involved_sum']].rename(columns={'incident_id': 'id'})

# Drop a previously merged `involved_sum` (if any) so that re-running this cell does not
# produce `involved_sum_x` / `involved_sum_y` columns.
incidents = incidents.drop(columns=['involved_sum'], errors='ignore')

# Left join keeps every detailed record. `validate='many_to_one'` raises a MergeError if an
# incident id occurs more than once in the overview, which would otherwise silently
# duplicate incident rows.
incidents = incidents.merge(overview_involved_sum, on='id', how='left', validate='many_to_one')
incidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4254 entries, 0 to 4253
Data columns (total 55 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   id                                4254 non-null   int64         
 1   valid_time                        4254 non-null   bool          
 2   date                              4254 non-null   datetime64[ns]
 3   reported_date                     4254 non-null   datetime64[ns]
 4   reported_name                     4254 non-null   object        
 5   reported_email                    3811 non-null   object        
 6   involved_dead                     3969 non-null   float64       
 7   involved_injured                  3778 non-null   float64       
 8   involved_uninjured                1741 non-null   float64       
 9   involved_sweeped                  3682 non-null   float64       
 10  involved_buried_partial           3666 non-null 

### Features Name Consistency
The feature `not_buried` represents the number of involved individuals who were not buried by the avalanche; for better consistency with the other `involved_*` features, it will be renamed to `involved_not_buried`. The feature name `involved_sweeped` also contains a grammatical error and will be renamed to `involved_swept`.

In [38]:
# Old column name -> new column name, aligning both with the `involved_*` naming scheme.
renamed_columns = {
    'not_buried': 'involved_not_buried',
    'involved_sweeped': 'involved_swept',
}
incidents = incidents.rename(columns=renamed_columns)

### Fetching the Weather Data
The weather data is provided by Open-Meteo.com, for documentation of the weather variables they provide please refer to their [docs](https://open-meteo.com/en/docs/historical-weather-api).

In this dataset I use the following variables:
| Resulting Feature | Open-Meteo Variable           | Description                                                              |
|-------------------|-------------------------------|--------------------------------------------------------------------------|
| `temp`            | `temperature_2m`              | Air temperature at 2 meters above ground (°C)                            |
| `snow_depth`      | `snow_depth`                  | Snow depth (m)                                                           |
| `temp_mean`       | `temperature_2m_mean`         | Average temperature (°C)                                                 |
| `temp_diff`       | `temperature_2m_mean`         | Difference in average daily temperatures (°C)                            |
| `rain_sum`        | `rain_sum`                    | Total rainfall (mm)                                                      |
| `snow_sum`        | `snowfall_sum`                | Total snowfall (cm)                                                      |
| `wind_speed_mean` | `wind_speed_10m_max`          | Average maximum wind speed without gusts at 10 meters above ground (m/s) |
| `wind_dir_mean`   | `wind_direction_10m_dominant` | Dominant wind direction at 10 meters above ground (°)                    |
| `radiation_sum`   | `shortwave_radiation_sum`     | Total solar radiation (MJ/m²)                                            |

The daily aggregated variables are collected for the 3 days preceding each incident.

In [39]:
# Fetch the weather variables for the incidents via the project's weather helper.
# NOTE(review): the CSV path is presumably used as an on-disk cache of fetched weather data — confirm in avalancheutils.weather.
weather_data = fetch_weather_data(incidents, 'data/cache/weather/incidents_weather.csv')

Fetching weather data for the incident with id: 9349
Fetching weather data for the incident with id: 9350
Fetching weather data for the incident with id: 9379
Fetching weather data for the incident with id: 9353
Fetching weather data for the incident with id: 9354
Fetching weather data for the incident with id: 9355
Fetching weather data for the incident with id: 9347
Fetching weather data for the incident with id: 9364
Fetching weather data for the incident with id: 9365
Fetching weather data for the incident with id: 9366
Fetching weather data for the incident with id: 9372
Fetching weather data for the incident with id: 9370
Fetching weather data for the incident with id: 9371
Fetching weather data for the incident with id: 9377
Fetching weather data for the incident with id: 9380
Fetching weather data for the incident with id: 9351
Fetching weather data for the incident with id: 9373
Fetching weather data for the incident with id: 9348
Fetching weather data for the incident with id

In [40]:
# Summarise column names, non-null counts and dtypes of the fetched weather data.
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4254 entries, 0 to 4253
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               4254 non-null   int64  
 1   temp             4254 non-null   float32
 2   snow_depth       4254 non-null   float32
 3   temp_mean        4254 non-null   float32
 4   temp_diff        4254 non-null   float32
 5   rain_sum         4254 non-null   float32
 6   snow_sum         4254 non-null   float32
 7   wind_speed_mean  4254 non-null   float32
 8   wind_dir_mean    4254 non-null   float64
 9   radiation_sum    4254 non-null   float32
dtypes: float32(8), float64(1), int64(1)
memory usage: 199.5 KB


### Merging the Weather Data with Avalanche Incidents
To keep the dataset consistent, all new columns containing weather variables will be labeled with the prefix `weather_`.

In [41]:
# Rename map for every weather column except the join key `id`: name -> 'weather_' + name.
prefix_dict = {name: 'weather_' + name for name in weather_data.columns if name != 'id'}

# Left join on `id` keeps every incident; the weather columns are prefixed afterwards.
incidents_with_weather = incidents.merge(weather_data, on='id', how='left')
incidents = incidents_with_weather.rename(columns=prefix_dict)
incidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4254 entries, 0 to 4253
Data columns (total 64 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   id                                4254 non-null   int64         
 1   valid_time                        4254 non-null   bool          
 2   date                              4254 non-null   datetime64[ns]
 3   reported_date                     4254 non-null   datetime64[ns]
 4   reported_name                     4254 non-null   object        
 5   reported_email                    3811 non-null   object        
 6   involved_dead                     3969 non-null   float64       
 7   involved_injured                  3778 non-null   float64       
 8   involved_uninjured                1741 non-null   float64       
 9   involved_swept                    3682 non-null   float64       
 10  involved_buried_partial           3666 non-null 

### Saving the Raw Data

Since some columns will be removed in the following cells, here is an opportunity to save the raw data to a CSV file for your own further investigation. The further analysis focuses on avalanche data from the Tyrol federal state. For reproducibility, the dataset has been frozen to only include incidents that happened before **2024-03-01** and were reported before **2024-04-01**. You can save the dataset, including the most recent data from all available countries, for your own analysis.

In [42]:
# Optional export of the raw dataset (all countries, all dates, all columns).
# Set `file_name` to a target path, e.g. 'data/incidents_raw.csv', to save the dataset;
# leave it as None to skip the export. The explicit guard replaces the former
# Ellipsis placeholder and commented-out call, which failed with an obscure error when
# the call was uncommented without choosing a file name.
file_name = None
if file_name is not None:
    incidents.to_csv(file_name, index=False)

### Removing Unnecessary Columns
The downloaded data contains many identifiers for individual feature categories. However, these identifiers are used only internally and lack standardization. It also appears that many were added incrementally as the service expanded and more Avalanche Warning Services joined (e.g., Austria has the country ID = 1). Due to their redundancy and lower informational value compared to the text features, these identifiers will be removed from the dataset.

In [43]:
# Collect every column whose name contains '_id' (the internal identifier columns).
columns_to_drop = [name for name in incidents.columns if '_id' in name]

As the `location_country_code` feature contains the ISO 3166-1 two-letter country code, the `location_country_text` can also be removed.

In [44]:
# The country name duplicates the information in `location_country_code`.
columns_to_drop += ['location_country_text']

The `involved_equipment` column is a leftover from converting the individual JSON files and only contains empty arrays and null values.

In [45]:
# Leftover column from the JSON conversion; marked for removal.
columns_to_drop += ['involved_equipment']

To protect privacy, columns containing information about the individuals who reported the incident and their contact details will be removed. Additionally, comments and images of the incidents will be removed, as their analysis is outside the scope of this work.

In [46]:
# Columns describing who reported the incident (every column whose name contains 'reported_').
reporter_columns = [name for name in incidents.columns if 'reported_' in name]
columns_to_drop.extend(reporter_columns)
# Images and free-text comments are outside the scope of the analysis.
columns_to_drop.extend(['images', 'comments'])

In [47]:
# Freeze the dataset: keep Tirol incidents that happened before 2024-03-01 and were
# reported before 2024-04-01. This must run before `reported_date` is dropped.
in_tirol = incidents['location_region_text'] == 'Tirol'
happened_before_cutoff = incidents['date'] < '2024-03-01'
reported_before_cutoff = incidents['reported_date'] < '2024-04-01'
incidents = incidents[in_tirol & happened_before_cutoff & reported_before_cutoff]

In [48]:
# Remove all columns collected in `columns_to_drop` and show the remaining schema.
incidents = incidents.drop(columns_to_drop, axis='columns')
incidents.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3060 entries, 4 to 4178
Data columns (total 43 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   id                                3060 non-null   int64         
 1   valid_time                        3060 non-null   bool          
 2   date                              3060 non-null   datetime64[ns]
 3   involved_dead                     2919 non-null   float64       
 4   involved_injured                  2731 non-null   float64       
 5   involved_uninjured                797 non-null    float64       
 6   involved_swept                    2613 non-null   float64       
 7   involved_buried_partial           2646 non-null   float64       
 8   involved_buried_total             2648 non-null   float64       
 9   danger_rating_level               2297 non-null   float64       
 10  danger_rating_text                2323 non-null   obj

#### Reordering the Columns

In [49]:
# Desired column order of the final dataset, grouped by topic.
cols_order = [
    # identification, time and position
    'id', 'date', 'valid_time',
    'location_longitude', 'location_latitude',
    # danger assessment
    'danger_rating_level', 'danger_rating_text', 'danger_problem_text',
    # involved persons
    'involved_sum', 'involved_dead', 'involved_injured', 'involved_uninjured',
    'involved_swept', 'involved_buried_total', 'involved_buried_partial', 'involved_not_buried',
    # avalanche geometry and classification
    'avalanche_extent_length', 'avalanche_extent_width', 'avalanche_breakheight',
    'avalanche_type_text', 'avalanche_size_text',
    # location details
    'location_elevation', 'location_slope_angle', 'location_aspect_text', 'location_name',
    'location_country_code', 'location_region_text', 'location_subregion_text',
    # equipment, activity and release
    'involved_equipment_standard_text', 'involved_equipment_lvs_text', 'involved_equipment_airbag_text',
    'involved_ascent_descent_text', 'avalanche_release_text', 'avalanche_humidity_text',
    # weather
    'weather_temp', 'weather_snow_depth', 'weather_temp_mean', 'weather_temp_diff', 'weather_rain_sum',
    'weather_snow_sum', 'weather_wind_speed_mean', 'weather_wind_dir_mean', 'weather_radiation_sum',
]

# Reordering must neither lose nor duplicate a column. Comparing only the number of columns
# before and after is not sufficient: a name listed twice can mask an omitted column.
duplicated_names = sorted({name for name in cols_order if cols_order.count(name) > 1})
assert not duplicated_names, f"cols_order lists columns more than once: {duplicated_names}"
unlisted_columns = sorted(set(incidents.columns) - set(cols_order))
assert not unlisted_columns, f"columns missing from cols_order would be dropped: {unlisted_columns}"

# A name in `cols_order` that does not exist in the frame raises a KeyError here.
incidents = incidents[cols_order]

#### Avalanche Incidents in Tyrol

In [50]:
# The Tirol / date-cutoff filter is applied earlier in the notebook, before `reported_date`
# is dropped (that is the only place where the reporting-date cutoff can be applied).
# Re-filtering here would be a no-op, so the invariant is asserted instead: if the earlier
# filter was skipped, this cell fails loudly rather than producing a dataset without the
# reporting-date cutoff.
assert (incidents['location_region_text'] == 'Tirol').all(), "non-Tirol incidents present"
assert (incidents['date'] < '2024-03-01').all(), "incidents on or after 2024-03-01 present"
incidents

Unnamed: 0,id,date,valid_time,location_longitude,location_latitude,danger_rating_level,danger_rating_text,danger_problem_text,involved_sum,involved_dead,...,avalanche_humidity_text,weather_temp,weather_snow_depth,weather_temp_mean,weather_temp_diff,weather_rain_sum,weather_snow_sum,weather_wind_speed_mean,weather_wind_dir_mean,weather_radiation_sum
4,9354,2018-02-17 11:34:00,True,11.98533,47.27433,3.0,considerable,,0.0,0.0,...,,-1.072000,1.62,-11.436062,12.231251,0.400000,15.330001,1.765809,236.022186,30.590000
5,9355,2018-02-17 12:42:00,True,10.51200,47.04033,3.0,considerable,,2.0,0.0,...,,-5.520000,1.73,-13.941355,11.135417,0.100000,11.830001,2.170215,239.169565,27.330000
6,9347,2018-02-16 09:58:00,True,10.17150,46.88017,3.0,considerable,,2.0,0.0,...,,-5.238000,2.12,-13.038000,9.931250,0.000000,9.170000,1.789226,271.672250,33.340000
11,9370,2018-02-22 14:20:00,True,10.30706,47.10953,2.0,moderate,,2.0,0.0,...,,-11.348000,2.03,-15.169874,-2.739584,0.000000,1.260000,1.921453,140.138434,45.429996
12,9371,2018-02-21 15:24:00,True,10.28800,47.10217,2.0,moderate,,3.0,0.0,...,,-12.007500,2.05,-12.555938,-4.852083,0.000000,2.590000,1.958323,328.446203,40.349998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4172,10871,2024-01-27 14:40:00,True,10.86917,46.93223,2.0,moderate,persistent_weak_layers,2.0,0.0,...,dry,-9.252501,10.14,-7.532708,-8.310416,4.000000,16.309999,4.410570,295.006771,22.520000
4173,10817,2024-01-27 15:25:00,True,11.09482,46.99571,2.0,moderate,persistent_weak_layers,2.0,0.0,...,dry,-11.929500,1.69,-8.196167,-7.350000,5.099999,16.939999,4.362330,305.166596,23.040001
4176,10873,2024-02-29 10:54:00,True,11.07445,46.88392,2.0,moderate,wind_slab,1.0,0.0,...,dry,-3.077000,1.89,-6.460854,2.887499,0.000000,14.140000,3.061700,172.783760,43.250000
4177,10874,2023-02-05 11:00:00,True,11.14106,47.13618,5.0,very high,wind_slab,1.0,0.0,...,dry,-4.789000,1.02,-4.615563,-2.458334,0.000000,26.949999,2.545115,297.204999,20.219999


The `location_region_text` and `location_country_code` are now redundant, because they have the same value for all the filtered records.

In [51]:
# Region and country are constant for the filtered records, so both columns are removed.
constant_columns = ['location_region_text', 'location_country_code']
incidents = incidents.drop(constant_columns, axis='columns')

##### Final version of the dataset:

In [52]:
# Display the final dataset (pandas truncates the rendered table).
incidents

Unnamed: 0,id,date,valid_time,location_longitude,location_latitude,danger_rating_level,danger_rating_text,danger_problem_text,involved_sum,involved_dead,...,avalanche_humidity_text,weather_temp,weather_snow_depth,weather_temp_mean,weather_temp_diff,weather_rain_sum,weather_snow_sum,weather_wind_speed_mean,weather_wind_dir_mean,weather_radiation_sum
4,9354,2018-02-17 11:34:00,True,11.98533,47.27433,3.0,considerable,,0.0,0.0,...,,-1.072000,1.62,-11.436062,12.231251,0.400000,15.330001,1.765809,236.022186,30.590000
5,9355,2018-02-17 12:42:00,True,10.51200,47.04033,3.0,considerable,,2.0,0.0,...,,-5.520000,1.73,-13.941355,11.135417,0.100000,11.830001,2.170215,239.169565,27.330000
6,9347,2018-02-16 09:58:00,True,10.17150,46.88017,3.0,considerable,,2.0,0.0,...,,-5.238000,2.12,-13.038000,9.931250,0.000000,9.170000,1.789226,271.672250,33.340000
11,9370,2018-02-22 14:20:00,True,10.30706,47.10953,2.0,moderate,,2.0,0.0,...,,-11.348000,2.03,-15.169874,-2.739584,0.000000,1.260000,1.921453,140.138434,45.429996
12,9371,2018-02-21 15:24:00,True,10.28800,47.10217,2.0,moderate,,3.0,0.0,...,,-12.007500,2.05,-12.555938,-4.852083,0.000000,2.590000,1.958323,328.446203,40.349998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4172,10871,2024-01-27 14:40:00,True,10.86917,46.93223,2.0,moderate,persistent_weak_layers,2.0,0.0,...,dry,-9.252501,10.14,-7.532708,-8.310416,4.000000,16.309999,4.410570,295.006771,22.520000
4173,10817,2024-01-27 15:25:00,True,11.09482,46.99571,2.0,moderate,persistent_weak_layers,2.0,0.0,...,dry,-11.929500,1.69,-8.196167,-7.350000,5.099999,16.939999,4.362330,305.166596,23.040001
4176,10873,2024-02-29 10:54:00,True,11.07445,46.88392,2.0,moderate,wind_slab,1.0,0.0,...,dry,-3.077000,1.89,-6.460854,2.887499,0.000000,14.140000,3.061700,172.783760,43.250000
4177,10874,2023-02-05 11:00:00,True,11.14106,47.13618,5.0,very high,wind_slab,1.0,0.0,...,dry,-4.789000,1.02,-4.615563,-2.458334,0.000000,26.949999,2.545115,297.204999,20.219999


In [53]:
# Per-column completeness overview: absolute and relative number of non-missing values,
# together with the pandas dtype of each column.
is_present = incidents.notna()
pd.DataFrame({
    'Non-empty Values Count': is_present.sum(),
    'Non-empty Percentage': is_present.mean().mul(100).round(2),
    'pandas dtype': incidents.dtypes.values,
})

Unnamed: 0,Non-empty Values Count,Non-empty Percentage,pandas dtype
id,3060,100.0,int64
date,3060,100.0,datetime64[ns]
valid_time,3060,100.0,bool
location_longitude,3060,100.0,float64
location_latitude,3060,100.0,float64
danger_rating_level,2297,75.07,float64
danger_rating_text,2323,75.92,object
danger_problem_text,487,15.92,object
involved_sum,775,25.33,float64
involved_dead,2919,95.39,float64


In [54]:
# Persist the final dataset; the row index is not written to the file.
incidents.to_csv('data/incidents_tirol.csv', index=False)