In [54]:
import pandas as pd
import plotly as pl
import plotly.express as px
import numpy as np
from collections import Counter

colors = px.colors.qualitative.Plotly # [blå, rød, grønn, lilla, oransje, lyseblå, rosa, lysegrønn, lyserosa, gul]

# Rovbase (original)

In [99]:
df = pd.read_csv('../data/rovbase/original/rovviltskader_meraker_2015-2021.csv')

In [100]:
print("Antall rader:", df.shape[0])
print("Antall attributter:", df.shape[1])

Antall rader: 379
Antall attributter: 27


In [6]:
import sys
sys.path.append('../')
from utils import Utm33ToLatLong

df = Utm33ToLatLong.converter(df)

In [10]:
fig = px.scatter_mapbox(df, lat='latitude', lon='longitude', color='predator', opacity=0.8, width=1000, height=500,
                        labels={'predator': 'Predator'},
                        zoom=6, title='Rovviltskader Meråker 2015-2022',
                        mapbox_style="stamen-terrain")
fig.show()

In [97]:
df = pd.read_csv('../data/rovbase/original/rovviltskader_meraker_2015-2021.csv')

count = df['predator'].value_counts()

print('Total number of attacks by predator: \n', count)

df['year'] = pd.to_datetime(df['date_from'], dayfirst=True).dt.year

df = df.astype({"year": str})
df = df.astype({"predator": str})

# count predators per year and predator type
df_count = df.groupby(['year', 'predator']).size().reset_index(name='count')

# sort the data by count in descending order
df_count = df_count.sort_values(by=['year', 'count'], ascending=[True, False])

fig = px.bar(df_count, x='year', y='count', color='predator', barmode = 'stack', height=600,
             labels={'predator': 'Predator', 'count': 'Number of predators', 'year': 'Year'},)

fig.update_layout(yaxis=dict(dtick=10))

fig.show()

Total number of attacks by predator: 
 Bear                          174
Wolf                           86
Unknown                        80
Wolverine                      21
Unknown protected predator     10
Golden Eagle                    2
Red Fox                         1
Disease                         1
Not predator                    1
Lynx                            1
Dog                             1
Accident                        1
Name: predator, dtype: int64


### Varighet på angrep

In [65]:
df = pd.read_csv('../data/rovbase/original/rovviltskader_meraker_2015-2021.csv')

df['date_from'] = pd.to_datetime(df['date_from'], dayfirst=True) # må konvertere for å regne ut days diff
df['date_to'] = pd.to_datetime(df['date_to'], dayfirst=True)

diff_days = (df['date_to'] - df['date_from']) / np.timedelta64(1, 'D')
print(diff_days)
diff_days = abs(diff_days)

0       0.0
1       0.0
2      14.0
3      26.0
4       7.0
       ... 
374     0.0
375     2.0
376     3.0
377     0.0
378    61.0
Length: 379, dtype: float64


In [66]:
print('I gjennomsnitt varer hvert angrep i: ', sum(diff_days)/len(diff_days))

I gjennomsnitt varer hvert angrep i:  4.87335092348285


In [73]:
count = Counter(diff_days)
print(count)

d = {'num_days': [], 'occur': []}
for x in count: # for å legge til i en bar chart
    key = x
    value = count[key]
    d['num_days'].append(key)
    d['occur'].append(value)

fig = px.bar(d, x='num_days', y='occur', title="Duration of attacks", height=600)
fig.update_layout(
    xaxis_title="Days",
    yaxis_title="Attacks",
)
fig.update_xaxes(
    dtick=1
)
fig.update_layout(yaxis=dict(dtick=10), bargap=0.4)

fig.show()

Counter({1.0: 116, 0.0: 78, 2.0: 45, 3.0: 27, 4.0: 18, 5.0: 10, 10.0: 9, 9.0: 9, 16.0: 7, 21.0: 7, 14.0: 6, 8.0: 6, 6.0: 5, 17.0: 5, 20.0: 4, 15.0: 3, 11.0: 3, 13.0: 3, 26.0: 2, 7.0: 2, 31.0: 2, 19.0: 2, 47.0: 1, 12.0: 1, 30.0: 1, 22.0: 1, 29.0: 1, 45.0: 1, 67.0: 1, 62.0: 1, 23.0: 1, 61.0: 1})


### Uncertain attack date

In [92]:
df = pd.read_csv('../data/rovbase/original/rovviltskader_meraker_2015-2021.csv')

count = df['date_uncertain'].value_counts()
total = len(df['date_uncertain'])

print('Number of if the reporter is certain or uncertain about the attack date')
print('Certain:', count.Yes, '(', round(count.Yes/(total)*100, 2), '%)')
print('Uncertain:', count.No, '(', round(count.No/total*100, 2), '%)')

Number of if the reporter is certain or uncertain about the attack date
Certain: 273 ( 72.03 %)
Uncertain: 106 ( 27.97 %)
