In [4]:
import numpy as np
import plotly as pl
import pandas as pd
import plotly.express as px
from collections import Counter
import warnings
warnings.filterwarnings("ignore")

# Statistikk over rovbase

In [94]:
df = pd.read_csv('../data/rovbase/rovviltskader.csv')
df['year'] = pd.to_datetime(df['Skadedato_fra']).dt.year

df = df.astype({"year": str}) # må konvertere til string for at de skal vises seperatat på kartet

fig = px.scatter_mapbox(df, lat='latitude', lon='longitude', color='year', opacity=0.8, width=800, height=500,
                        zoom=6, title='Rovviltskader Meråker 2015-2022',
                        mapbox_style="stamen-terrain")
fig.show()

In [3]:
df = df.astype({"Skadearsak": str}) # må konvertere til string for at de skal vises seperatat på kartet
#df2 = df.groupby(['year', 'Skadearsak'])['Skadearsak'].count()

fig = px.bar(df, x='year', color="Skadearsak", barmode = 'stack')
fig.show()

## Hvor mange angrep er det som varer 1 dag, 2 dager og 3 dager

In [4]:
df['Skadedato_fra'] = pd.to_datetime(df['Skadedato_fra']) # må konvertere for å regne ut days diff
df['Skadedato_til'] = pd.to_datetime(df['Skadedato_til'])

diff_days = (df['Skadedato_til'] - df['Skadedato_fra']) / np.timedelta64(1, 'D')

count = Counter(diff_days)
print(count)

d = {'num_days': [], 'occur': []}
for x in count: # for å legge til i en bar chart
    key = x
    value = count[key]
    d['num_days'].append(key)
    d['occur'].append(value)

fig = px.bar(d, x='num_days', y='occur')
fig.update_layout(
    xaxis_title="Dager i forskjell",
    yaxis_title="Antall",
)
fig.update_xaxes(
    dtick=1
)
fig.show()

Counter({1.0: 113, 0.0: 78, 2.0: 44})


# Hvordan oppfører sauene seg på angrepsdag

In [87]:
df2015 = pd.read_csv('../data/kaasa/kaasa_2015.csv')
df2016 = pd.read_csv('../data/kaasa/kaasa_2016.csv')
df2017 = pd.read_csv('../data/kaasa/kaasa_2017.csv')
df2018 = pd.read_csv('../data/kaasa/kaasa_2018.csv')
df2019 = pd.read_csv('../data/kaasa/kaasa_2019.csv')
df2020 = pd.read_csv('../data/kaasa/kaasa_2020.csv')
df2021 = pd.read_csv('../data/kaasa/kaasa_2021.csv')

attack_data = pd.read_csv('../data/rovbase/rovviltskader.csv')

df = pd.concat([df2015, df2016, df2017, df2018, df2019, df2020, df2021], ignore_index=True)

In [133]:
from CloseToAttack import calculate_distance
import statistics
from datetime import timedelta


# Return a Series (single column dataframe) for the features before and after the attack date
def get_features_before_and_after_attack(flokk, sheep_data, day_before, day_after):

    # List of features before and after the attack date
    before_velocities = pd.Series(dtype='int64')
    after_velocities = pd.Series(dtype='int64')

    before_angles = pd.Series(dtype='float64')
    after_angles = pd.Series(dtype='float64')

    before_altitudes = pd.Series(dtype='float64')
    after_altitudes = pd.Series(dtype='float64')

    for ind in flokk: # Loop through to get the velocities, angles and altitudes from every sheep in the flock before and after attack
        before = sheep_data.loc[(sheep_data['date'] == day_before) & (sheep_data['individual'] == ind)]
        after = sheep_data.loc[(sheep_data['date'] == day_after) & (sheep_data['individual'] == ind)]
        
        if len(before) > 0:
            before_velocities = pd.concat([before_velocities, before['velocity']])
            before_angles = pd.concat([before_angles, before['angle']])
            before_altitudes = pd.concat([before_altitudes, before['altitude']])
        if len(after) > 0:
            after_velocities = pd.concat([after_velocities, after['velocity']])
            after_angles = pd.concat([after_angles, after['angle']])
            after_altitudes = pd.concat([after_altitudes, after['altitude']])
    
    return before_velocities, after_velocities, before_angles, after_angles, before_altitudes, after_altitudes


# Return three sperate dataframes for velocity, trajectory angle and altitude 
# and their repecitve means and stdevs on before, attack and after date 
def create_attack_df(sheep_data, attack_data):
    attack_data['Skadedato_fra'] = pd.to_datetime(attack_data['Skadedato_fra'])
    attack_data['Skadedato_til'] = pd.to_datetime(attack_data['Skadedato_til'])

    sheep_data['date'] = pd.to_datetime(sheep_data['date_time']).dt.date # create new column with only date
    
    # id = rovbaseid, num_ind = number of individs in that attack-flock
    df_velocity = pd.DataFrame(columns=['id', 'num_ind', 'mean_velocity', 'std_velocity', 
                                     'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'])
    
    df_angle = pd.DataFrame(columns=['id', 'num_ind', 'mean_t_angle', 'std_t_angle', 
                                     'before_mean_t_angle', 'before_std_t_angle', 'after_mean_t_angle', 'after_std_t_angle'])
    
    df_altitude = pd.DataFrame(columns=['id', 'num_ind', 'mean_altitude', 'std_altitude', 
                                     'before_mean_altitude', 'before_std_altitude', 'after_mean_altitude', 'after_std_altitude'])
    
    for a in attack_data.index:
        # Get attack dates and date right before and after the attack
        attack_start_date = attack_data.at[a, 'Skadedato_fra'].date()
        attack_end_date = attack_data.at[a, 'Skadedato_til'].date()

        day_before = attack_start_date - timedelta(days=1)
        day_after = attack_end_date + timedelta(days=1)
        
        # Each index corresponds to a row in the sheep data where date is within the attack dates
        sheep_indexes = sheep_data.loc[(attack_start_date <= sheep_data['date']) & (sheep_data['date'] <= attack_end_date)].index
        
        flokk = [] # the flock in one attack
        velocities = [] # the velocities in the attack from flock
        angles = [] # the trajectory angles in the the attack from flock
        altitudes = [] # the altitudes in the attack from the flock
        
        for sheep in sheep_indexes:
            # Get latitude and longitude for sheep and attack
            sheep_lat = sheep_data.at[sheep, 'latitude']
            sheep_long = sheep_data.at[sheep, 'longitude']
            attack_lat = attack_data.at[a, 'latitude']
            attack_long = attack_data.at[a, 'longitude']

            # Calculate the distance from the sheep to the attack
            distance_to_attack = calculate_distance(sheep_lat, sheep_long, attack_lat, attack_long)
            
            if distance_to_attack <= 1500: # The sheep is nearby the attack
                velocity = sheep_data.at[sheep, 'velocity']
                angle = sheep_data.at[sheep, 'angle']
                altitude = sheep_data.at[sheep, 'altitude']
                
                velocities.append(int(velocity))
                angles.append(float(angle))
                altitudes.append(int(altitude))

                ind = sheep_data.at[sheep, 'individual']
                if ind not in flokk:
                    flokk.append(ind)
        
        if len(flokk) != 0:
            
            before_velocities, after_velocities, before_angles, after_angles, before_altitudes, after_altitudes = get_features_before_and_after_attack(flokk, sheep_data, day_before, day_after)
            
            # On attack date
            mean_velocity = statistics.mean(velocities) 
            mean_angle = statistics.mean(angles)
            mean_altitude = statistics.mean(altitudes)
            std_velocity = statistics.stdev(velocities) if len(velocities) > 2 else 0
            std_angle = statistics.stdev(velocities) if len(velocities) > 2 else 0
            std_altitude = statistics.stdev(velocities) if len(velocities) > 2 else 0

            # On before date
            before_mean_velocity = statistics.mean(before_velocities) if len(before_velocities.index) > 0 else 0 
            before_std_velocity = statistics.stdev(before_velocities) if len(before_velocities.index) > 2 else 0 
            before_mean_angle = statistics.mean(before_angles) if len(before_angles.index) > 0 else 0 
            before_std_angle = statistics.stdev(before_angles) if len(before_angles.index) > 2 else 0 
            before_mean_altitude = statistics.mean(before_altitudes) if len(before_altitudes.index) > 0 else 0 
            before_std_altitude = statistics.stdev(before_altitudes) if len(before_altitudes.index) > 2 else 0 
            
            # On after date
            after_mean_velocity = statistics.mean(after_velocities) if len(after_velocities.index) > 0 else 0 
            after_std_velocity = statistics.stdev(after_velocities) if len(after_velocities.index) > 2 else 0 
            after_mean_angle = statistics.mean(after_angles) if len(after_angles.index) > 0 else 0 
            after_std_angle = statistics.stdev(after_angles) if len(after_angles.index) > 2 else 0 
            after_mean_altitude = statistics.mean(after_altitudes) if len(after_altitudes.index) > 0 else 0 
            after_std_altitude = statistics.stdev(after_altitudes) if len(after_altitudes.index) > 2 else 0             

            rovId = attack_data.at[a, 'RovbaseID']  

            # Add to dataframes
            df_velocity.loc[len(df_velocity.index)] = [rovId, len(flokk), mean_velocity, std_velocity, 
                                                before_mean_velocity, before_std_velocity, after_mean_velocity, after_std_velocity]

            df_angle.loc[len(df_angle.index)] = [rovId, len(flokk), mean_angle, std_angle, 
                                                before_mean_angle, before_std_angle, after_mean_angle, after_std_angle]
            
            df_altitude.loc[len(df_altitude.index)] = [rovId, len(flokk), mean_altitude, std_altitude, 
                                                before_mean_altitude, before_std_altitude, after_mean_altitude, after_std_altitude]
            
            # Reset variables
            flokk = []
            velocities = []
            angles = []
            altitudes = []
            before_velocities = []
            after_velocities = []
            before_angles = []
            after_angles = []
            before_altitudes = []
            after_altitudes = []

    return df_velocity, df_angle, df_altitude

In [131]:
def create_means(df_flokk, feature):
    before = 'before_{feature}'.format(feature=feature)
    after = 'after_{feature}'.format(feature=feature)

    org_mean = df_flokk[feature].mean()
    before_mean = df_flokk[before].mean()
    after_mean = df_flokk[after].mean()
    return [before_mean, org_mean, after_mean]

def create_stdev(df_flokk, feature):
    before = 'before_{feature}'.format(feature=feature)
    after = 'after_{feature}'.format(feature=feature)

    org_stdev = statistics.stdev(df_flokk[feature])
    before_stdev = statistics.stdev(df_flokk[before])
    after_stdev = statistics.stdev(df_flokk[after])
    return [before_stdev, org_stdev, after_stdev]

def create_fig(list, year, feature, text):
    print(list)
    fig = px.bar(list, width=800, height=500, title='{text} {feature} for the day before, during and after attack for {year}'.format(text=text, feature=feature, year=year))
    fig.update_layout(
    xaxis_title="Dager",
    yaxis_title="{text} {feature}".format(text=text, feature=feature),
    xaxis = dict(
        tickmode = 'array',
        tickvals = [0, 1, 2],
        ticktext = ['før', 'angrepsdagen', 'etter']
        )
    )
    fig.show()


## For all data

In [134]:
df_flokk_velocity, df_flokk_angle, df_flokk_altitude = create_attack_df(sheep_data=df, attack_data=attack_data)


In [135]:
df_flokk_velocity = df_flokk_velocity.drop_duplicates(['num_ind', 'mean_velocity', 'std_velocity', 'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'], ignore_index=True)
df_flokk_angle = df_flokk_angle.drop_duplicates(['num_ind', 'mean_t_angle', 'std_t_angle', 'before_mean_t_angle', 'before_std_t_angle', 'after_mean_t_angle', 'after_std_t_angle'], ignore_index=True)
df_flokk_altitude = df_flokk_altitude.drop_duplicates(['num_ind',  'mean_altitude', 'std_altitude', 'before_mean_altitude', 'before_std_altitude', 'after_mean_altitude', 'after_std_altitude'], ignore_index=True)

In [136]:
df_flokk_altitude.head(100)

Unnamed: 0,id,num_ind,mean_altitude,std_altitude,before_mean_altitude,before_std_altitude,after_mean_altitude,after_std_altitude
0,K469949,21,226.284848,196.598371,272.283548,80.545739,235.838276,61.564366
1,K469990,51,360.840782,69.116665,375.879024,117.729729,373.401892,122.156154
2,K470128,6,333.571429,53.039988,341.142895,25.170018,330.937436,46.539666
3,K470124,12,339.189873,118.998646,386.581486,78.262065,335.996389,69.19091
4,K470210,11,381.521739,45.153931,331.307818,71.328066,306.338704,81.078419
5,K470432,2,361.181818,83.365788,342.659,5.001423,416.956364,37.732195
6,K470434,22,402.390805,90.785134,419.457885,96.98443,409.106393,78.615341
7,K470689,6,374.954545,54.917727,345.190345,21.356716,357.727742,31.220045
8,K470687,4,349.75,85.414947,350.679,28.146742,319.238696,54.757775
9,K478267,1,399.0,0.0,263.91,37.993308,383.67,23.700108


In [140]:
list_velocity_means = create_means(df_flokk_velocity, 'mean_velocity')
list_angle_means = create_means(df_flokk_angle, 'mean_t_angle')
list_altitude_means = create_means(df_flokk_altitude, 'mean_altitude')

In [138]:
create_fig(list_velocity_means, '2015-2021', feature='velocity', text='Mean')
create_fig(list_angle_means, '2015-2021', feature='angle', text='Mean')
create_fig(list_altitude_means, '2015-2021', feature='altitude', text='Mean')

[105.78455028555472, 203.0800105308803, 127.30106837100212]


[98.67573332456979, 106.83286437038939, 101.06024276141028]


[413.83108178116214, 420.54576048366357, 423.51729446910537]


In [143]:

list_velocity_stdev = create_means(df_flokk_velocity, 'std_velocity')
list_angle_stdev = create_means(df_flokk_angle, 'std_t_angle')
list_altitude_stdev = create_means(df_flokk_altitude, 'std_altitude')

create_fig(list_velocity_stdev, '2015-2021', feature='velocity', text='Standard deviation')
create_fig(list_angle_stdev, '2015-2021', feature='angle', text='Standard deviation')
create_fig(list_altitude_stdev, '2015-2021', feature='altitude', text='Standard deviation')

[158.25183671677306, 143.85818409431113, 197.12220826096956]


[59.346577103937626, 143.85818409431113, 55.668508980756194]


[79.37671810299705, 143.85818409431113, 63.43162737750699]


## For each year

Tar bort 2015, 2019 og 2021 siden de ikke har noen rovdyrangrep.

In [None]:
velocity2016, angle2016, altitude2016 = create_attack_df(sheep_data=df2016, attack_data=attack_data)
velocity2017, angle2017, altitude2017 = create_attack_df(sheep_data=df2017, attack_data=attack_data)
velocity2018, angle2018, altitude2018 = create_attack_df(sheep_data=df2018, attack_data=attack_data)
velocity2020, angle2020, altitude2020 = create_attack_df(sheep_data=df2020, attack_data=attack_data)

In [None]:
# Dropper rader der de er helt like på alt bortsett fra rovbaseID
velocity2016 = velocity2016.drop_duplicates(['num_ind', 'mean_velocity', 'std_velocity', 'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'], ignore_index=True)
velocity2017 = velocity2017.drop_duplicates(['num_ind', 'mean_velocity', 'std_velocity', 'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'], ignore_index=True)
velocity2018 = velocity2018.drop_duplicates(['num_ind', 'mean_velocity', 'std_velocity', 'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'], ignore_index=True)
velocity2020 = velocity2020.drop_duplicates(['num_ind', 'mean_velocity', 'std_velocity', 'before_mean_velocity', 'before_std_velocity', 'after_mean_velocity', 'after_std_velocity'], ignore_index=True)

# Må gjøre det samme for angle og altitude dataframen

In [None]:
list_means2016 = create_means(df_flokk=velocity2016)
list_means2017 = create_means(df_flokk=velocity2017)
list_means2018 = create_means(df_flokk=velocity2018)
list_means2020 = create_means(df_flokk=velocity2020)

create_fig(list_means2016, '2016', 'Mean')
create_fig(list_means2017, '2017', 'Mean')
create_fig(list_means2018, '2018', 'Mean')
create_fig(list_means2020, '2020', 'Mean')

In [None]:
list_stdev2016 = create_stdev(df_flokk=velocity2016)
list_stdev2017 = create_stdev(df_flokk=velocity2017)
list_stdev2018 = create_stdev(df_flokk=velocity2018)
list_stdev2020 = create_stdev(df_flokk=velocity2020)

create_fig(list_stdev2016, '2016', 'Standard deviation')
create_fig(list_stdev2017, '2017', 'Standard deviation')
create_fig(list_stdev2018, '2018', 'Standard deviation')
create_fig(list_stdev2020, '2020', 'Standard deviation')