In [1]:
#imports

import pandas as pd
import numpy as np
import dataframe_image as dfi
import itertools as it
import category_encoders as ce
from matplotlib import pyplot as plt

In [2]:
# Load the bee dataset and discard the 'Unnamed: 0' column
# (presumably a leftover index column from an earlier to_csv — TODO confirm).
final = (
    pd.read_csv('Final Data/final_bees_av.csv')
    .drop(columns=['Unnamed: 0'])
)

In [3]:
#helper functions to get the correlations in the dataset
def get_redundant_pairs(df):
    """Return the column-name pairs on or below the diagonal of a column grid.

    For every column position ``i`` the result contains ``(cols[i], cols[j])``
    for each ``j <= i``: the column paired with itself and with every column
    that precedes it in ``df.columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are paired up.

    Returns
    -------
    set of tuple
        ``(later_column, earlier_or_same_column)`` label pairs.
    """
    names = df.columns
    return {
        (names[row], names[col])
        for row in range(df.shape[1])
        for col in range(row + 1)
    }

def get_top_abs_correlations(df, n=5):
    """Return the ``n`` largest absolute pairwise correlations in ``df``.

    The correlation matrix from ``df.corr()`` is made absolute and unstacked
    into a Series keyed by column-label pairs. Pairs reported by
    ``get_redundant_pairs`` (self-pairs and one ordering of each column pair)
    are dropped so every pair of distinct columns is counted once.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated against each other.
    n : int, default 5
        Number of leading entries to keep after sorting.

    Returns
    -------
    pandas.Series
        Absolute correlations sorted in descending order, truncated to the
        first ``n`` entries.
    """
    redundant = get_redundant_pairs(df)
    pairwise = df.corr().abs().unstack()
    ranked = pairwise.drop(labels=redundant).sort_values(ascending=False)
    return ranked[:n]

In [4]:
# Find the 20 highest absolute correlations in the dataset,
# ignoring the 'Species', 'Total' and 'Present' columns.
corr = get_top_abs_correlations(
    final.drop(columns=['Species', 'Total', 'Present']),
    20,
).reset_index()
corr.columns = ['Feature 1', 'Feature 2', 'Absolute Correlation']

# Export the table to an image file, then show it as the cell output.
dfi.export(corr, 'Plots/corr.png')

corr

Unnamed: 0,Feature 1,Feature 2,Absolute Correlation
0,Loose_bark,Crevices,0.901258
1,Loose_bark,Hedgerows,0.884731
2,Crevices,RoughAreas,0.856343
3,BranchHols,Loose_bark,0.853707
4,Crevices,Hedgerows,0.825599
5,Hedgerows,RoughAreas,0.823497
6,Loose_bark,RoughAreas,0.786677
7,BranchHols,Crevices,0.785373
8,Crevices,Ponds,0.784324
9,FungalFrts,RoughAreas,0.780378


In [5]:
# Finding, for each species, the share of observations in which it was
# labelled as present.

# One-hot encode the 'Species' column.
# NOTE(review): `en` and `final_en` are not used by the rest of this cell; they
# are kept in case other cells rely on them — remove if nothing does.
en = ce.OneHotEncoder(cols = 'Species', use_cat_names=True)
final_en = en.fit_transform(final)

# The previous version first built `species` from the one-hot columns and then
# immediately overwrote it with the selection below; that dead assignment has
# been removed.
species = final[['Species', 'Present']]

# Mean of 'Present' per species, highest first (a fraction of observations,
# assuming 'Present' is a 0/1 or boolean flag — TODO confirm).
percent = (
    species.groupby(['Species'])
    .mean()
    .sort_values(['Present'], ascending=False)
    .reset_index()
)

# Export the table to an image file, then show it as the cell output.
dfi.export(percent, 'Plots/percent.png')

percent

Unnamed: 0,Species,Present
0,Buff-tailed Bumblebee,0.746711
1,Common Carder,0.721217
2,Red-tailed Bumblebee,0.664474
3,a white-tailed bumblebee,0.650493
4,Early Bumblebee,0.530428
5,Indet. Bee,0.526316
6,Garden Bumblebee,0.485197
7,White/Buff-tailed Bumblebee workers,0.445724
8,Tree Bumblebee,0.425164
9,Honeybee,0.381579
