<a href="https://colab.research.google.com/github/lazycloud0/quantum_hackathon_2024/blob/main/quantumcomputing_hackathon_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Installing dependencies
!pip install pennylane
!pip install qiskit
!pip install numpy pandas matplotlib scipy
!pip install folium
!pip install ipywidgets





# Project - Quantum Approach to Biodiversity Mapping & Predictions

Figma [here](https://www.figma.com/board/YJkl666NgYY9lzeGnKZ1lw/Quantum-Hackathon-2024?node-id=0-1&node-type=canvas&t=7ul1ZMUwhKcWZdU6-0)

Goals:
1.  
2.  

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import pennylane as qml
import qiskit as qk
import zipfile
import io
import os
import folium
import ipywidgets as widgets




In [None]:
# The data files are stored on Google Drive and fetched from there, rather than linked or uploaded here:
# Colab runs on a temporary VM, so uploaded files would have to be re-uploaded every session.
from pydrive2.auth import GoogleAuth
from google.colab import drive
from pydrive2.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from folium.plugins import TimestampedGeoJson
from IPython.display import display


# Data

[BioTIME database](https://zenodo.org/records/5026943#.Y9ZAKdJBwUE)

Data Citation:
Dornelas M, Antão LH, Moyes F, Bates AE, Magurran AE, et al. BioTIME: A database of biodiversity time series for the Anthropocene. Global Ecol Biogeogr. 2018; 27:760–786. https://doi.org/10.1111/geb.12729

## Load Data

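This section loads the study metadata and the biodiversity records.
Because Colab storage is transient, the data files are uploaded to Google Drive once and fetched from there on every run;
see [here](https://towardsdatascience.com/different-ways-to-connect-google-drive-to-a-google-colab-notebook-pt-1-de03433d2f7a) for ways to connect Google Drive to a Colab notebook.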

In [None]:
# Authenticate the Colab user, then pass the application-default credentials
# to PyDrive2 so that files can be fetched by their Drive id.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds `drive`, shadowing the `google.colab.drive` module
# imported earlier in the notebook.
drive = GoogleDrive(gauth)

# Drive ids of the metadata CSV and of the zipped query export.
metadata_file_id = '1s6WdiWhEcj5rNAunzz-M6yYV0nREhdJM'
zip_data_file_id = '156DLnRJFfUhwcKnz8V3mqGuiO0VEqIHC'

# Metadata: create a handle for the Drive file and save its content locally.
meta_data_download = drive.CreateFile(dict(id=metadata_file_id))
meta_data_download.GetContentFile(
    'BioTIMEMetadata_24_06_2021.csv'
)

# Records: same two steps for the zipped query export.
file_download = drive.CreateFile(dict(id=zip_data_file_id))
file_download.GetContentFile(
    'BioTIMEQuery_24_06_2021.zip'
)

In [None]:
# Read the downloaded metadata CSV (decoded as Latin-1) and preview the first rows.
metadata_df = pd.read_csv(
    "BioTIMEMetadata_24_06_2021.csv",
    encoding="latin1",
)
metadata_df.head(n=5)

Unnamed: 0,STUDY_ID,REALM,CLIMATE,GENERAL_TREAT,TREATMENT,TREAT_COMMENTS,TREAT_DATE,HABITAT,PROTECTED_AREA,BIOME_MAP,...,WEB_LINK,DATA_SOURCE,METHODS,SUMMARY_METHODS,LINK_ID,COMMENTS,DATE_STUDY_ADDED,ABUNDANCE_TYPE,BIOMASS_TYPE,SAMPLE_DESC_NAME
0,10,Terrestrial,Temperate,,,,,Woodland,False,Temperate broadleaf and mixed forests,...,http://esapubs.org/archive/ecol/E082/011/defau...,Ecology,Itasca State Park. Minnesota. is a 13 000-ha f...,Plots,,Dates added,Oct-12,Count,,lat_long_treefallid_controlvs.treatment_basevs...
1,18,Terrestrial,Temperate,,,,,Sagebrush steppe,False,Deserts and xeric shrublands,...,http://esapubs.org/archive/ecol/E091/243/defau...,Ecology,Site descriptionThe U.S. Sheep Experiment Stat...,Plots,,Dates added Years (1923. 1926. 1927. 1929) del...,Oct-12,Count,,lat_long_quadrat_year
2,33,Marine,Temperate,,,,,Seaweed beds,False,Temperate shelf and seas ecoregions,...,http://plankt.oxfordjournals.org/content/32/5.toc,Oxford Journals,Sampling and enumeration of phytoplanktonWeekl...,Stations,,Inaccurate decimal latitude/longitude conversi...,Oct-12,Count,,lat_long_location_method_date
3,39,Terrestrial,Temperate,,,,,Deciduous forest,False,Temperate broadleaf and mixed forests,...,http://www.esajournals.org/toc/emon/56/3,Ecology,This study was conducted in the Hubbard Brook ...,Plots,,Removed records prior to 1986 as per provider ...,Oct-12,Density,,lat_long_timeTransect_year
4,41,Terrestrial,Temperate,,,,,Woodland,False,Temperate broadleaf and mixed forests,...,http://www.esajournals.org/toc/ecol/41/4,Ecology,Time and space and the variation of species - ...,Counts,,Dates added,Oct-12,Count,,lat_long_census_year


In [None]:
# Open the downloaded archive and load the CSV it contains into `data_df`.
# The archive is referenced by the same relative name it was downloaded under
# (not a hard-coded `/content/...` path), so this cell no longer depends on the
# working directory being Colab's default.
with zipfile.ZipFile("BioTIMEQuery_24_06_2021.zip") as z:
    # List the archive members so the expected CSV name can be checked by eye.
    print(z.namelist())
    with z.open("BioTIMEQuery_24_06_2021.csv") as f:
        # NOTE(review): the recorded output shows a warning raised on this line,
        # probably about mixed column dtypes; consider passing an explicit
        # `dtype=` once the affected column is confirmed.
        data_df = pd.read_csv(f, encoding='latin1')

data_df.head()

['BioTIMEQuery_24_06_2021.csv']


  data_df = pd.read_csv(f, encoding='latin1')


Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,SAMPLE_DESC,PLOT,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS,GENUS,SPECIES,GENUS_SPECIES
0,1,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,22,47.4,-95.12,1.0,0.0,Acer,rubrum,Acer rubrum
1,2,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,23,47.4,-95.12,3.0,0.0,Acer,saccharum,Acer saccharum
2,3,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,24,47.4,-95.12,1.0,0.0,Acer,spicatum,Acer spicatum
3,4,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,607,47.4,-95.12,12.0,0.0,Corylus,cornuta,Corylus cornuta
4,5,10,,,1984,47.400000_-95.120000_12_Control_0_Small,12,1911,47.4,-95.12,1.0,0.0,Populus,pinnata,Populus pinnata


In [None]:
# Show the last five rows of the biodiversity records.
data_df.tail(n=5)

Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,SAMPLE_DESC,PLOT,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS,GENUS,SPECIES,GENUS_SPECIES
8552244,26178100,548,,,2007,49.1014548954342_13.3200349605548_T3_56_2007,T3_56,49340,49.10146,13.32004,3.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552245,26179100,548,,,2009,49.1014548954342_13.3200349605548_T3_56_2009,T3_56,49340,49.10146,13.32004,4.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552246,26180100,548,,,2012,49.1014548954342_13.3200349605548_T3_56_2012,T3_56,49340,49.10146,13.32004,3.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552247,26181100,548,,,2007,49.097317976565_13.3173542074378_T3_51_2007,T3_51,40355,49.09732,13.31735,10.0,,Veronica,chamaedrys,Veronica chamaedrys
8552248,26182100,548,,,2011,49.1014548954342_13.3200349605548_T3_56_2011,T3_56,40355,49.10146,13.32004,4.0,,Veronica,chamaedrys,Veronica chamaedrys


# Visualisation

In [None]:
# Centre the base map on the mean latitude and mean longitude of all records.
map_center = [data_df[column].mean() for column in ('LATITUDE', 'LONGITUDE')]
m = folium.Map(
    location=map_center,
    zoom_start=2,
)
m

In [None]:
# Print the column dtypes and memory footprint of the full table.
data_df.info()
# Keep only the records whose YEAR is 2012.
filtered_df = data_df.loc[data_df['YEAR'].eq(2012)]
filtered_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8552249 entries, 0 to 8552248
Data columns (total 15 columns):
 #   Column                    Dtype  
---  ------                    -----  
 0   Unnamed: 0                int64  
 1   STUDY_ID                  int64  
 2   DAY                       float64
 3   MONTH                     float64
 4   YEAR                      int64  
 5   SAMPLE_DESC               object 
 6   PLOT                      object 
 7   ID_SPECIES                int64  
 8   LATITUDE                  float64
 9   LONGITUDE                 float64
 10  sum.allrawdata.ABUNDANCE  float64
 11  sum.allrawdata.BIOMASS    float64
 12  GENUS                     object 
 13  SPECIES                   object 
 14  GENUS_SPECIES             object 
dtypes: float64(6), int64(4), object(5)
memory usage: 978.7+ MB


Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,SAMPLE_DESC,PLOT,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS,GENUS,SPECIES,GENUS_SPECIES
27872,27873,39,,,2012,43.91_-71.75_2012,,40021,43.91000,-71.75000,7.0,,Hermit,Thrush,Hermit Thrush
27873,27874,39,,,2012,43.91_-71.75_2012,,40189,43.91000,-71.75000,1.0,,Swainsons,Thrush,Swainsons Thrush
27874,27875,39,,,2012,43.91_-71.75_2012,,40318,43.91000,-71.75000,5.5,,Yellow-bellied,Sapsucker,Yellow-bellied Sapsucker
27875,27876,39,,,2012,43.91_-71.75_2012,,40321,43.91000,-71.75000,2.0,,Yellow-throated,Warbler,Yellow-throated Warbler
27876,27877,39,,,2012,43.91_-71.75_2012,,4115,43.91000,-71.75000,0.2,,Mniotilta,varia,Mniotilta varia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8552228,26162100,548,,,2012,49.1024831991745_13.3076131123953_LAO_7_2012,LAO_7,49339,49.10248,13.30761,30.0,,Vaccinium,myrtillus,Vaccinium myrtillus
8552234,26168100,548,,,2012,49.1028849866041_13.3073060401933_LAO_6_2012,LAO_6,49339,49.10289,13.30731,5.0,,Vaccinium,myrtillus,Vaccinium myrtillus
8552240,26174100,548,,,2012,49.103286863137_13.3069989654415_LAO_5_2012,LAO_5,49339,49.10329,13.30700,20.0,,Vaccinium,myrtillus,Vaccinium myrtillus
8552243,26177100,548,,,2012,49.0981453488146_13.3178902393319_T3_52_2012,T3_52,49340,49.09815,13.31789,0.5,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea


In [None]:
# Rebuild the base map inside this cell so that re-running it does not stack a
# second copy of every marker onto a map created by an earlier cell.
m = folium.Map(location=map_center, zoom_start=2)

# Walk the needed columns in parallel instead of using `iterrows()`, which
# builds a Series object for every row and is slow on large frames.
marker_values = zip(
    filtered_df['LATITUDE'],
    filtered_df['LONGITUDE'],
    filtered_df['sum.allrawdata.ABUNDANCE'],
    filtered_df['sum.allrawdata.BIOMASS'],
)
for latitude, longitude, abundance, biomass in marker_values:
    # A marker needs coordinates and a non-negative abundance to size it;
    # skip records where any of those is missing.
    if pd.isna(latitude) or pd.isna(longitude) or pd.isna(abundance) or abundance < 0:
        continue
    folium.CircleMarker(
        location=(latitude, longitude),
        radius=abundance ** 0.5,  # marker radius is the square root of abundance
        popup=(f"Biomass: {biomass}<br>Abundance: {abundance}"),
        color='blue',
        fill=True,
        fill_color='cyan',
        fill_opacity=0.3
    ).add_to(m)
m

In [43]:
# Year selector: an integer slider that moves in 5-year steps.
year_slider = widgets.IntSlider(
    description='Year:',
    min=1980,                               # first selectable year
    max=2050,                               # last selectable year
    step=5,                                 # 5-year increments
    value=2024,                             # initial slider position
    continuous_update=False,
    layout=widgets.Layout(width='800px'),   # slider width
)

# Output area in which the change handler shows its results.
output = widgets.Output()

# Function to handle slider value changes
def on_value_change(change):
    """Show the records for the year picked on the slider.

    Parameters
    ----------
    change : dict-like
        Change notification delivered by the slider's ``observe`` hook; only
        its ``"new"`` entry (the newly selected year) is read.

    The previous content of the ``output`` widget is cleared, then the
    selected year and the rows of ``data_df`` whose ``YEAR`` equals it are
    shown inside that widget.
    """
    selected_year = change["new"]
    # Use the Output widget's own method: the bare name `clear_output` was
    # never imported, so calling it raised NameError.
    output.clear_output()
    with output:
        print(f'Year selected: {selected_year}')
        # Filter rows on the YEAR column; `data_df[selected_year]` looked up a
        # column labelled with the year and raised KeyError.
        display(data_df[data_df['YEAR'] == selected_year])

# Call `on_value_change` whenever the slider's `value` trait changes.
year_slider.observe(handler=on_value_change, names='value')

# Render the slider, followed by the output area.
display(year_slider, output)



IntSlider(value=2024, continuous_update=False, description='Year:', layout=Layout(width='800px'), max=2050, mi…

Output()