In [1]:
# Install dependencies
%pip install pennylane
%pip install qiskit qiskit_machine_learning
%pip install numpy pandas matplotlib scipy scikit-learn
%pip install folium
%pip install ipywidgets

Note: you may need to restart the kernel to use updated packages.
Collecting qiskit_machine_learning
  Downloading qiskit_machine_learning-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting scikit-learn>=1.2.0 (from qiskit_machine_learning)
  Downloading scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl.metadata (13 kB)
Collecting fastdtw (from qiskit_machine_learning)
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting joblib>=1.2.0 (from scikit-learn>=1.2.0->qiskit_machine_learning)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn>=1.2.0->qiskit_machine_learning)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading qiskit_machine_learning-0.8.0-py3-none-any.whl (237 kB)
Downloading scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x

# Project - Quantum Approach to Biodiversity Mapping & Predictions

Figma [here](https://www.figma.com/board/YJkl666NgYY9lzeGnKZ1lw/Quantum-Hackathon-2024?node-id=0-1&node-type=canvas&t=7ul1ZMUwhKcWZdU6-0)

Goals:  
1.  
2.  

In [13]:
# Import libraries
import numpy as np
import pandas as pd
import pennylane as qml
import qiskit
import zipfile
import io
import os
import folium
import ipywidgets as widgets
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_machine_learning.algorithms import VQR
from qiskit_machine_learning.circuit.library import RawFeatureVector
from folium.plugins import TimestampedGeoJson, MarkerCluster
from IPython.display import display
from ipywidgets import interact, IntSlider, Select, Layout


# Data

[BioTIME database](https://zenodo.org/records/5026943#.Y9ZAKdJBwUE)

Data Citation:
Dornelas M, Antão LH, Moyes F, Bates, AE, Magurran, AE, et al. BioTIME: A database of biodiversity time series for the Anthropocene. Global Ecol Biogeogr. 2018; 27:760 - 786. https://doi.org/10.1111/geb.12729

## Load Data

metadata and biodiversity data
see [here](https://towardsdatascience.com/different-ways-to-connect-google-drive-to-a-google-colab-notebook-pt-1-de03433d2f7a)
effectively need to upload the data to gdrive as collab storage is transient

In [3]:
metadata_df  = pd.read_csv("BioTIMEMetadata_24_06_2021.csv", encoding='latin1')
metadata_df.head()

Unnamed: 0,STUDY_ID,REALM,CLIMATE,GENERAL_TREAT,TREATMENT,TREAT_COMMENTS,TREAT_DATE,HABITAT,PROTECTED_AREA,BIOME_MAP,...,WEB_LINK,DATA_SOURCE,METHODS,SUMMARY_METHODS,LINK_ID,COMMENTS,DATE_STUDY_ADDED,ABUNDANCE_TYPE,BIOMASS_TYPE,SAMPLE_DESC_NAME
0,10,Terrestrial,Temperate,,,,,Woodland,False,Temperate broadleaf and mixed forests,...,http://esapubs.org/archive/ecol/E082/011/defau...,Ecology,Itasca State Park. Minnesota. is a 13 000-ha f...,Plots,,Dates added,Oct-12,Count,,lat_long_treefallid_controlvs.treatment_basevs...
1,18,Terrestrial,Temperate,,,,,Sagebrush steppe,False,Deserts and xeric shrublands,...,http://esapubs.org/archive/ecol/E091/243/defau...,Ecology,Site descriptionThe U.S. Sheep Experiment Stat...,Plots,,Dates added Years (1923. 1926. 1927. 1929) del...,Oct-12,Count,,lat_long_quadrat_year
2,33,Marine,Temperate,,,,,Seaweed beds,False,Temperate shelf and seas ecoregions,...,http://plankt.oxfordjournals.org/content/32/5.toc,Oxford Journals,Sampling and enumeration of phytoplanktonWeekl...,Stations,,Inaccurate decimal latitude/longitude conversi...,Oct-12,Count,,lat_long_location_method_date
3,39,Terrestrial,Temperate,,,,,Deciduous forest,False,Temperate broadleaf and mixed forests,...,http://www.esajournals.org/toc/emon/56/3,Ecology,This study was conducted in the Hubbard Brook ...,Plots,,Removed records prior to 1986 as per provider ...,Oct-12,Density,,lat_long_timeTransect_year
4,41,Terrestrial,Temperate,,,,,Woodland,False,Temperate broadleaf and mixed forests,...,http://www.esajournals.org/toc/ecol/41/4,Ecology,Time and space and the variation of species - ...,Counts,,Dates added,Oct-12,Count,,lat_long_census_year


In [5]:
with zipfile.ZipFile("BioTIMEQuery_24_06_2021.zip") as z:
    print(z.namelist())
    with z.open("BioTIMEQuery_24_06_2021.csv") as f:
        data_df = pd.read_csv(f, encoding='latin1')
# data_df = pd.read_csv("BioTIMEQuery_24_06_2021.csv", encoding='latin1')
data_df.head()

['BioTIMEQuery_24_06_2021.csv']


  data_df = pd.read_csv(f, encoding='latin1')


Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,SAMPLE_DESC,PLOT,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS,GENUS,SPECIES,GENUS_SPECIES
0,1,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,22,47.4,-95.12,1.0,0.0,Acer,rubrum,Acer rubrum
1,2,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,23,47.4,-95.12,3.0,0.0,Acer,saccharum,Acer saccharum
2,3,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,24,47.4,-95.12,1.0,0.0,Acer,spicatum,Acer spicatum
3,4,10,,,1984,47.400000_-95.120000_12_Control_0_Medium,12,607,47.4,-95.12,12.0,0.0,Corylus,cornuta,Corylus cornuta
4,5,10,,,1984,47.400000_-95.120000_12_Control_0_Small,12,1911,47.4,-95.12,1.0,0.0,Populus,pinnata,Populus pinnata


In [6]:
data_df.tail()

Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,SAMPLE_DESC,PLOT,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS,GENUS,SPECIES,GENUS_SPECIES
8552244,26178100,548,,,2007,49.1014548954342_13.3200349605548_T3_56_2007,T3_56,49340,49.10146,13.32004,3.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552245,26179100,548,,,2009,49.1014548954342_13.3200349605548_T3_56_2009,T3_56,49340,49.10146,13.32004,4.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552246,26180100,548,,,2012,49.1014548954342_13.3200349605548_T3_56_2012,T3_56,49340,49.10146,13.32004,3.0,,Vaccinium,vitis.idaea,Vaccinium vitis.idaea
8552247,26181100,548,,,2007,49.097317976565_13.3173542074378_T3_51_2007,T3_51,40355,49.09732,13.31735,10.0,,Veronica,chamaedrys,Veronica chamaedrys
8552248,26182100,548,,,2011,49.1014548954342_13.3200349605548_T3_56_2011,T3_56,40355,49.10146,13.32004,4.0,,Veronica,chamaedrys,Veronica chamaedrys


In [7]:
data_df.columns = data_df.columns.str.strip()
data_df.describe()

Unnamed: 0.1,Unnamed: 0,STUDY_ID,DAY,MONTH,YEAR,ID_SPECIES,LATITUDE,LONGITUDE,sum.allrawdata.ABUNDANCE,sum.allrawdata.BIOMASS
count,8552249.0,8552249.0,7108710.0,7132111.0,8552249.0,8552249.0,8552249.0,8552249.0,8552126.0,8525108.0
mean,4335884.0,224.5117,14.42245,6.520774,1993.227,17677.08,28.36394,-40.49484,10956.08,6540004.0
std,2959943.0,105.1472,9.120689,2.808826,15.22319,15863.67,31.31463,90.91774,2312769.0,9405228000.0
min,1.0,10.0,1.0,1.0,1874.0,1.0,-88.98167,-180.0,0.0,0.0
25%,2138366.0,148.0,6.0,5.0,1987.0,4975.0,27.31,-96.58333,1.0,0.0
50%,4276428.0,195.0,14.0,7.0,1996.0,8606.0,39.08333,-71.7,1.0,0.0
75%,6424869.0,302.0,22.0,8.0,2003.0,36683.0,46.03832,-3.6876,6.0,0.6
max,85461000.0,548.0,31.0,12.0,2018.0,52064.0,89.7,180.0,6400000000.0,27237600000000.0


# Visualisation

In [8]:
# gather useful aggregates
default_lat = data_df['LATITUDE'].mean()
default_lon = data_df['LONGITUDE'].mean()

In [14]:
def update_map(year, species_filter='All Species'):

    filtered_data = data_df[data_df['YEAR'] == year].copy()
    
    # species filter if not "All Species"
    if species_filter != 'All Species':
        filtered_data = filtered_data[filtered_data['GENUS_SPECIES'] == species_filter]
    
    # Use the mean of valid coordinates, if no valid data points -> use default center
    if len(filtered_data) == 0:
        map_center = [default_lat, default_lon]
    else:
        map_center = [filtered_data['LATITUDE'].mean(), filtered_data['LONGITUDE'].mean()]

    m = folium.Map(location=map_center, zoom_start=4)
    
    # add markers with data
    marker_cluster = MarkerCluster().add_to(m)
    for idx, row in filtered_data.iterrows():
        # for debugging for now, can make it look nicer later
        popup_content = f"""
            <b>Species:</b> {row['GENUS_SPECIES']}<br>
            <b>Abundance:</b> {row['sum.allrawdata.ABUNDANCE']}<br>
            <b>Biomass:</b> {row['sum.allrawdata.BIOMASS']}<br>
            <b>Plot:</b> {row['PLOT']}<br>
            <b>Location:</b> ({row['LATITUDE']}, {row['LONGITUDE']})
        """
        folium.Marker(
            location=[row['LATITUDE'], row['LONGITUDE']],
            popup=folium.Popup(popup_content, max_width=300),
            tooltip=row['GENUS_SPECIES']
        ).add_to(marker_cluster)
    
    title_html = f'''
        <div style="position: fixed; 
                    top: 10px; 
                    left: 50px; 
                    width: 300px; 
                    height: 30px; 
                    z-index:9999; 
                    background-color: white; 
                    font-size:16px;
                    font-weight: bold;
                    padding: 5px;
                    border-radius: 5px;
                    border: 2px solid gray;">
                Species Distribution Map {year} ({len(filtered_data)} locations)
        </div>
    '''
    m.get_root().html.add_child(folium.Element(title_html))
    return m
        

In [15]:


def create_map(data_df):

    year_slider = IntSlider(
        min=int(data_df['YEAR'].min()),
        max=int(data_df['YEAR'].max()),
        step=1,
        description='Year',
        value=int(data_df['YEAR'].min()),
        layout=Layout(width='800px') 
    )
    
    # species filter - ignore rows that have no valid lat/long
    valid_data = data_df.dropna(subset=['LATITUDE', 'LONGITUDE'])
    species_list = ['All Species'] + sorted(valid_data['GENUS_SPECIES'].unique().tolist())
    species_dropdown = Select(
        options=species_list,
        description='Species:',
        value='All Species'
    )
    
    return interact(update_map, year=year_slider, species_filter=species_dropdown)

In [None]:
create_map(data_df)

interactive(children=(IntSlider(value=1874, description='Year', layout=Layout(width='800px'), max=2018, min=18…

<function __main__.update_map(year, species_filter='All Species')>

## Quantum Processing

In [17]:
n_qubits = 4
n_layers = 2
scaler = MinMaxScaler()

In [18]:
def prepare_data(data_df):
    # scaling/select relevant features
    features = ['LATITUDE', 'LONGITUDE', 'YEAR','sum.allrawdata.ABUNDANCE']
    
    # clean
    processed_data = data_df[features].dropna()
    
    base_year = processed_data['YEAR'].min()
    processed_data['YEARS_SINCE_START'] = processed_data['YEAR'] - base_year
    
    # normalise
    X = processed_data[['LATITUDE', 'LONGITUDE', 'YEARS_SINCE_START']]
    y = processed_data['sum.allrawdata.ABUNDANCE']
    
    X_scaled = scaler.fit_transform(X)
    y_scaled = MinMaxScaler().fit_transform(y.values.reshape(-1, 1))
    
    return X_scaled, y_scaled

In [19]:
def create_pennylane_circuit():
    # a PennyLane quantum circuit (variational model) for classification
    # see https://pennylane.ai/qml/demos/tutorial_variational_classifier/

    dev = qml.device("default.qubit", wires=n_qubits)
    
    @qml.qnode(dev)
    def circuit(inputs, weights):
        # encode inputs
        for i in range(n_qubits):
            qml.RY(inputs[i % len(inputs)], wires=i)
        
        # variational layers
        for layer in range(n_layers):
            # entangling layer
            for i in range(n_qubits):
                qml.RZ(weights[layer, i], wires=i)
                
            for i in range(n_qubits - 1):
                qml.CNOT(wires=[i, i + 1])
            
            # rotation layer
            for i in range(n_qubits):
                qml.RY(weights[layer + n_layers, i], wires=i)
        
        return qml.expval(qml.PauliZ(0))
    
    return circuit

In [21]:
def train_pennylane_model(X_train, y_train, n_epochs=100):

    # train the model
    circuit = create_pennylane_circuit()

    weights = np.random.uniform(
        low=-np.pi, 
        high=np.pi, 
        size=(2 * n_layers, n_qubits)
    )

    # optimiser https://docs.pennylane.ai/en/stable/code/api/pennylane.GradientDescentOptimizer.html
    opt = qml.GradientDescentOptimizer(stepsize=0.01)

    # training loop
    losses = []
    for epoch in range(n_epochs):
        batch_loss = 0
        for X_batch, y_batch in zip(X_train, y_train):
            # forwards
            prediction = circuit(X_batch, weights)
            loss = np.abs(prediction - y_batch[0])
            
            # back
            weights = opt.step(lambda w: circuit(X_batch, w), weights)
            
            batch_loss += loss
            
        avg_loss = batch_loss / len(X_train)
        losses.append(avg_loss)
        
        if epoch % 10 == 0:
            print(f"Epoch {epoch}: Loss = {avg_loss:.4f}")
            
    return weights, losses

In [22]:
# if we wanna use qiskit instead/penny lane doesn't work
def train_qiskit_model(X_train, y_train, n_epochs=100):

    # https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQR.html
    # or something else? this seems suitable if penny lane doesnt work
   

SyntaxError: incomplete input (1984441808.py, line 6)

In [17]:
def predict_future_abundance(trained_model, location, years_ahead):
        
    future_predictions = []
    base_input = np.array([
        location[0],  # lat
        location[1],  # long
        scaler.transform([[years_ahead]])[0][0]  # scaled future year
    ])
    
    prediction = trained_model.predict([base_input])[0]
    return prediction

In [28]:
X_scaled, y_scaled = prepare_data(data_df)

# split into test/train subsets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=123)

# train
penny_lane_model = train_pennylane_model(X_train, y_train)

# prediction
location = [47.4, -95.12] # random lat /long - need to plot these probably
years_ahead = 5
prediction = predict_future_abundance(qiskit_model, location, years_ahead)

