In [1]:
import pandas as pd
import numpy as np
import datetime

In [3]:
!pip install pyarrow

Collecting pyarrow
  Downloading pyarrow-3.0.0-cp39-cp39-manylinux2014_x86_64.whl (20.7 MB)
[K     |████████████████████████████████| 20.7 MB 79 kB/s  eta 0:00:01    |█▋                              | 1.1 MB 267 kB/s eta 0:01:14     |█████████████████████████▋      | 16.6 MB 260 kB/s eta 0:00:16
Installing collected packages: pyarrow
Successfully installed pyarrow-3.0.0


In [4]:
dateBank = pd.read_parquet(path="myDate.parquet.gzip")
isotopeBank = pd.read_excel(io="Isotope.xlsx", engine="openpyxl")

In [5]:
sampleData = isotopeBank[isotopeBank["station"] == "AKH"]
sampleData["time"] = datetime.datetime.strptime('00:00:00', '%H:%M:%S').time()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sampleData["time"] = datetime.datetime.strptime('00:00:00', '%H:%M:%S').time()


In [6]:
def calculate_result(dateBank=None, data=None, values=None, index=None, aggfunc=None, weighted_parameter=None):
    
    data = pd.merge(
        left=dateBank[(dateBank["date_gregorian"].dt.year >= data["date_gregorian"].dt.year.min()) & (dateBank["date_gregorian"].dt.year <= data["date_gregorian"].dt.year.max())],
        right=data,
        how="left",
        on=["date_gregorian", "time"]
    )
    
    if weighted_parameter is not None:
        data = data.dropna(subset=[weighted_parameter])
        weighted_average_fun = lambda rows: np.average(rows, weights=data.loc[rows.index, weighted_parameter])
        if "wa_func" in aggfunc:
            aggfunc = [weighted_average_fun if x=="wa_func" else x for x in aggfunc]
    
    result = data.pivot_table(
        values=values,
        index=index,
        aggfunc={values[i]: aggfunc[i] for i in range(len(values))}
    ).reset_index()
    
    result["d_excess"] = result["hydrogen_isotope_values"] - 8 * result["oxygen_isotope_values"]
    
    return result

In [7]:
calculate_result(
    dateBank=dateBank,
    data=sampleData,
    values=["precipitation", "hydrogen_isotope_values", "oxygen_isotope_values"], 
    index=["year", "month"],
    aggfunc=[sum, "wa_func", "wa_func"],
    weighted_parameter="precipitation"
)

Unnamed: 0,year,month,hydrogen_isotope_values,oxygen_isotope_values,precipitation,d_excess
0,1396,9,-19.863804,-3.324993,7.0,6.736141
1,1396,10,-76.161552,-10.762782,3.0,9.940704
2,1396,11,-35.95989,-5.50239,17.0,8.059228
3,1396,12,-25.872245,-4.325062,38.0,8.72825
4,1397,1,-28.620348,-5.240343,24.5,13.302398
5,1397,2,-61.033685,-8.095799,51.0,3.732707
6,1397,3,4.594665,0.270359,12.5,2.43179
7,1397,7,-58.903957,-8.915337,30.0,12.418742
8,1397,8,-18.522175,-2.758937,30.5,3.549321
9,1397,11,-29.545536,-4.086345,31.0,3.145221
