# **Data Preparation for PBMs - 2**

## **Import Dependencies**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import os
from glob import glob
import json
from tqdm.auto import tqdm

# Figure fonts applied to every matplotlib plot in this notebook.
# 'DejaVu Serif' is the canonical spelling of the font family name.
plt.rcParams['font.family'] = 'DejaVu Serif'
plt.rcParams['font.serif'] = 'Times New Roman'

# Suppress all Python warnings for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# Output directories. os.path.join is used instead of a hard-coded backslash
# so the same relative path resolves on Windows, Linux and macOS.
out_master_dir = os.path.join('datasets', 'master')
out_temp_dir = 'temp_data'

## **Read the Datasets**

In [13]:
# Read the Soil Hydraulic Property dataset (semicolon-delimited CSV).
# The path is assembled with os.path.join: the previous literal used bare
# backslashes in a non-raw string ('\c', '\s' are invalid escape sequences)
# and only resolved on Windows.
pbm_data_path = os.path.join(
    'datasets', 'csvs', 'soilhydraulic_property_Germany_Points_Amit.csv'
)
pbm_data = pd.read_csv(pbm_data_path, delimiter=';')

# Drop the last two columns of the file.
# NOTE(review): the reason these two columns are discarded is not recorded
# in this notebook - confirm against the CSV layout.
pbm_data = pbm_data.iloc[:, :-2]
print(pbm_data.shape)
pbm_data.head()

(190364, 144)


Unnamed: 0,location,dampingdepth,soilwater_fc_global,soilwater_sat_global,drainage_rate,deltatheta,DZF,depth_1,depth_2,depth_3,...,InitialFixedPConcentration_6,slimalfa_1,slimalfa_2,slimalfa_3,slimalfa_4,slimalfa_5,slimalfa_6,Nitrogen,Phosphorous,Potassium
0,0,6,0.305742,0.41441,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,120.45755,7.1606,16.997152
1,1,6,0.352146,0.447852,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,120.45755,7.1606,16.997152
2,2,6,0.292897,0.422044,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,120.45755,7.1606,16.997152
3,3,6,0.300213,0.420899,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,120.45755,7.1606,16.997152
4,4,6,0.30233,0.418748,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,120.45755,7.1606,16.997152


## **Data Processing**

In [14]:
# Scale the Nitrogen, Phosphorous and Potassium columns down by a factor of 10.
# NOTE: the columns are overwritten in pbm_data itself, so running this cell
# a second time without re-reading the data divides the values by 10 again.
npk_columns = ['Nitrogen', 'Phosphorous', 'Potassium']
pbm_data[npk_columns] = pbm_data[npk_columns].div(10)
print(pbm_data.shape)
pbm_data.head()

(190364, 144)


Unnamed: 0,location,dampingdepth,soilwater_fc_global,soilwater_sat_global,drainage_rate,deltatheta,DZF,depth_1,depth_2,depth_3,...,InitialFixedPConcentration_6,slimalfa_1,slimalfa_2,slimalfa_3,slimalfa_4,slimalfa_5,slimalfa_6,Nitrogen,Phosphorous,Potassium
0,0,6,0.305742,0.41441,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,12.045755,0.71606,1.699715
1,1,6,0.352146,0.447852,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,12.045755,0.71606,1.699715
2,2,6,0.292897,0.422044,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,12.045755,0.71606,1.699715
3,3,6,0.300213,0.420899,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,12.045755,0.71606,1.699715
4,4,6,0.30233,0.418748,50,0.01,0.1,0.1,0.3,0.5,...,57,0.95,0.95,0.95,0.95,0.95,0.95,12.045755,0.71606,1.699715


In [52]:
# Build the six-event fertilizer schedule for a single location
def process_data(row, fertilizer_scenario=2, crop_cyle_count=0, crop='winter wheat'):
    """Expand one input row into a six-row fertilizer application table.

    The three nutrient totals read from ``row`` (``Phosphorous``,
    ``Potassium``, ``Nitrogen``) are each rounded to 6 decimals, halved and
    rounded to 6 decimals again. The halves are listed twice, in P, K, N
    order, so every nutrient appears in two of the six events.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row['location']``, ``row['Phosphorous']``,
        ``row['Potassium']`` and ``row['Nitrogen']`` (for example a row
        passed by ``DataFrame.apply(..., axis=1)``).
    fertilizer_scenario : optional
        Value repeated in the ``FertilizerScenario`` column (default 2).
    crop_cyle_count : optional
        Value repeated in the ``CropCycleCount`` column (default 0).
    crop : optional
        Value repeated in the ``crop`` column (default 'winter wheat').

    Returns
    -------
    pandas.DataFrame
        Six rows with the columns ``location``, ``FertilizerScenario``,
        ``CropCycleCount``, ``crop``, ``Event``, ``vType``, ``DVS`` and
        ``Amount``.
    """
    n_events = 6
    location = row['location']

    # (output label, input column) pairs in the order they are emitted.
    nutrient_by_type = (
        ('PTotal', 'Phosphorous'),
        ('KTotal', 'Potassium'),
        ('NTotal', 'Nitrogen'),
    )

    labels = []
    half_doses = []
    for label, source_column in nutrient_by_type:
        # Round the total first, then round the halved amount as well.
        total = round(float(row[source_column]), 6)
        labels.append(label)
        half_doses.append(round(total / 2, 6))

    return pd.DataFrame({
        'location': [location] * n_events,
        'FertilizerScenario': [fertilizer_scenario] * n_events,
        'CropCycleCount': [crop_cyle_count] * n_events,
        'crop': [crop] * n_events,
        'Event': list(range(1, n_events + 1)),
        'vType': labels * 2,
        # One DVS value per event, in event order.
        'DVS': [0.001, 0.001, 0.25, 0.4, 0.4, 0.9],
        'Amount': half_doses * 2,
    })

In [54]:
# Run process_data on every row of pbm_data (one six-row frame per row),
# then stack the per-row frames into a single table with a fresh 0..n-1 index.
pbm_data_processed = pd.concat(
    pbm_data.apply(process_data, axis=1).tolist(),
    ignore_index=True,
)
print(pbm_data_processed.shape)
pbm_data_processed.head()

(1142184, 8)


Unnamed: 0,location,FertilizerScenario,CropCycleCount,crop,Event,vType,DVS,Amount
0,0,2,0,winter wheat,1,PTotal,0.001,0.35803
1,0,2,0,winter wheat,2,KTotal,0.001,0.849858
2,0,2,0,winter wheat,3,NTotal,0.25,6.022877
3,0,2,0,winter wheat,4,PTotal,0.4,0.35803
4,0,2,0,winter wheat,5,KTotal,0.4,0.849858


In [66]:
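# Save the processed data (export is currently disabled; uncomment the line below to write the CSV into out_master_dir)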
# pbm_data_processed.to_csv(os.path.join(out_master_dir, 'fertilizer_Soil3_AllKreis_Krishna.csv'), index=False)