# Load Tools

In [0]:
import pandas as pd

# Meta Data
Individual station files were downloaded on 2019-06-10 (190610) from the 'Decagon' virtual machine maintained by MPG Ranch.
* [Link to hosted xls files](https://drive.google.com/drive/folders/1mDuhIWc3LMfCCbI_6rt82m8j2AhUi3Qk)

In [0]:
# One record per soil-moisture site:
#   weather    - weather station identifier paired with the site
#   soil       - human-readable site name (used as the 'station' label)
#   soil_data  - file name of the downloaded .xls export
#   short_name - abbreviation used as the key for the site's DataFrame
# NOTE(review): 'KMTFLORE7' is listed for both 'south baldy ridge' and
# 'orchard house' -- confirm this pairing is intentional.
stations_data = [
    dict(weather='KMTFLORE5', soil='sainfoin bench', soil_data='SB-10Jun2019-0844.xls', short_name='sb'),
    dict(weather='KMTFLORE4', soil='baldy draw', soil_data='BD-10Jun2019-0844.xls', short_name='bd'),
    dict(weather='KMTFLORE6', soil='baldy summit', soil_data='BS-07Jun2019-1554.xls', short_name='bs'),
    dict(weather='KMTFLORE3', soil='indian ridge', soil_data='IR-10Jun2019-0845.xls', short_name='ir'),
    dict(weather='KMTFLORE7', soil='south baldy ridge', soil_data='SBR-10Jun2019-0845.xls', short_name='sbr'),
    dict(weather='KMTFLORE7', soil='orchard house', soil_data='OH-10Jun2019-0846.xls', short_name='oh'),
]


# Drive folder for the soil moisture files.
# NOTE(review): not referenced by the cells visible here -- confirm whether
# the .xls paths above are meant to be resolved against it.
google_drive_source = 'My Drive/Current Work/MPG Ranch/Matrix/Data/Soil Moisture/'

# Restructure Downloaded Data

In [0]:
def reshape_data(src, station, port_depths=None):
  """Load one station's .xls export and reshape it to one row per port reading.

  The sheet is read with rows 0 and 2 as a two-level column header. The
  'Measurement Time' column is looked up under the top-level label equal to
  `src`, so `src` must match that header label exactly.

  Args:
    src: Path/name of the .xls file; also used as the top-level column label
      that holds 'Measurement Time'.
    station: Label written to the 'station' column of every row.
    port_depths: Optional mapping of port label -> probe depth in inches.
      Defaults to Port 1/3 -> 6 and Port 2/4 -> 36. Ports missing from the
      mapping get an empty string.

  Returns:
    DataFrame indexed by 'Measurement Time' with columns
    ['port', 'kPa Potential', '°C Temp', 'depth (in)', 'station'].
  """
  if port_depths is None:
    port_depths = {'Port 1': 6, 'Port 3': 6, 'Port 2': 36, 'Port 4': 36}

  # load data
  df = pd.read_excel(src, header=[0,2])

  # restructure multi index: move the time column into the index, then pivot
  # so each (port, time) pair becomes a row and each measurement a column
  df = df.set_index(df[src]['Measurement Time']).drop((src, 'Measurement Time'), axis=1)
  df = df.unstack().unstack(level=1).reset_index(level=1).reset_index().set_index('Measurement Time')

  # rename columns (positional: expects exactly the port label plus the two
  # measurement columns, in this order)
  df.columns = ['port', 'kPa Potential', '°C Temp']

  # label moisture probe depth for each port.
  # Assign the whole column at once: writing to the row objects yielded by
  # DataFrame.iterrows() is not guaranteed to modify the frame.
  df['depth (in)'] = [port_depths.get(port, '') for port in df['port']]

  # station id label
  df['station'] = station

  return df

In [0]:
# One single-key dict per station: {short_name: reshaped DataFrame}.
# Each entry loads the station's soil file and labels its rows with the
# station's soil name.
df_stations = [
    {entry['short_name']: reshape_data(entry['soil_data'], entry['soil'])}
    for entry in stations_data
]

# Combine Station DataFrames

In [0]:
# Gather every station frame (in df_stations order) and concatenate once;
# concatenating inside the loop re-copies all accumulated rows on each pass.
station_frames = [frame for station in df_stations for frame in station.values()]

# pd.concat raises on an empty list, so keep an empty frame in that case.
stations_combined = pd.concat(station_frames) if station_frames else pd.DataFrame()

In [6]:
# Summarize the combined frame: index range, columns, non-null counts, dtypes.
stations_combined.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 207197 entries, 2000-11-07 06:00:00 to 2019-06-09 18:00:00
Data columns (total 5 columns):
port             207197 non-null object
kPa Potential    207197 non-null object
°C Temp          207197 non-null object
depth (in)       207197 non-null object
station          207197 non-null object
dtypes: object(5)
memory usage: 9.5+ MB


In [15]:
# List each distinct station label once, title-cased, as a bullet.
print('Included Soil Moisture Stations:')
for station in stations_combined['station'].unique():
  print('*', station.title())

Included Soil Moisture Stations:
* Sainfoin Bench
* Baldy Draw
* Baldy Summit
* Indian Ridge
* South Baldy Ridge
* Orchard House


# Export combined stations 

In [0]:
# Write the combined frame to a CSV in the current working directory
# (relative path); the index is included as the first column by default.
stations_combined.to_csv('soil_moisture-190611.csv')