In [None]:
import os
from glob import glob

import pandas as pd

# Directory that holds the DTS .ddf files, relative to the notebook.
# Built with os.path.join so the path is also valid where '\' is not the
# path separator (on Windows the value is unchanged: 'data\DTS').
data_directory = os.path.join('data', 'DTS')

## Get a list of DDF files

In [None]:
# glob() returns paths in whatever order the file system yields them, so sort the
# list to make positional access (e.g. ddf_files[2]) and the order in which the
# files are processed reproducible from run to run.
ddf_files = sorted(glob(os.path.join(data_directory, '*.ddf')))
ddf_files[:10] # show the first 10 files

## Load a single DDF file into a DataFrame

In [None]:
# header=25: the column names are taken from row 25 (0-based) of the file;
# index_col=0: the first column becomes the index.
# The 'ansi' codec is only registered on Windows, so the code page is named
# explicitly to let the cell run on any platform.
# NOTE(review): assumes the DDF files are Windows-1252 encoded -- confirm.
dts_data = pd.read_table(ddf_files[2], header=25, index_col=0, encoding='cp1252')
dts_data.head()

In [None]:
# print the documentation of read_table to look up the keyword arguments it accepts
help(pd.read_table)

In [None]:
# give the temperature column a shorter name that is easier to type
dts_data = dts_data.rename(columns={'temperature (°C)': 'temperature'})
dts_data.head()

## Indexing with a DataFrame

In [None]:
# a single column label inside [] selects that one column
dts_data['temperature'].head() # returns a Series

In [None]:
# a list of column labels inside [] selects several columns at once
dts_data[['forward Stokes', 'forward anti-Stokes']].head() # returns a DataFrame

In [None]:
# .iloc is purely positional: rows 0-9 (the end of the slice is excluded) and the
# columns at positions 1 and 2
dts_data.iloc[0:10, [1,2]] # index by integer position

In [None]:
# .loc works on labels: rows whose index label runs from 0 to 10 (unlike .iloc,
# a .loc slice includes its end label) and the column named 'temperature'
dts_data.loc[0:10, 'temperature'] # index by label

In [None]:
import numpy as np
# Chained indexing: the row slice dts_data[-np.inf:0] (all index labels up to and
# including 0) is evaluated first, and the assignment to 'temperature' is then
# applied to that intermediate object.  pandas may emit a SettingWithCopyWarning
# here, and the change is not guaranteed to reach dts_data itself.
# NOTE(review): looks like a deliberate counter-example to the single .loc
# assignment in the following cell -- confirm before "fixing" it.
dts_data[-np.inf:0]['temperature'] = np.nan
dts_data.head()

In [None]:
# A single .loc call picks the rows (index labels up to and including 0) and the
# columns (':' = every column) and assigns NaN on dts_data itself.
# NOTE(review): this blanks all columns of those rows, not only 'temperature' --
# confirm that is intended.
dts_data.loc[-np.inf:0, :] = np.nan
dts_data.head()

See [Indexing and Selecting Data](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html) in the pandas user guide for more information.

## Plot the data

In [None]:
%matplotlib inline
dts_data['temperature'].plot()

In [None]:
# Zoom in on the rows whose index label lies between 200 and 375 (both included)
# and label the y-axis; the unit comes from the original column name
# 'temperature (°C)'.  The trailing ';' keeps the label's repr out of the output.
ax = dts_data.loc[200:375, 'temperature'].plot()
ax.set_ylabel('temperature (°C)');

In [None]:
# summary statistics (count, mean, std, quartiles, ...) of the temperature for the
# rows whose index label lies between 200 and 375
dts_data['temperature'].loc[200:375].describe()

## Read all of the data into a multi-level DataFrame

In [None]:
# 0-based line numbers of the DDF header lines that carry the acquisition date and
# time; each of those lines is tab-separated with the value in the second field
date_line = 9
time_line = 10

frames = []
datetime_stamps = []

for file in ddf_files:

    # read the measurement table (column names on row 25, first column as index)
    # and give the temperature column a shorter name
    # NOTE(review): 'cp1252' replaces the Windows-only 'ansi' codec; assumes the DDF
    # files are Windows-1252 encoded -- confirm.
    frame = pd.read_table(file, header=25, index_col=0, encoding='cp1252')
    frames.append(frame.rename(columns={'temperature (°C)': 'temperature'}))

    # the timestamp only needs the first header lines, so stop reading after
    # time_line instead of loading the whole file; decode with the same encoding
    # that read_table uses above
    with open(file, 'r', encoding='cp1252') as f:
        header_lines = [f.readline() for _ in range(time_line + 1)]
    date_str = header_lines[date_line].strip().split('\t')[1]
    time_str = header_lines[time_line].strip().split('\t')[1]
    datetime_stamps.append(pd.to_datetime(date_str + ' ' + time_str))

# Put the measurements in chronological order, whatever order the files were listed
# in, so that the timestamp level of the columns is sorted and can be sliced by a
# time range later on.
order = sorted(range(len(frames)), key=lambda i: datetime_stamps[i])
frames = [frames[i] for i in order]
datetime_stamps = [datetime_stamps[i] for i in order]

# columns become a two-level index: (timestamp, original column name)
dts_data = pd.concat(frames, axis=1, keys=datetime_stamps)
dts_data.head()

In [None]:
# take the cross-section of the column MultiIndex: keep only the 'temperature'
# entry of the second level, which leaves one column per timestamp
temperature_data = dts_data.xs(key='temperature', level=1, axis='columns')
temperature_data.head()

In [None]:
# keep the rows from index label 0 onwards and flip the frame so that describe()
# summarises each of those rows across all timestamps
temperature_data.loc[0:].T.describe()

## Define a function to use for plotting a subset of data

In [None]:
from dateutil.tz import tzlocal

import matplotlib.pyplot as plt
import matplotlib.dates as dates
# IndexFormatter is no longer provided by matplotlib.ticker (nor are
# dates.epoch2num / dates.IndexDateFormatter), so tick labels are produced with
# FuncFormatter, which is available in old and new matplotlib releases alike.
from matplotlib.ticker import FuncFormatter, LinearLocator, MaxNLocator

def plot_subset(plotting_subset, cmap=None):
    """Draw a DataFrame as a colour image with labelled ticks and a colour bar.

    Parameters
    ----------
    plotting_subset : pandas.DataFrame
        Values to draw.  Index entries are shown as numbers ('{:7.3f}') along the
        x-axis; column labels are converted with ``pd.to_datetime`` and shown as
        '%b %d %H:%M:%S' along the y-axis.
    cmap : matplotlib colormap, optional
        Colormap for the image; 'PiYG' is used when omitted.

    Returns
    -------
    None
        The image is drawn on a new 20 x 10 inch figure.
    """
    distances = plotting_subset.index
    timestamps = pd.to_datetime(plotting_subset.columns)
    # swap axes so cable distance is on the x-axis
    # (.values replaces DataFrame.as_matrix(), which current pandas no longer has)
    Z = plotting_subset.values.transpose()

    fig, ax = plt.subplots(figsize=(20, 10))

    if cmap is None:
        cmap = plt.get_cmap('PiYG')
    # DataFrame.min()/max() skip NaN, so missing values cannot turn the colour
    # limits into NaN
    vmin = plotting_subset.min().min()
    vmax = plotting_subset.max().max()
    im = ax.imshow(Z, cmap=cmap, vmin=vmin, vmax=vmax)

    def format_distance(value, pos=None):
        # imshow centres image column i on x == i; ticks outside the data get no label
        i = int(round(value))
        return '{:7.3f}'.format(distances[i]) if 0 <= i < len(distances) else ''

    def format_time(value, pos=None):
        # imshow centres image row i on y == i; ticks outside the data get no label
        i = int(round(value))
        return timestamps[i].strftime('%b %d %H:%M:%S') if 0 <= i < len(timestamps) else ''

    # format x axis ticks
    ax.xaxis.set_major_formatter(FuncFormatter(format_distance))
    ax.set_xlabel(distances.name or 'cable distance')

    # format y axis ticks; integer=True keeps the ticks on whole row positions so
    # every label belongs to exactly one timestamp
    ax.yaxis.set_major_locator(MaxNLocator(nbins=8, integer=True))
    ax.yaxis.set_major_formatter(FuncFormatter(format_time))
    ax.set_ylabel('time')

    fig.colorbar(im)

In [None]:
# every timestamp, restricted to the rows from index label 10 onwards
subset = temperature_data.loc[10:, :]
plot_subset(subset)

In [None]:
# Time window used to select columns: 2011-06-21 15:01:17 to 20:01:17.
# pd.Timestamp replaces the pd.datetime alias, which current pandas no longer provides.
start_date = pd.Timestamp(2011, 6, 21, 15, 1, 17)
end_date = pd.Timestamp(2011, 6, 21, 20, 1, 17)

# rows with index labels 273 to 374 and columns between start_date and end_date
# (label slices include both ends)
subset = temperature_data.loc[slice(273, 374), slice(start_date, end_date)]

# draw the selection with the 'jet' colormap instead of the default
cmap = plt.get_cmap('jet')
plot_subset(subset, cmap=cmap)

In [None]:
# upper panel: standard deviation across the selected timestamps for every row
ax1 = plt.subplot(211)
subset.std(axis=1).plot(ax=ax1)
ax1.set_ylabel('std')

# lower panel (x-axis shared with the upper one): max, mean and min across the
# selected timestamps, each drawn under its own legend label
ax2 = plt.subplot(212, sharex=ax1)
for stat in ('max', 'mean', 'min'):
    getattr(subset, stat)(axis=1).plot(ax=ax2, label=stat)
ax2.set_ylabel('temperature')
ax2.legend()

# both panels live on the same figure; enlarge it
fig = ax2.get_figure()
fig.set_size_inches((20, 10))

In [None]:
# save the subset to a csv file named 'subset.csv' (a relative path, so it is
# resolved against the current working directory)
subset.to_csv('subset.csv')