In [1]:
import collections
import inspect

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('ggplot')

import litholog
from litholog import utils, io

In [16]:
transforms = {c : utils.string2array_pandas for c in ['depth_m', 'grain_size_mm']}

beds = pd.read_csv('../data/AllBedsWithProfiles_CLEAN.csv', converters=transforms)
beds.head()

Unnamed: 0,name,count,collection,eod,eodnum,tops,th,bases,gs_tops_mm,gs_tops_psi,...,mean_gs_psi,max_gs_mm,max_gs_psi,ng,ar,depth_m,grain_size_mm,grain_size_psi,missing_tops,missing_sample
0,Marnoso 1,1,Marnoso-Arenacea,basin plain,0,22.81684,0.31082,22.50602,0.115051,-3.119655,...,-3.106893,0.117046,-3.09485,0.520514,0.021978,"[22.8168, 22.703, 22.5337, 22.506]","[0.1151, 0.1157, 0.117, 0.117]",[-3.11904026 -3.11153923 -3.09541957 -3.09541957],0.0,0.0
1,Marnoso 1,1,Marnoso-Arenacea,basin plain,0,22.50602,0.60931,21.89671,0.001,-9.965784,...,-9.965784,0.001,-9.965784,0.520514,0.021978,"[22.506, 21.8967]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
2,Marnoso 1,1,Marnoso-Arenacea,basin plain,0,21.89671,0.10463,21.79208,0.12538,-2.995625,...,-2.995625,0.12538,-2.995625,0.520514,0.021978,"[21.8967, 21.7921]","[0.1254, 0.1254]",[-2.99539075 -2.99539075],0.0,0.0
3,Marnoso 1,1,Marnoso-Arenacea,basin plain,0,21.79208,0.11694,21.67514,0.001,-9.965784,...,-9.965784,0.001,-9.965784,0.520514,0.021978,"[21.7921, 21.6751]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
4,Marnoso 1,1,Marnoso-Arenacea,basin plain,0,21.67514,0.13232,21.54282,0.134306,-2.896405,...,-2.73626,0.16042,-2.640075,0.520514,0.021978,"[21.6751, 21.6351, 21.5674, 21.5428]","[0.1343, 0.1447, 0.1604, 0.1604]",[-2.89646879 -2.78886317 -2.64025395 -2.64025395],0.0,0.0


In [17]:
beds[beds.gs_tops_mm.isnull()].th.sum(), beds[beds.snd_shl.isnull()].th.sum()

(0.0, 0.0)

In [18]:
beds.snd_shl.value_counts()

1.0    28918
0.0    27997
Name: snd_shl, dtype: int64

In [19]:
beds[beds.snd_shl == 1].th.sum()

12742.218893650996

In [20]:
beds[beds.snd_shl == 0].th.sum()

6994.021388571157

In [21]:
2**-8, 2**-4

(0.00390625, 0.0625)

In [22]:
count = {eod : 0 for eod in beds.eod.unique()}
meters = {eod : 0 for eod in beds.eod.unique()}

for _, grp in beds.groupby('count'):
    count[grp['eod'].values[0]] += 1
    meters[grp['eod'].values[0]] += grp.th.sum()
    
avgs = {k : meters[k] / count[k] for k in count.keys()}

In [23]:
avgs

{'basin plain': 27.076829050012044,
 'fan': 33.39089495971465,
 'levee': 15.039543988376764,
 'slopechannel': 80.6872487607637}

In [24]:
beds.th.mean()

0.34676693810457965

In [25]:
weird = {problem : [] for problem in ['has_null', 'th_mismatch', 'samples_mismatch']}

for name, seq in beds.groupby('count'):
    
    if seq.isnull().any().any():
        weird['has_null'].append(name)
    
    _, elev_good = io.check_thicknesses(seq, 'tops', 'th', 'elevation')
    _, depth_good = io.check_thicknesses(seq, 'tops', 'th', 'depth')
    if not (elev_good or depth_good):
        weird['th_mismatch'].append(name)
        
    if not io.check_samples(seq, 'depth_m', 'grain_size_mm'):
        weird['samples_mismatch'].append(name)
        
weird, {k : len(v) for k, v in weird.items()}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


({'has_null': [], 'th_mismatch': [], 'samples_mismatch': []},
 {'has_null': 0, 'th_mismatch': 0, 'samples_mismatch': 0})

In [26]:
beds[beds.depth_m.apply(utils.safelen) != beds.grain_size_mm.apply(utils.safelen)]

Unnamed: 0,name,count,collection,eod,eodnum,tops,th,bases,gs_tops_mm,gs_tops_psi,...,mean_gs_psi,max_gs_mm,max_gs_psi,ng,ar,depth_m,grain_size_mm,grain_size_psi,missing_tops,missing_sample


In [27]:
{k : len(v) for k, v in weird.items()}

{'has_null': 0, 'th_mismatch': 0, 'samples_mismatch': 0}

In [28]:
magellan = beds[beds['count'] == 67]
magellan

Unnamed: 0,name,count,collection,eod,eodnum,tops,th,bases,gs_tops_mm,gs_tops_psi,...,mean_gs_psi,max_gs_mm,max_gs_psi,ng,ar,depth_m,grain_size_mm,grain_size_psi,missing_tops,missing_sample
3379,Magellan 1320A,67,Gulf of Mexico,basin plain,0,0.00000,2.26803,2.26803,0.001000,-9.965784,...,-9.965784,0.001000,-9.965784,0.284623,0.025641,"[0.0, 2.268]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
3380,Magellan 1320A,67,Gulf of Mexico,basin plain,0,2.26382,0.37711,2.64093,0.009893,-6.659445,...,-6.659445,0.009893,-6.659445,0.284623,0.025641,"[2.2638, 2.6409]","[0.0099, 0.0099]",[-6.65835576 -6.65835576],0.0,0.0
3381,Magellan 1320A,67,Gulf of Mexico,basin plain,0,2.64093,3.34637,5.98730,0.001000,-9.965784,...,-9.965784,0.001000,-9.965784,0.284623,0.025641,"[2.6409, 5.9873]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
3382,Magellan 1320A,67,Gulf of Mexico,basin plain,0,5.98730,1.22544,7.21274,0.009467,-6.722896,...,-6.722896,0.009467,-6.722896,0.284623,0.025641,"[5.9873, 7.2127]","[0.0095, 0.0095]",[-6.71785677 -6.71785677],0.0,0.0
3383,Magellan 1320A,67,Gulf of Mexico,basin plain,0,7.21274,7.35263,14.56537,0.123396,-3.018635,...,-3.018635,0.123396,-3.018635,0.284623,0.025641,"[7.2127, 14.5654]","[0.1234, 0.1234]",[-3.0185857 -3.0185857],0.0,0.0
3384,Magellan 1320A,67,Gulf of Mexico,basin plain,0,14.56537,0.32992,14.89529,0.001000,-9.965784,...,-9.965784,0.001000,-9.965784,0.284623,0.025641,"[14.5654, 14.8953]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
3385,Magellan 1320A,67,Gulf of Mexico,basin plain,0,14.89529,0.47126,15.36655,0.124883,-3.001350,...,-3.001350,0.124883,-3.001350,0.284623,0.025641,"[14.8953, 15.3666]","[0.1249, 0.1249]",[-3.00115462 -3.00115462],0.0,0.0
3386,Magellan 1320A,67,Gulf of Mexico,basin plain,0,15.36655,0.28280,15.64935,0.001000,-9.965784,...,-9.965784,0.001000,-9.965784,0.284623,0.025641,"[15.3666, 15.6494]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0
3387,Magellan 1320A,67,Gulf of Mexico,basin plain,0,15.64935,0.42423,16.07358,0.123396,-3.018635,...,-3.018635,0.123396,-3.018635,0.284623,0.025641,"[15.6494, 16.0736]","[0.1234, 0.1234]",[-3.0185857 -3.0185857],0.0,0.0
3388,Magellan 1320A,67,Gulf of Mexico,basin plain,0,16.07358,0.32992,16.40350,0.001000,-9.965784,...,-9.965784,0.001000,-9.965784,0.284623,0.025641,"[16.0736, 16.4035]","[0.001, 0.001]",[-9.96578428 -9.96578428],0.0,0.0


In [None]:
def match_metacol(seqs, value, metacol='eodnum'):
    return list(filter(lambda s: s.metadata[metacol] == value, seqs))

eod0 = match_metacol(seqs, 0)
eod1 = match_metacol(seqs, 1)
eod2 = match_metacol(seqs, 2)
eod3 = match_metacol(seqs, 3)

[len(e) for e in [eod0, eod1, eod2, eod3]]

In [None]:
import random

for i, seqs in enumerate([eod0, eod1, eod2, eod3]):
    selected = random.sample(seqs, 10)
    fig, axes = plt.subplots(ncols=10, figsize=(70, 30))
    for ax, seq in zip(axes, seqs):
        #seq.resample_data('depth_m', 0.01)
        seq.plot(legend=litholog.defaults.litholegend, width_field='grain_size_psi', depth_field='depth_m', ax=ax)
    
    plt.show()
    

In [None]:
eod0[1].metadata

In [None]:
x = beds[beds.name == 'Magellan 1320A']
x #[x.th.between(2.1, 2.2)]