# [dplython](https://github.com/dodger487/dplython) works with Python 3!

I've also included pandas examples to compare against dplython.

In [1]:
from dplython import *

In [2]:
# dplython: pipe the frame through a column selection, then keep the first 5 rows
(diamonds >>
  select(X.carat, X.cut, X.price) >>
  head(5))

Unnamed: 0,carat,cut,price
0,0.23,Ideal,326
1,0.21,Premium,326
2,0.23,Good,327
3,0.29,Premium,334
4,0.31,Good,335


In [7]:
# pandas: index with a list of column names, then keep the first 5 rows
(
    diamonds[['carat', 'cut', 'price']]
    .head(5)
)

Unnamed: 0,carat,cut,price
0,0.23,Ideal,326
1,0.21,Premium,326
2,0.23,Good,327
3,0.29,Premium,334
4,0.31,Good,335


In [9]:
# dplython: keep rows where carat is greater than 4, then pick the columns to show
(diamonds >>
  dfilter(X.carat > 4) >>
  select(X.carat, X.cut, X.depth, X.price))

Unnamed: 0,carat,cut,depth,price
25998,4.01,Premium,61.0,15223
25999,4.01,Premium,62.5,15223
27130,4.13,Fair,64.8,17329
27415,5.01,Fair,65.5,18018
27630,4.5,Fair,65.8,18531


In [10]:
# pandas: filter with a query string, then index by the list of columns to show
(
    diamonds
    .query("carat > 4")
    [['carat','cut','depth','price']]
)

Unnamed: 0,carat,cut,depth,price
25998,4.01,Premium,61.0,15223
25999,4.01,Premium,62.5,15223
27130,4.13,Fair,64.8,17329
27415,5.01,Fair,65.5,18018
27630,4.5,Fair,65.8,18531


In [11]:
# dplython way: sample 10 random records, then sort by the carat column,
# then return only those specific columns
(
    diamonds
    >> sample_n(10)
    >> arrange(X.carat)
    >> select(X.carat, X.cut, X.depth, X.price)
)

  return lambda df: DplyFrame(df.sort(names))


Unnamed: 0,carat,cut,depth,price
33090,0.31,Ideal,62.4,815
28353,0.4,Very Good,63.4,667
29415,0.42,Very Good,62.1,700
50252,0.7,Premium,58.8,2234
7128,0.9,Good,60.7,4173
13152,1.11,Premium,58.8,5439
21772,1.23,Ideal,62.5,9836
16974,1.25,Ideal,61.9,6779
21848,1.5,Good,63.3,9909
25795,1.51,Ideal,60.8,14779


In [14]:
# pandas way: draw the 10-row sample first, then keep only the listed columns,
# and finally sort the result by the carat column
(
    diamonds
    .sample(n=10)
    [['carat','cut','depth','price']]
    .sort_values(by='carat')
)

Unnamed: 0,carat,cut,depth,price
29820,0.3,Premium,61.7,709
32217,0.31,Ideal,61.8,789
33790,0.32,Ideal,61.1,842
42615,0.36,Very Good,62.6,505
44524,0.52,Very Good,63.4,1601
51228,0.7,Good,59.1,2352
49236,0.82,Premium,62.2,2083
10458,0.9,Ideal,62.3,4788
5238,1.02,Fair,65.3,3787
9518,1.04,Ideal,62.0,4613


In [15]:
# dplython: sample 6 rows, keep carat and price, then transpose via X._.T
(diamonds >>
  sample_n(6) >>
  select(X.carat, X.price) >>
  X._.T)

Unnamed: 0,18066,39793,12856,3392,53432,35476
carat,1.04,0.39,1.1,0.27,0.74,0.42
price,7294.0,1095.0,5359.0,567.0,2671.0,902.0


In [16]:
# pandas: sample 6 rows, keep carat and price, then transpose with the .T accessor
(
    diamonds
    .sample(n=6)
    [['carat','price']]
    .T
)

Unnamed: 0,15102,8304,27018,21143,36583,5934
carat,1.05,1.44,2.22,1.11,0.33,0.9
price,6070.0,4387.0,17151.0,9299.0,946.0,3945.0


In [17]:
# dplython: add a rounded-carat column, group by cut and that column,
# then compute the mean price per group
(
    diamonds
    >> mutate(carat_bin=X.carat.round())
    >> group_by(X.cut, X.carat_bin)
    >> summarize(avg_price=X.price.mean())
)

Unnamed: 0,avg_price,carat_bin,cut
0,4135.271007,1.0,Very Good
1,863.329085,0.0,Premium
2,3305.754579,1.0,Fair
3,863.908535,0.0,Ideal
4,15053.555556,3.0,Very Good
5,13466.823529,3.0,Fair
6,12838.984078,2.0,Ideal
7,18018.0,5.0,Fair
8,786.054191,0.0,Good
9,15636.047619,3.0,Premium


In [22]:
# pandas: add a rounded-carat column, group by that column and cut,
# then take the mean of price per group
(
    diamonds
    .assign(carat_bin=diamonds.carat.round())
    .groupby(['carat_bin','cut'])
    [['price']]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
carat_bin,cut,Unnamed: 2_level_1
0.0,Fair,1027.979275
0.0,Good,786.054191
0.0,Ideal,863.908535
0.0,Premium,863.329085
0.0,Very Good,766.35459
1.0,Fair,3305.754579
1.0,Good,3815.307879
1.0,Ideal,4213.864948
1.0,Premium,4382.906453
1.0,Very Good,4135.271007


#### Maybe it's just because I've been using pandas for a while, but I still don't mind the way things are done in pandas.  YMMV!

#### I do wish pandas' query() method let us write SQL-like statements and also filter on null / not-null values somehow.  I'd be a happy camper then!