# DataFrame accessors

In [13]:
import numpy as np
import pandas as pd
from physipy import m, units
from physipandas import QuantityArray, QuantityDtype
# Unit shortcuts looked up by symbol in physipy's `units` mapping.
# J is used below for the joule-valued column, mm as a favourite (display) unit.
J = units["J"]
mm = units["mm"]

We mix several data types in a single DataFrame:
 - int (plain numpy array)
 - quantity with SI units
 - quantity without unit (dimensionless)
 - quantity with a non-SI favourite unit (mm)

In [14]:
# Build a frame mixing a plain numpy column with several physipy-typed columns.
df = pd.DataFrame({
    "np_arr":np.arange(10),                                                # plain numpy int array (no unit)
    "physipy_arr": pd.Series(QuantityArray(np.arange(10)*m)),              # quantity array in meters
    "dimless":     pd.Series(QuantityArray(np.arange(10)**2)),             # dimensionless quantity array (squared integers)
    "squaredm":    pd.Series(QuantityArray(np.arange(10)*m**2)),           # quantity array in square meters
    "power":       pd.Series(QuantityArray(np.random.normal(size=10)*J)),  # random values in joules (an energy, despite the column name)
   # "lost_units":  pd.Series(np.arange(10)*m),   # disabled; presumably loses the unit without the QuantityArray wrapper -- TODO confirm
    "favunit":     pd.Series(QuantityArray((np.arange(10)*m).set_favunit(mm))),  # meters with mm set as favunit
})
# Each physipy column reports its unit in the dtype, e.g. physipy[m].
print(df.dtypes)
df

np_arr                         int64
physipy_arr               physipy[m]
dimless                    physipy[]
squaredm               physipy[m**2]
power          physipy[kg*m**2/s**2]
favunit                   physipy[m]
dtype: object


Unnamed: 0,np_arr,physipy_arr,dimless,squaredm,power,favunit
0,0,0,0,0,-0.304341,0
1,1,1,1,1,0.084224,1
2,2,2,4,2,-0.230153,2
3,3,3,9,3,0.597806,3
4,4,4,16,4,-0.499925,4
5,5,5,25,5,1.341358,5
6,6,6,36,6,-1.399265,6
7,7,7,49,7,2.961545,7
8,8,8,64,8,-0.014708,8
9,9,9,81,9,-0.310145,9


Show the DataFrame with the units displayed as a second level of the column MultiIndex:

In [15]:
# Render the frame with an extra "unit" header row; "-" marks the plain numpy column.
df.physipy.show()

Unnamed: 0_level_0,np_arr,physipy_arr,dimless,squaredm,power,favunit
unit,-,m,Unnamed: 3_level_1,m**2,kg*m**2/s**2,m
0,0,0,0,0,-0.304341,0
1,1,1,1,1,0.084224,1
2,2,2,4,2,-0.230153,2
3,3,3,9,3,0.597806,3
4,4,4,16,4,-0.499925,4
5,5,5,25,5,1.341358,5
6,6,6,36,6,-1.399265,6
7,7,7,49,7,2.961545,7
8,8,8,64,8,-0.014708,8
9,9,9,81,9,-0.310145,9


In [16]:
# Add a column of random values in watts; assigning a QuantityArray keeps the
# unit in the column dtype (see the dtypes listing below).
df["i"] = QuantityArray(np.random.normal(0, 1, 10)*units["W"])
print(df.dtypes)
df

np_arr                         int64
physipy_arr               physipy[m]
dimless                    physipy[]
squaredm               physipy[m**2]
power          physipy[kg*m**2/s**2]
favunit                   physipy[m]
i              physipy[kg*m**2/s**3]
dtype: object


Unnamed: 0,np_arr,physipy_arr,dimless,squaredm,power,favunit,i
0,0,0,0,0,-0.304341,0,-0.010639
1,1,1,1,1,0.084224,1,-1.00799
2,2,2,4,2,-0.230153,2,-0.218829
3,3,3,9,3,0.597806,3,-0.349522
4,4,4,16,4,-0.499925,4,0.353043
5,5,5,25,5,1.341358,5,0.015808
6,6,6,36,6,-1.399265,6,-1.117297
7,7,7,49,7,2.961545,7,-0.418878
8,8,8,64,8,-0.014708,8,0.529043
9,9,9,81,9,-0.310145,9,-0.310817


In [17]:
# Derived columns: the unit follows the arithmetic (inverse watts for "e").
df["e"] = 2/df["i"]
# "i" contains negative samples, so the square root is missing for those rows
# (blank cells in the output) and numpy emits the warning shown below.
df["f"] = np.sqrt(df["i"])
# Render the frame with each column's unit in a second header row.
df.physipy.show()

  res = ufunc.__call__(left.value)


Unnamed: 0_level_0,np_arr,physipy_arr,dimless,squaredm,power,favunit,i,e,f
unit,-,m,Unnamed: 3_level_1,m**2,kg*m**2/s**2,m,kg*m**2/s**3,s**3/(kg*m**2),kg**0.5*m**1.0/s**1.5
0,0,0,0,0,-0.304341,0,-0.010639,-187.990239,
1,1,1,1,1,0.084224,1,-1.00799,-1.984146,
2,2,2,4,2,-0.230153,2,-0.218829,-9.13955,
3,3,3,9,3,0.597806,3,-0.349522,-5.722105,
4,4,4,16,4,-0.499925,4,0.353043,5.66504,0.594174
5,5,5,25,5,1.341358,5,0.015808,126.519131,0.125729
6,6,6,36,6,-1.399265,6,-1.117297,-1.790035,
7,7,7,49,7,2.961545,7,-0.418878,-4.774664,
8,8,8,64,8,-0.014708,8,0.529043,3.780411,0.727353
9,9,9,81,9,-0.310145,9,-0.310817,-6.434647,


## Read from file with units

In [33]:
# The CSV carries two header rows (column names, then units); header=[0,1]
# loads them as a two-level column index.
# NOTE: this rebinds `df`, replacing the frame built in the previous section.
df = pd.read_csv("physidata.csv",header=[0,1])
df

Unnamed: 0_level_0,height,weight,speed,power
Unnamed: 0_level_1,m,kg,m/s,W
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12


In [34]:
import pandas as pd
# Ten lengths (0..9 m) stored in a Series with the physipy extension dtype.
# `s` is reused by the cells that follow.
q = np.arange(10)*m
s = pd.Series(q, dtype='physipy[m]')

In [35]:
def test_info():
    """Print the DataFrame.info() summary for a frame of physipy-typed columns."""
    # Two quantity columns: lengths and their squares.
    frame = pd.DataFrame({'A': s, 'B': s**2})
    frame.info()


test_info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype        
---  ------  --------------  -----        
 0   A       10 non-null     physipy[m]   
 1   B       10 non-null     physipy[m**2]
dtypes: physipy[m**2](1), physipy[m](1)
memory usage: 292.0 bytes


In [36]:
def test_describe():
    """Run DataFrame.describe() on a frame mixing physipy, int and string columns.

    Returns:
        The summary produced by ``df.describe()``.

    Raises:
        ValueError: when describe() cannot handle the physipy columns; this is
            what happened the last time this notebook was executed.
    """
    df = pd.DataFrame({'A':s, 'B':s**2, 'C':np.arange(len(s)), 'D':"0123456789"})
    return df.describe()


# Known limitation: describe() raised ValueError on this frame when the
# notebook was last run. Catch it at the call site so Restart & Run All can
# proceed, while still showing the message to the reader.
try:
    summary = test_describe()
except ValueError as err:
    print(f"ValueError: {err}")
else:
    print(summary)



ValueError: setting an array element with a sequence.

In [37]:
# Mixed frame: two physipy length columns, an int column, a string column
# and a physipy mass column.
kg = units['kg']
df = pd.DataFrame(
    {
        'A': s,
        'B': s**2,
        'C': np.arange(len(s)),
        'D': "0123456789",  # scalar string, repeated on every row
        'E': pd.Series(np.arange(10)*kg, dtype='physipy[kg]'),
    }
)
df

Unnamed: 0,A,B,C,D,E
0,0,0,0,123456789,0
1,1,1,1,123456789,1
2,2,4,2,123456789,2
3,3,9,3,123456789,3
4,4,16,4,123456789,4
5,5,25,5,123456789,5
6,6,36,6,123456789,6
7,7,49,7,123456789,7
8,8,64,8,123456789,8
9,9,81,9,123456789,9


In [29]:
# After dequantify(), columns are addressed by (name, unit) pairs;
# pick column A expressed in meters.
df.physipy.dequantify()[('A', 'm')]

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
Name: (A, m), dtype: physipy[m]

In [31]:

# Disabled experiments, kept for reference:
#   df.physipy.to_base_units()
#   df
#   df.select_dtypes([m])
#   from pandas.api.types import infer_dtype
#   infer_dtype([psp.m])

# Smoke-check that select_dtypes accepts each kind of selector without
# raising; the returned frames are deliberately discarded.
for selector in (
    'int',
    'category',
    'object',
    'physipy[m]',
    QuantityDtype(),
    ['physipy[m/s]'],
):
    df.select_dtypes(selector)

# Three spellings of the integer selector; each result is printed so they can
# be compared side by side.
for int_selector in (['int'], 'int', int):
    print(df.select_dtypes(int_selector))


   C
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9
   C
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9
   C
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9
