### Attributes and Underlying data

#### DataFrame.select_dtypes - Return a subset of the DataFrame’s columns based on the column dtypes

In [1]:
# df.select_dtypes(include='bool') ## or use exclude

#### DataFrame.values -- returns a NumPy representation of the DataFrame (pandas recommends `DataFrame.to_numpy()` instead)

In [2]:
# df.values

### Conversions

In [4]:
# df.astype({'col1': 'int32'})

In [5]:
# s_copy = s.copy()

### Indexing & Iteration

In [6]:
# df.at[4, 'B'] 
## Get the value at the specified row/column label pair
## Assigning to it (df.at[4, 'B'] = 10) sets the value at that pair

In [7]:
# df.iat[1, 2]
## Same as .at, but the row and column are given as integer positions

### Binary operator functions - element-wise arithmetic operations are performed

In [8]:
# df.div(10)

In [9]:
# df.add(5)

### Computations / descriptive stats

![image.png](attachment:image.png)

In [10]:
import pandas as pd

# Demo frame: column A holds three distinct values, column B a single repeated one.
df = pd.DataFrame(data={'A': [1, 2, 3], 'B': [1, 1, 1]})

# Number of distinct values in each column.
df.nunique()


A    3
B    1
dtype: int64

### Reindexing / selection / label manipulation

In [12]:
# DataFrame.idxmax - Return index of first occurrence of maximum over requested axis. NA/null values are excluded.
# DataFrame.idxmin - same, but for the minimum

### Combining / joining / merging

#### DataFrame.append - Append rows of other to the end of caller, returning a new object. (Deprecated in pandas 1.4 and removed in pandas 2.0; use `pandas.concat` instead.)

In [14]:
# pd.concat([df, df2])  # replaces df.append(df2), which was removed in pandas 2.0

In [20]:
# pd.concat([df, df2], ignore_index=True)  # replaces df.append(df2, ignore_index=True), removed in pandas 2.0

#### DataFrame.assign - Returns a new object with all original columns in addition to new ones. 
#### Existing columns that are re-assigned will be overwritten

In [17]:
 # City temperatures in Celsius, labelled by city name.
 df = pd.DataFrame(
     {'temp_c': [17.0, 25.0]},
     index=['Portland', 'Berkeley'],
 )

In [18]:
# Display the current contents of df.
df

Unnamed: 0,temp_c
Portland,17.0
Berkeley,25.0


In [19]:
# Derive a Fahrenheit column from temp_c; assign() returns a new frame
# (its result is only displayed here, not bound to a name).
df.assign(temp_f=lambda frame: frame['temp_c'] * 9 / 5 + 32)

Unnamed: 0,temp_c,temp_f
Portland,17.0,62.6
Berkeley,25.0,77.0


#### DataFrame.join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False)

In [21]:
# Calling frame for the join examples: six rows, keys K0..K5.
df = pd.DataFrame(
    data={
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5'],
    }
)

In [22]:
# Frame being joined in: only three rows, keys K0..K2.
other = pd.DataFrame(
    data={
        'key': ['K0', 'K1', 'K2'],
        'B': ['B0', 'B1', 'B2'],
    }
)

In [23]:
# Display the calling frame (df) used in the join examples.
df

Unnamed: 0,key,A
0,K0,A0
1,K1,A1
2,K2,A2
3,K3,A3
4,K4,A4
5,K5,A5


In [24]:
# Display the frame (other) that gets joined onto df.
other

Unnamed: 0,key,B
0,K0,B0
1,K1,B1
2,K2,B2


In [25]:
# Join on the row index. Both frames carry a 'key' column, so each side
# gets a suffix to keep the two columns apart in the result.
df.join(
    other,
    lsuffix='_caller',
    rsuffix='_other',
)

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2
3,K3,A3,,
4,K4,A4,,
5,K5,A5,,


In [26]:
# Match df's 'key' column against other's index, after moving other's
# 'key' column into its index.
df.join(
    other.set_index('key'),
    on='key',
)

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,
4,K4,A4,
5,K5,A5,


### Time series-related

#### DataFrame.resample - Convenience method for frequency conversion and resampling of time series.

In [4]:
import pandas as pd

# Nine one-minute timestamps starting at midnight on 2000-01-01.
# 'min' is used instead of the 'T' alias, which is deprecated since pandas 2.2;
# the ISO date string avoids any day/month ambiguity.
index = pd.date_range('2000-01-01', periods=9, freq='min')

# Values 0..8, one per minute, used by the resampling examples.
series = pd.Series(range(9), index=index)

In [5]:
# Downsample into 3-minute bins and sum the values in each bin.
# '3min' replaces the '3T' alias, which is deprecated since pandas 2.2.
series.resample('3min').sum()

2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int64

In [6]:
# Upsample to 30-second bins; asfreq() leaves the newly created slots as NaN.
# '30s' replaces the '30S' alias, which is deprecated since pandas 2.2.
# Show only the first five rows.
series.resample('30s').asfreq().head(5)

2000-01-01 00:00:00    0.0
2000-01-01 00:00:30    NaN
2000-01-01 00:01:00    1.0
2000-01-01 00:01:30    NaN
2000-01-01 00:02:00    2.0
Freq: 30S, dtype: float64

![image.png](attachment:image.png)