# Pandas Extra functions
> Extra pandas helper methods, patched onto `DataFrame` and `Series` at import time

In [1]:
# default_exp pdenhanced

In [16]:
# export
import pandas as pd
from typing import Callable

## Value counts

In [3]:
# export
def list_vc(
    df, colname: str, value: str
) -> pd.DataFrame:
    """
    Membership counts of `value` for one column of `df`.

    Selects column `colname` and delegates to the `list_vc` method
    that this module attaches to `pd.Series`.
    """
    column = df[colname]
    return column.list_vc(value)

def col_list_vc(
    col, value: str
) -> pd.DataFrame:
    """
    Count how many cells of a list-valued column contain `value`.

    Each cell is tested with `value in cell`; the resulting True/False
    flags are tallied with `value_counts` and returned wrapped in a
    DataFrame indexed by the flag.

    Cells that do not support a membership test (e.g. `None` or `NaN`
    standing in for a missing list) are counted as not containing
    `value` instead of raising `TypeError`.
    """
    def contains_value(cell) -> bool:
        try:
            return value in cell
        except TypeError:
            # missing / non-container cell: there is nothing to search
            return False

    return pd.DataFrame(col.apply(contains_value).value_counts())

# Attach the helpers to pandas so they can be called as methods.
# DataFrame.vc(col): value counts of column `col`, wrapped in a DataFrame.
pd.DataFrame.vc = lambda self,col:pd.DataFrame(self[col].value_counts())
# Series.list_vc(value): True/False counts of `value in cell` over the series.
pd.Series.list_vc = col_list_vc
# DataFrame.list_vc(colname, value): same, with the column picked by name.
pd.DataFrame.list_vc = list_vc

## Rename by rule

In [18]:
# export
def default_rename_rule(x: str) -> str:
    """Lower-case `x` after turning spaces and hyphens into underscores."""
    underscored = x
    for separator in (" ", "-"):
        underscored = underscored.replace(separator, "_")
    return underscored.lower()


def rename_by_rule(
    df,
    rule: Callable = default_rename_rule
) -> pd.DataFrame:
    """
    Rename every column of `df` by passing its label through `rule`.

    `rule` is called once per column label and must return the new
    label; the result of `df.rename` is returned.
    """
    new_names = {old: rule(old) for old in df.columns}
    return df.rename(columns=new_names)

# Expose the helper as a DataFrame method: df.rename_by_rule(rule)
pd.DataFrame.rename_by_rule = rename_by_rule

## Rearrange Columns

In [22]:
# export
def column_order(df, *col_names) -> pd.DataFrame:
    """
    Move the named columns to the front, in the order given.

    df = df.column_order("col1", "col2", "col3")
    puts col1, col2 and col3 first; the remaining columns keep
    their relative order. Names that are not columns of `df` are
    reported with a printed message and otherwise ignored.
    """
    ordered = list(df.columns)

    # Walk the requested names back-to-front so that, after each one is
    # moved to position 0, the first requested name ends up first.
    for wanted in reversed(col_names):
        if wanted in ordered:
            position = ordered.index(wanted)
            ordered.insert(0, ordered.pop(position))
        else:
            # unknown column: tell the user and move on
            print(f"Column:'{wanted}' not in dataframe")

    return df[ordered]

# Expose the helper as a DataFrame method: df.column_order("col1", "col2")
pd.DataFrame.column_order = column_order

### Testing

In [10]:
# Load the California housing data through scikit-learn's public loader.
# The `sklearn.datasets.california_housing` submodule is private and is no
# longer importable in current scikit-learn releases.
from sklearn.datasets import fetch_california_housing

cdata = fetch_california_housing()

# One column per feature, labelled with the dataset's feature names
df = pd.DataFrame(cdata["data"], columns=cdata["feature_names"])

In [12]:
# Boolean helper column: True where HouseAge is greater than 20
df["old"] = df["HouseAge"] > 20

In [13]:
# Demo: value counts of the boolean "old" column via the patched `vc` method
df.vc("old")

Unnamed: 0,old
True,14347
False,6293


In [19]:
# Demo: rename columns with the default rule (lower-case, "_" for " " and "-")
df.rename_by_rule()

Unnamed: 0,medinc,houseage,averooms,avebedrms,population,aveoccup,latitude,longitude,old
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,True
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,True
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,True
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,True
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,True
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,True
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,False
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,False
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,False


In [24]:
# Demo: bring "old" and "AveOccup" to the front, other columns keep their order
df.column_order("old","AveOccup")

Unnamed: 0,old,AveOccup,MedInc,HouseAge,AveRooms,AveBedrms,Population,Latitude,Longitude
0,True,2.555556,8.3252,41.0,6.984127,1.023810,322.0,37.88,-122.23
1,True,2.109842,8.3014,21.0,6.238137,0.971880,2401.0,37.86,-122.22
2,True,2.802260,7.2574,52.0,8.288136,1.073446,496.0,37.85,-122.24
3,True,2.547945,5.6431,52.0,5.817352,1.073059,558.0,37.85,-122.25
4,True,2.181467,3.8462,52.0,6.281853,1.081081,565.0,37.85,-122.25
...,...,...,...,...,...,...,...,...,...
20635,True,2.560606,1.5603,25.0,5.045455,1.133333,845.0,39.48,-121.09
20636,False,3.122807,2.5568,18.0,6.114035,1.315789,356.0,39.49,-121.21
20637,False,2.325635,1.7000,17.0,5.205543,1.120092,1007.0,39.43,-121.22
20638,False,2.123209,1.8672,18.0,5.329513,1.171920,741.0,39.43,-121.32
