In [None]:
import pandas as pd
import numpy as np

### Dropping items from Series

In [None]:
# Four string items labelled 1-4; this is the Series the drop() examples act on.
ser = pd.Series(data=list('abcd'), index=[1, 2, 3, 4])
ser


In [None]:
# drop() returns a new Series without label 2; ser itself is left untouched.
ser2=ser.drop(2)
ser2

In [None]:
# Still holds every original label: the drop() call was not in place.
ser

In [None]:
# inplace=True removes label 3 from ser itself instead of returning a copy.
# NOTE: re-running this cell raises KeyError because label 3 is already gone.
ser.drop(3,inplace=True)
ser

In [None]:
# A list drops several labels at once; the result is a new Series.
new_ser2=ser.drop([1, 2])
new_ser2

In [None]:
# Only the in-place drop changed ser; labels 1 and 2 are still present.
ser

### Dropping items from a  DataFrame

`axis`: {0 or 'index' → drop rows, 1 or 'columns' → drop columns}, default 0

In [None]:
import pandas as pd

# Sample roster keyed by string row labels, used by the drop() examples.
data = {
    'name': ['George', 'Kevin', 'Michael', 'Jose'],
    'age': [35, 20, 26, 25],
    'height': [5.5, 4.9, 5.6, 5.4],
}
df = pd.DataFrame(data=data, index=list('axyz'))
df

In [None]:
# axis='columns' makes the labels refer to columns; a new frame is returned.
df.drop(['name', 'age'],axis='columns')

In [None]:
# df still has all three columns because the drop was not in place.
df

In [None]:
# inplace=True removes the 'name' column from df itself.
df.drop(['name'],axis='columns',inplace=True)
df

In [None]:
# columns=[...] is the keyword equivalent of passing labels with axis='columns'.
df.drop(columns=['age'])

In [None]:
# index=[...] targets row labels; inplace=True removes rows 'a' and 'x' from df.
df.drop(index=['a', 'x'],inplace=True)
df

In [None]:
# With no axis/index/columns keyword the labels are looked up in the row index (axis=0).
df.drop(['y']) # drop rows by default

### [Apply Function](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html)

Axis:
*   0 or 'index' : apply function to each column.
*   1 or 'columns' : apply function to each row.

default 0

In [None]:
# Three employees with a salary and an age column, built column-wise.
df = pd.DataFrame({
    'Salary': [60000, 70000, 90000],
    'Age': [25, 25, 25],
})
df

In [None]:
# Series.apply calls the lambda on each salary and returns a new Series.
df['Salary'].apply(lambda x:x+2000)

In [None]:
# The apply() result was not assigned, so df is unchanged.
df

In [None]:
# Assign the result back to the column to persist the raise.
# NOTE: each re-run of this cell adds another 2000 to the stored column.
df['Salary']=df['Salary'].apply(lambda x:x+2000)
df

### Duplicate Labels

In [None]:
# Values 0-4 under an index in which the labels 'a' and 'b' are repeated.
obj = pd.Series(data=range(5), index=list('aabbc'))
obj

In [None]:
# False here: the labels 'a' and 'b' each occur twice.
obj.index.is_unique

In [None]:
# A duplicated label selects every matching entry, so this yields a Series, not a scalar.
obj['a']

### Unique Values, Value Counts

In [None]:
# Nine single-letter strings with repeats, for the unique()/value_counts() examples.
ser = pd.Series(data=list('cadaabbcc'))

In [None]:
# Distinct values as an array, in order of first appearance.
ser.unique()

In [None]:
# Occurrences of each distinct value, most frequent first.
ser.value_counts()

In [None]:
# Small roster with a categorical 'class' column for the value_counts() example.
# The names match the roster used in the DataFrame-drop section.
data = {'name': ['George', 'Kevin', 'Michael', 'Jose'],
        'age': [35, 35, 50, 50],
        'height': [5.5, 4.9, 5.6, 5.4],
        'class':['A','B','A','C']}
df = pd.DataFrame(data)
df

In [None]:
# Number of rows per distinct value of the 'class' column.
df['class'].value_counts()

# Handling Missing Data

In [None]:
import pandas as pd
import numpy as np

In pandas, a missing value is mainly represented by NaN.
In NumPy 1.x, `np.NaN` and `np.NAN` were aliases of `np.nan`; both aliases were removed in NumPy 2.0, so prefer `np.nan`. Python's `None` is also treated as a missing value. [NumPy - Constants](https://numpy.org/doc/stable/reference/constants.html)

In [None]:
import pandas as pd
import numpy as np

# None and every float NaN are treated as missing values.
# np.nan is the only NumPy spelling used here: the np.NaN / np.NAN aliases were
# removed in NumPy 2.0 and raise AttributeError there. float("nan") is the
# plain-Python equivalent and is shown for comparison.
ser = pd.Series([1, 2, None, 500, np.nan, float("nan"), np.nan])
ser

In [None]:
# Boolean mask that is True wherever the value is missing (None or NaN).
ser.isnull()

In [None]:
# Boolean indexing with the notnull() mask keeps only the non-missing values.
ser[ser.notnull()]

NaN is a floating-point value (`float`). When `None` is converted to NaN, the dtype of the column becomes `float64`, even if all the other values are integers (`int`).
In an `object` column, `None` remains `None`.

In [None]:
# None is stored as NaN, which upcasts the two integers to float.
s_none_int = pd.Series(data=[None, 1, 2])
print(s_none_int)

### Filtering Out Missing Data

In [None]:
import numpy as np

# Float Series with two missing entries, used by the filtering examples.
ser = pd.Series(data=[1, np.nan, 3.5, np.nan, 7])
ser

In [None]:
# dropna() returns a new Series without the NaN entries and leaves ser intact
# (inplace=False is the default). Keeping ser unmodified means the notnull()
# filtering cells that follow still have missing values to work on, and the
# cell gives the same result every time it is re-run.
ser.dropna()

In [None]:
# Inspect ser after the dropna() call.
ser

In [None]:
# Boolean mask: True where the value is not missing.
ser.notnull()

In [None]:
# Selecting with the notnull() mask yields the same values that dropna() returns.
ser[ser.notnull()]

In [None]:
# Boolean indexing returned a new Series; ser was not modified by it.
ser



```
axis{0 or ‘index’, 1 or ‘columns’}, default 0
Determine if rows or columns which contain missing values are removed.

0, or ‘index’ : Drop rows which contain missing values.

1, or ‘columns’ : Drop columns which contain missing value.
```



In [None]:
# 4x3 float frame: row 2 is entirely NaN, rows 1 and 3 are partly NaN.
df = pd.DataFrame(
    data=[
        [1.0, 6.5, 3.0],
        [1.0, np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 6.5, 3.0],
    ]
)
df

In [None]:
# axis=1 with how='all' drops only columns that are entirely NaN. Every column
# of df holds at least one number, so cleaned_df has the same columns as df.
cleaned_df = df.dropna(axis=1,how='all')

cleaned_df



```
axis:
{0 or ‘index’, 1 or ‘columns’}, default 0
Determine if rows or columns which contain missing values are removed.
0, or ‘index’ : Drop rows which contain missing values.
1, or ‘columns’ : Drop columns which contain missing value.

how:
{‘any’, ‘all’}, default ‘any’
Determine if row or column is removed from DataFrame, when we have at least one NA or all NA.
‘any’ : If any NA values are present, drop that row or column.
‘all’ : If all values are NA, drop that row or column.

inplace:
bool, default False
```



In [None]:
# 4x3 frame: row 0 is complete, row 2 is entirely NaN, rows 1 and 3 are partial.
df = pd.DataFrame(
    data=[
        [2, 5, 6],
        [4, np.nan, np.nan],
        [np.nan, np.nan, np.nan],
        [np.nan, 3, 7],
    ]
)
df

In [None]:
# Drops only rows whose values are all NaN (row 2 here); a new frame is returned.
df.dropna(how='all')

In [None]:
# Drops every row containing at least one NaN; only the complete row 0 survives.
df.dropna(how='any')

In [None]:
# Add a column labelled 4 that is NaN in every row (this mutates df).
df[4] = np.nan
df

In [None]:
# Drops only columns that are entirely NaN, i.e. the column labelled 4.
df.dropna(axis=1, how='all')

In [None]:
# Drops every column containing at least one NaN; here that is all of them,
# so the result has no columns left.
df.dropna(axis=1, how='any')

### Filling In Missing Data

In [None]:
# Returns a copy of df with every NaN replaced by 0.
df.fillna(0) #inplace=False

In [None]:
# Unchanged: fillna() returned a copy and was not assigned.
df

In [None]:
# A dict maps column label -> fill value; columns that are not listed keep their NaNs.
df.fillna({1: 0.5, 2: 0}) # fill by column (0.5 in col 1 and 0 in col 2)

In [None]:
# Fill the gaps in column 0 with that column's mean; mean() skips NaN by default.
df.fillna({0: df[0].mean()})

In [None]:
def myfunc(x):
    """Print one row/column with its NaNs replaced by 0 and return the filled copy.

    Parameters
    ----------
    x : pandas.Series
        The row or column handed over by ``DataFrame.apply``.

    Returns
    -------
    pandas.Series
        A copy of ``x`` with missing values replaced by 0; ``x`` itself is
        not modified.
    """
    filled = x.fillna(0)
    print(filled,'\n----------------------')
    # Returning the result lets apply() assemble the filled rows/columns
    # instead of collecting a Series of None.
    return filled

# axis='columns' (axis=1) hands myfunc one row at a time.
df.apply(myfunc,axis='columns')

# Alternative: axis='rows' (axis=0) would hand over one column at a time.
#df.apply(myfunc,axis='rows')
# fillna() inside myfunc works on a copy, so df is displayed unchanged.
df