# Pandas Dataframe handling

## This is part of the Pandas tutorial from [Corey Schafer's YouTube channel](https://youtu.be/Lw2rlcxScZY?si=PyFMRY9K76B4ei36)

In [6]:
# Sample records in column-oriented form: each key is a column name and each
# list holds that column's values in row order.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [65]:
import pandas as pd
import numpy as np

In [10]:
# Build the frame from the column-oriented dict: keys become column labels.
df = pd.DataFrame(data=people)

In [11]:
# Show the frame; a bare last expression gets the notebook's rich table display.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


## Filter people whose last name is Doe

In [14]:
# Boolean mask, one entry per row: True where the `last` column equals "Doe".
filt = df['last'].eq("Doe")
filt

0    False
1     True
2     True
Name: last, dtype: bool

In [17]:
# Keep only the rows where the mask is True (all columns).
df.loc[filt]

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


## Alternative: select the matching rows and a single column with `.loc`

In [18]:
# .loc takes the row mask and a column label together, so only the `email`
# values of the matching rows are returned.
df.loc[filt, 'email']

1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object



## AND (&)  OR (|)

#### Find the person whose first name is John and whose last name is Doe

In [25]:
# Combine two element-wise comparisons with & (logical AND, row by row):
# True only where first name is 'John' and last name is 'Doe'.
filt2 = df['first'].eq('John') & df['last'].eq('Doe')
filt2

0    False
1    False
2     True
dtype: bool

In [27]:
# Emails of the rows where filt2 is True.
df.loc[filt2, 'email']

2    JohnDoe@email.com
Name: email, dtype: object

## How to negate the filter
#### Placing `~` before the filter negates it

In [28]:
# ~ inverts the boolean mask, so this selects the emails of every row that
# filt2 did NOT match.
df.loc[~filt2, 'email']

0    CoreyMSchafer@gmail.com
1          JaneDoe@email.com
Name: email, dtype: object

## Filtering based on a list

In [67]:
# Second sample dataset, column-oriented. `languages` is a semicolon-separated
# string per row; the last row has a missing value (np.nan).
data = dict(
    first=["A", "B", "C", "D", "E", "F"],
    last=["a", "b", "c", "d", "e", "f"],
    age=[10, 23, 34, 41, 15, 29],
    country=["usa", "ind", "usa", "can", "ger", "usa"],
    languages=["C;C++;Python;C#", "C++;Julia", "Java", "Java;R;Python", "C;R;C++", np.nan],
)

In [68]:
# Rebind `df` to a frame built from the second dataset; the cells below filter
# this frame, not the earlier `people` one.
df = pd.DataFrame(data=data)
df

Unnamed: 0,first,last,age,country,languages
0,A,a,10,usa,C;C++;Python;C#
1,B,b,23,ind,C++;Julia
2,C,c,34,usa,Java
3,D,d,41,can,Java;R;Python
4,E,e,15,ger,C;R;C++
5,F,f,29,usa,


In [69]:
# Rows whose country is in the allowed list AND whose age is greater than 30.
country = ['usa', 'ger']
filt = df['country'].isin(country) & df['age'].gt(30)
filt

0    False
1    False
2     True
3    False
4    False
5    False
dtype: bool

In [63]:
# Rows selected by the country/age mask (all columns).
df.loc[filt]

Unnamed: 0,first,last,age,country,languages
2,C,c,34,usa,Java


In [73]:
# Boolean mask: True where the semicolon-separated `languages` string contains
# "Python".
#   - regex=False makes this a literal substring match. By default the pattern
#     is treated as a regular expression, so the same cell would fail or
#     mis-match for a language such as "C++" ("+" is a regex metacharacter).
#   - na=False maps the missing `languages` entry to False instead of NaN, so
#     the mask stays purely boolean and can be used for indexing.
filt = df['languages'].str.contains('Python', na=False, regex=False)
filt

0     True
1    False
2    False
3     True
4    False
5    False
Name: languages, dtype: bool

In [75]:
# Select the matching rows and the `languages` column in one .loc lookup.
# This avoids chained indexing (df[mask][col]), which builds an intermediate
# frame and triggers SettingWithCopyWarning if the pattern is reused for
# assignment.
df.loc[filt, 'languages']

0    C;C++;Python;C#
3      Java;R;Python
Name: languages, dtype: object