# Python Pandas String and Regular Expression Practice

In [7]:
import numpy as np
import pandas as pd

1. Write a Pandas program to convert all the string values to upper, lower cases in a given pandas series. Also find the length of the string values.

```python 
s = pd.Series(['X', 'Y', 'Z', 'Aaba', 'Baca', np.nan, 'CABA', None, 'bird', 'horse', 'dog'])
```

In [9]:
s = pd.Series(['X', 'Y', 'Z', 'Aaba', 'Baca', np.nan, 'CABA', 
               None, 'bird', 'horse', 'dog'])


In [12]:
s.str.lower()

0         x
1         y
2         z
3      aaba
4      baca
5       NaN
6      caba
7      None
8      bird
9     horse
10      dog
dtype: object

In [13]:
s.str.upper()

0         X
1         Y
2         Z
3      AABA
4      BACA
5       NaN
6      CABA
7      None
8      BIRD
9     HORSE
10      DOG
dtype: object

In [15]:
s.str.len()

0     1.0
1     1.0
2     1.0
3     4.0
4     4.0
5     NaN
6     4.0
7     NaN
8     4.0
9     5.0
10    3.0
dtype: float64

2. Write a Pandas program to remove whitespaces, left sided whitespaces and right sided whitespaces of the string values of a given pandas series.

```python
color1 = pd.Index([' Green', 'Black ', ' Red ', 'White', ' Pink '])
```

In [18]:
color1 = pd.Index([' Green', 'Black ', ' Red ', 'White', ' Pink '])

color1.str.strip()

Index(['Green', 'Black', 'Red', 'White', 'Pink'], dtype='object')

In [19]:
color1.str.lstrip()

Index(['Green', 'Black ', 'Red ', 'White', 'Pink '], dtype='object')

In [20]:
color1.str.rstrip()

Index([' Green', 'Black', ' Red', 'White', ' Pink'], dtype='object')

3. Write a Pandas program to add leading zeros to the integer column in a pandas series and makes the length of the field to 8 digit. 

In [33]:
nums = {'amount': [10, 250, 3000, 40000, 500000]}
df = pd.DataFrame(nums)
df['amount'] = df['amount'].apply(lambda x: '{0:0>8}'.format(x))
df

Unnamed: 0,amount
0,10
1,250
2,3000
3,40000
4,500000


4. Write a Pandas program to add leading zeros to the character column in a pandas series and makes the length of the field to 8 digit.

In [35]:
nums = {'amount': ['10', '250', '3000', '40000', '500000']}
df = pd.DataFrame(nums)
df['amount'] = list(map(lambda x: x.zfill(10), df['amount']))
df

Unnamed: 0,amount
0,10
1,250
2,3000
3,40000
4,500000


5. Write a Pandas program to capitalize all the string values of specified columns of a given DataFrame.

In [42]:
df = pd.DataFrame({
    'name': ['alberto','gino','ryan', 'Eesha', 'syed'],
    'date_of_birth ': ['17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})

df['name'] = list(map(lambda x: x.capitalize(),  df['name']))
df

Unnamed: 0,name,date_of_birth,age
0,Alberto,17/05/2002,18.5
1,Gino,16/02/1999,21.2
2,Ryan,25/09/1998,22.5
3,Eesha,11/05/2002,22.0
4,Syed,15/09/1997,23.0


6. Write a Pandas program to count of occurrence of a specified substring in a DataFrame column.

In [44]:
df = pd.DataFrame({
    'name_code': ['c001','c002','c022', 'c2002', 'c2222'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})

df['count'] = df['name_code'].map(lambda x: x.count('2'))
df

Unnamed: 0,name_code,date_of_birth,age,count
0,c001,12/05/2002,18.5,0
1,c002,16/02/1999,21.2,1
2,c022,25/09/1998,22.5,2
3,c2002,12/02/2022,22.0,2
4,c2222,15/09/1997,23.0,4


7. Write a Pandas program to find the index of a given substring of a DataFrame column. 

In [45]:
df = pd.DataFrame({
    'name_code': ['c001','c002','c022', 'c2002', 'c2222'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})

df['Index'] = df['name_code'].map(lambda x: x.find('22'))
df

Unnamed: 0,name_code,date_of_birth,age,Index
0,c001,12/05/2002,18.5,-1
1,c002,16/02/1999,21.2,-1
2,c022,25/09/1998,22.5,2
3,c2002,12/02/2022,22.0,-1
4,c2222,15/09/1997,23.0,1


8. Write a Pandas program to find the index of a substring of DataFrame with beginning and end position

In [56]:
df = pd.DataFrame({
    'name_code': ['c0001','1000c','b00c2', 'b2c02', 'c2222'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})

df['Index'] = df['name_code'].map(lambda x: x.find('c', 0, 5)) # string index.
df

Unnamed: 0,name_code,date_of_birth,age,Index
0,c0001,12/05/2002,18.5,0
1,1000c,16/02/1999,21.2,4
2,b00c2,25/09/1998,22.5,3
3,b2c02,12/02/2022,22.0,2
4,c2222,15/09/1997,23.0,0


9. Write a Pandas program to check whether alpha numeric values present in a given column of a DataFrame. 

In [51]:
df = pd.DataFrame({
    'name_code': ['Company','Company a001','Company 123', '1234', 'Company 12'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})
df['isAlphaNumeric'] = df['name_code'].map(lambda x: x.isalnum())
df

Unnamed: 0,name_code,date_of_birth,age,isAlphaNumeric
0,Company,12/05/2002,18.5,True
1,Company a001,16/02/1999,21.2,False
2,Company 123,25/09/1998,22.5,False
3,1234,12/02/2022,22.0,True
4,Company 12,15/09/1997,23.0,False


10. Write a Pandas program to check whether alphabetic values present in a given column of a DataFrame.

Note: isalpha() returns True if all characters in the string are alphabetic and there is at least one character, False otherwise.

In [54]:
df = pd.DataFrame({
    'company_code': ['Company','Company a001','Company 123', 'abcd', 'Company 12'],
    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})

df['isAlpha'] = df['company_code'].map(lambda x: x.isalpha())
df

Unnamed: 0,company_code,date_of_sale,sale_amount,isAlpha
0,Company,12/05/2002,12348.5,True
1,Company a001,16/02/1999,233331.2,False
2,Company 123,25/09/1998,22.5,False
3,abcd,12/02/2022,2566552.0,True
4,Company 12,15/09/1997,23.0,False
