In [1]:
import pandas as pd
import numpy as np

In [78]:
# Two row-wise records (names, ages) flipped so that each record becomes a column.
df = pd.DataFrame(
    data=[
        ['Rama', 'Krishna', 'Kanha', 'Shyam'],
        [19, 16, 12, 16],
    ]
).T

In [79]:
df

Unnamed: 0,0,1
0,Rama,19
1,Krishna,16
2,Kanha,12
3,Shyam,16


In [80]:
# Replace the positional column labels with descriptive ones.
df = df.set_axis(['name', 'age'], axis=1)

In [81]:
df

Unnamed: 0,name,age
0,Rama,19
1,Krishna,16
2,Kanha,12
3,Shyam,16


In [82]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      object
 1   age     4 non-null      object
dtypes: object(2)
memory usage: 192.0+ bytes


In [83]:
# Cast `age` from object to a fixed-width 64-bit integer. The builtin `int`
# resolves to the platform's default integer, which can be 32-bit on some
# setups, so the width is spelled out to get the same dtype everywhere.
df = df.astype({'age': 'int64'})

In [84]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      object
 1   age     4 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 192.0+ bytes


To manipulate or transform string/text data, we can use `apply()`, or simply use the `.str` accessor and then apply Python-style string methods through it.

In [85]:
# Upper-case every name through the vectorised `.str` accessor.
df.loc[:, 'name'].str.upper()

0       RAMA
1    KRISHNA
2      KANHA
3      SHYAM
Name: name, dtype: object

In [86]:
# Same result as the `.str` accessor, this time by applying `str.upper` to each element.
df['name'].apply(str.upper)

0       RAMA
1    KRISHNA
2      KANHA
3      SHYAM
Name: name, dtype: object

In [87]:
# Keep only the rows whose name begins with 'K' (boolean-mask selection).
df.loc[df['name'].str.startswith('K')]

Unnamed: 0,name,age
1,Krishna,16
2,Kanha,12


In [88]:
# Sample strings with trailing/leading whitespace and stray '#' characters.
messy_text = pd.Series(
    data=[
        'Krishna  ',
        ' Rama chandra',
        'dumm##y',
    ]
)

In [89]:
# Preview the cleaned values: drop every literal '#', then trim surrounding whitespace.
# `regex=False` pins literal matching, because the default of `regex` differs
# between pandas versions.
messy_text.str.replace('#', '', regex=False).str.strip()

0         Krishna
1    Rama chandra
2           dummy
dtype: object

In [90]:
# Persist the cleaned values: drop every literal '#', then trim surrounding whitespace.
# `regex=False` pins literal matching, because the default of `regex` differs
# between pandas versions.
messy_text = messy_text.str.replace('#', '', regex=False).str.strip()

In [91]:
messy_text

0         Krishna
1    Rama chandra
2           dummy
dtype: object

In [93]:
# String view of the ages so that `.str` methods can be used on them.
age_str = df['age'].astype(str)

In [94]:
# Flag, per element, whether the string consists only of digit characters.
age_str.str.isdigit()

0    True
1    True
2    True
3    True
Name: age, dtype: bool

In [95]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      object
 1   age     4 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 192.0+ bytes


In [97]:
# Full names; the last entry deliberately has only a single word.
names = [
    'Ram Chandra',
    'Radha Rani',
    'Janaki Mata',
    'Shyam',
]

In [98]:
# One place per entry, in the same order as the names list.
places = [
    'Treta',
    'Goloka',
    'Mithila',
    'Vrindavan',
]

In [100]:
names

['Ram Chandra', 'Radha Rani', 'Janaki Mata', 'Shyam']

In [101]:
places

['Treta', 'Goloka', 'Mithila', 'Vrindavan']

In [103]:
# Pair each name with the place at the same position.
{person: place for person, place in zip(names, places)}

{'Ram Chandra': 'Treta',
 'Radha Rani': 'Goloka',
 'Janaki Mata': 'Mithila',
 'Shyam': 'Vrindavan'}

In [107]:
# Build the frame column-wise from the two parallel lists. Unlike zipping the
# lists into rows, a length mismatch raises a ValueError here instead of
# silently dropping the surplus entries.
df = pd.DataFrame({'names': names, 'places': places})

In [108]:
df

Unnamed: 0,names,places
0,Ram Chandra,Treta
1,Radha Rani,Goloka
2,Janaki Mata,Mithila
3,Shyam,Vrindavan


In [126]:
# Split each full name on whitespace; every element becomes a list of tokens.
df['names'].str.split()

0    [Ram, Chandra]
1     [Radha, Rani]
2    [Janaki, Mata]
3           [Shyam]
Name: names, dtype: object

In [130]:
# First whitespace-separated token of the name stored under index label 0.
df.loc[0, 'names'].split()[0]

'Ram'

In [134]:
# NOTE(review): this stores the LAST token of each name under 'firstname';
# the column labels are exchanged by the rename cell further down.
df['firstname'] = (
    df['names']
    .str.split()
    .str[-1]
)

In [135]:
# NOTE(review): this stores the FIRST token of each name under 'lastname';
# the column labels are exchanged by the rename cell further down.
df['lastname'] = (
    df['names']
    .str.split()
    .str[0]
)

In [136]:
df

Unnamed: 0,names,places,firstname,lastname
0,Ram Chandra,Treta,Chandra,Ram
1,Radha Rani,Goloka,Rani,Radha
2,Janaki Mata,Mithila,Mata,Janaki
3,Shyam,Vrindavan,Shyam,Shyam


In [139]:
# Exchange the two column labels so each one matches the token it actually holds.
df = df.rename(
    columns={
        'firstname': 'lastname',
        'lastname': 'firstname',
    }
)

In [140]:
df

Unnamed: 0,names,places,lastname,firstname
0,Ram Chandra,Treta,Chandra,Ram
1,Radha Rani,Goloka,Rani,Radha
2,Janaki Mata,Mithila,Mata,Janaki
3,Shyam,Vrindavan,Shyam,Shyam
