In [41]:
import pandas as pd
data = {
    	'Name': ['Alice', 'Bob', 'Charlie'],'Age': [25, 30, 35],
	 'Salary': [50000, 60000, 75000]}
df = pd.DataFrame(data)
print(df)
df['Name_Lower'] = df['Name'].str.lower()
print("DataFrame with lowercase names:")
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   75000
DataFrame with lowercase names:
      Name  Age  Salary Name_Lower
0    Alice   25   50000      alice
1      Bob   30   60000        bob
2  Charlie   35   75000    charlie


In [42]:
# Case-insensitive filtering to find names containing 'bob'
search_term = 'bOb'
matches = df[df['Name'].str.lower().str.contains(search_term.lower())]
print("\nCase-insensitive search for 'bob':")
print(matches)


Case-insensitive search for 'bob':
  Name  Age  Salary Name_Lower
1  Bob   30   60000        bob


In [43]:
import pandas as pd
data = {
    	'Name': ['Alice', 'Bob', 'Charlie'],
 	 'Age': [25, 30, 35],
	 'Salary': [50000, 60000, 75000]}
df = pd.DataFrame(data)
print(df)
df['Name'] = df['Name'].str.upper()
print("DataFrame with uppercase names:")
print(df)


      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   75000
DataFrame with uppercase names:
      Name  Age  Salary
0    ALICE   25   50000
1      BOB   30   60000
2  CHARLIE   35   75000


In [44]:
# Case-insensitive filtering using uppercase
search_term = 'bOb'
matches = df[df['Name'].str.contains(search_term.upper())]
print("\nCase-insensitive search for 'BOB':")
print(matches)


Case-insensitive search for 'BOB':
  Name  Age  Salary
1  BOB   30   60000


In [45]:
import pandas as pd
data = {
   'Name': ['  Alice  ', ' Bob ', 'Charlie  ', '  Diana'],
    'Age': [25, 30, 35, 28],
    'Salary': [50000, 60000, 75000, 65000] }
df = pd.DataFrame(data)
df['length'] = df['Name'].str.len()
print(df)
df['Name_clean'] = df['Name'].str.strip(to_strip=', ')
df['length_clean'] = df['Name_clean'].str.len()
print(df)


        Name  Age  Salary  length
0    Alice     25   50000       9
1       Bob    30   60000       5
2  Charlie     35   75000       9
3      Diana   28   65000       7
        Name  Age  Salary  length Name_clean  length_clean
0    Alice     25   50000       9      Alice             5
1       Bob    30   60000       5        Bob             3
2  Charlie     35   75000       9    Charlie             7
3      Diana   28   65000       7      Diana             5


In [46]:
import pandas as pd
data = {
	 'Name': ['Alice-Smith', 'Bob_Jones', 'Charlie Brown', 'Diana.Miller'],
	'Age': [25, 30, 35, 28],
	'Phone': ['(555)123-4567', '555.789.1234', '555 987 6543', '555-456-7890'],
	 'Email': ['alice@old.com', 'bob@old.com', 'charlie@old.com', 'diana@old.com']
	}
df = pd.DataFrame(data)
print(df)


            Name  Age          Phone            Email
0    Alice-Smith   25  (555)123-4567    alice@old.com
1      Bob_Jones   30   555.789.1234      bob@old.com
2  Charlie Brown   35   555 987 6543  charlie@old.com
3   Diana.Miller   28   555-456-7890    diana@old.com


In [47]:
df['Name_Clean'] = df['Name'].str.replace(r'[- .]', '_', regex=True)
print("\nReplace special characters with spaces:")
print(df[['Name', 'Name_Clean']])


Replace special characters with spaces:
            Name     Name_Clean
0    Alice-Smith    Alice_Smith
1      Bob_Jones      Bob_Jones
2  Charlie Brown  Charlie_Brown
3   Diana.Miller   Diana_Miller


In [48]:
# Phone number standardization
df['Phone_Clean'] = df['Phone'].str.replace(r'[()\- .]',"", regex=True)
print("\nStandardized phone numbers:")
print(df[['Phone', 'Phone_Clean']])



Standardized phone numbers:
           Phone Phone_Clean
0  (555)123-4567  5551234567
1   555.789.1234  5557891234
2   555 987 6543  5559876543
3   555-456-7890  5554567890


In [49]:
# Domain replacement in emails
df['Email_New'] = df['Email'].str.replace('@old.com', '@new.org')
print("\nUpdated email domains:")
print(df[['Email', 'Email_New']])



Updated email domains:
             Email        Email_New
0    alice@old.com    alice@new.org
1      bob@old.com      bob@new.org
2  charlie@old.com  charlie@new.org
3    diana@old.com    diana@new.org


In [50]:
# Case-insensitive replacement
df['Name_NoBob'] = df['Name'].str.replace('bob', 'Robert', case=False)
print("\nCase-insensitive name replacement:\n")
print(df[['Name', 'Name_NoBob']])



Case-insensitive name replacement:

            Name     Name_NoBob
0    Alice-Smith    Alice-Smith
1      Bob_Jones   Robert_Jones
2  Charlie Brown  Charlie Brown
3   Diana.Miller   Diana.Miller


In [51]:
# Multi-pattern replacement
df['Name_Final'] = (df['Name'].str.replace(r'[-_.]', ' ', regex=True)  # Replace separators
        .str.replace(r'\s+', ' ', regex=True)    # Collapse spaces
        .str.title())                            # Title case
print("\nMulti-step name cleaning:")
print(df[['Name', 'Name_Final']])



Multi-step name cleaning:
            Name     Name_Final
0    Alice-Smith    Alice Smith
1      Bob_Jones      Bob Jones
2  Charlie Brown  Charlie Brown
3   Diana.Miller   Diana Miller


In [52]:
# Replacement with captured groups
df['Last_First'] = df['Name'].str.replace(r'(\w+)[-_ ](\w+)', r'\2, \1', regex=True)
print("\nLast, First format:")
print(df[['Name', 'Last_First']])



Last, First format:
            Name      Last_First
0    Alice-Smith    Smith, Alice
1      Bob_Jones      Jones, Bob
2  Charlie Brown  Brown, Charlie
3   Diana.Miller    Diana.Miller


In [53]:
import pandas as pd
data = {
    	'Name': ['Alice Johnson', 'Bob Smith', 'Charlie Brown', 'Diana Williams', 'Eva Smithson'],
	'Department': ['Marketing', 'HR', 'IT Support', 'Finance', 'HR Management'],
	 'Email': ['alice.j@company.com', 'bob.smith@company.com', 
	  'charlie@company.com', 'diana.w@company.org', 'eva@company.net']
	}
df = pd.DataFrame(data)
print(df)


             Name     Department                  Email
0   Alice Johnson      Marketing    alice.j@company.com
1       Bob Smith             HR  bob.smith@company.com
2   Charlie Brown     IT Support    charlie@company.com
3  Diana Williams        Finance    diana.w@company.org
4    Eva Smithson  HR Management        eva@company.net


In [54]:
# Basic contains check
hr_employees = df[df['Department'].str.contains('HR')]
print("\n Employees in HR departments:")
print(hr_employees)



 Employees in HR departments:
           Name     Department                  Email
1     Bob Smith             HR  bob.smith@company.com
4  Eva Smithson  HR Management        eva@company.net


In [55]:
# Case-insensitive search
smith_employees = df[df['Name'].str.contains('smith', case=False)]
print("\n Employees with 'smith' in name (case-insensitive):")
print(smith_employees)



 Employees with 'smith' in name (case-insensitive):
           Name     Department                  Email
1     Bob Smith             HR  bob.smith@company.com
4  Eva Smithson  HR Management        eva@company.net


In [56]:
# Regex pattern matching
email_pattern = r'\.com$'  # Ends with .org
org_emails = df[df['Email'].str.contains(email_pattern, regex=True)]
print("\n Employees with .org email addresses:")
print(org_emails)



 Employees with .org email addresses:
            Name  Department                  Email
0  Alice Johnson   Marketing    alice.j@company.com
1      Bob Smith          HR  bob.smith@company.com
2  Charlie Brown  IT Support    charlie@company.com


In [57]:
# Multiple patterns
management_pattern = r'Management|Support'
special_depts = df[df['Department'].str.contains(management_pattern)]
print("\n Employees in management or support roles:")
print(special_depts)



 Employees in management or support roles:
            Name     Department                Email
2  Charlie Brown     IT Support  charlie@company.com
4   Eva Smithson  HR Management      eva@company.net


In [58]:
# Creating new boolean columns
df['Has_Middle_Initial'] = df['Name'].str.contains(r'\w+ \w+ \w+')
df['Uses_Company_Domain'] = df['Email'].str.contains(r'@company\.')
print("\n DataFrame with new boolean columns:")
print(df)



 DataFrame with new boolean columns:
             Name     Department                  Email  Has_Middle_Initial  \
0   Alice Johnson      Marketing    alice.j@company.com               False   
1       Bob Smith             HR  bob.smith@company.com               False   
2   Charlie Brown     IT Support    charlie@company.com               False   
3  Diana Williams        Finance    diana.w@company.org               False   
4    Eva Smithson  HR Management        eva@company.net               False   

   Uses_Company_Domain  
0                 True  
1                 True  
2                 True  
3                 True  
4                 True  


In [59]:
import pandas as pd
data = {
   	'Full_Name': ['Alice Marie Johnson', 'Bob Lee Smith', 'Charlie Brown', 'Diana Williams', 'Eva Grace Smithson'],
   	'Department': ['Marketing|Sales', 'HR|Recruiting', 'IT Support', 'Finance|Accounting', 'HR Management'],
    	'Email': ['alice.j@company.com', 'bob.smith@company.com', 'charlie@company.com', 'diana.w@company.org', 	'eva@company.net'],
   	'Address': ['123 Main St, New York, NY', '456 Oak Ave, Boston, MA', '789 Pine Rd, Chicago, IL','101 Elm St, San Francisco, CA', '202 Maple Dr, Seattle, WA'] }
df = pd.DataFrame(data)
df

Unnamed: 0,Full_Name,Department,Email,Address
0,Alice Marie Johnson,Marketing|Sales,alice.j@company.com,"123 Main St, New York, NY"
1,Bob Lee Smith,HR|Recruiting,bob.smith@company.com,"456 Oak Ave, Boston, MA"
2,Charlie Brown,IT Support,charlie@company.com,"789 Pine Rd, Chicago, IL"
3,Diana Williams,Finance|Accounting,diana.w@company.org,"101 Elm St, San Francisco, CA"
4,Eva Grace Smithson,HR Management,eva@company.net,"202 Maple Dr, Seattle, WA"


In [60]:
df[['First', 'Last']] = df['Full_Name'].str.split(expand=True,n=1) 
print("\nName split into columns (default whitespace):")
print(df[['Full_Name', 'First', 'Last']])



Name split into columns (default whitespace):
             Full_Name    First            Last
0  Alice Marie Johnson    Alice   Marie Johnson
1        Bob Lee Smith      Bob       Lee Smith
2        Charlie Brown  Charlie           Brown
3       Diana Williams    Diana        Williams
4   Eva Grace Smithson      Eva  Grace Smithson


In [61]:
# Split with specific delimiter
df[['Dept1', 'Dept2']] = df['Department'].str.split('|', expand=True)
print("\nDepartment split on pipe character:")
print(df[['Department', 'Dept1', 'Dept2']])



Department split on pipe character:
           Department          Dept1       Dept2
0     Marketing|Sales      Marketing       Sales
1       HR|Recruiting             HR  Recruiting
2          IT Support     IT Support        None
3  Finance|Accounting        Finance  Accounting
4       HR Management  HR Management        None


In [62]:
# Split with max splits parameter
df[['Street', 'City_State']] = df['Address'].str.split(',', n=1, expand=True)
print("\n Address split with maxsplit=1:")
print(df[['Address', 'Street', 'City_State']])



 Address split with maxsplit=1:
                         Address        Street          City_State
0      123 Main St, New York, NY   123 Main St        New York, NY
1        456 Oak Ave, Boston, MA   456 Oak Ave          Boston, MA
2       789 Pine Rd, Chicago, IL   789 Pine Rd         Chicago, IL
3  101 Elm St, San Francisco, CA    101 Elm St   San Francisco, CA
4      202 Maple Dr, Seattle, WA  202 Maple Dr         Seattle, WA


In [67]:
# Split and keep original column
df['Name_Parts'] = df['Full_Name'].str.split()
print("\n Name parts as list:")
print(df[['Full_Name', 'Name_Parts']])



 Name parts as list:
             Full_Name               Name_Parts
0  Alice Marie Johnson  [Alice, Marie, Johnson]
1        Bob Lee Smith        [Bob, Lee, Smith]
2        Charlie Brown         [Charlie, Brown]
3       Diana Williams        [Diana, Williams]
4   Eva Grace Smithson   [Eva, Grace, Smithson]


In [64]:
# Split with regex pattern
df['Email_Parts'] = df['Email'].str.split(r'[@\.]', regex=True)
print("\n Email split with regex pattern:")
print(df[['Email', 'Email_Parts']])



 Email split with regex pattern:
                   Email                 Email_Parts
0    alice.j@company.com    [alice, j, company, com]
1  bob.smith@company.com  [bob, smith, company, com]
2    charlie@company.com     [charlie, company, com]
3    diana.w@company.org    [diana, w, company, org]
4        eva@company.net         [eva, company, net]


In [65]:
import pandas as pd
data = {
   	'Full_Name': ['Alice Marie Johnson', 'Bob Lee Smith', 'Charlie Brown', 'Diana Williams', 'Eva Grace Smithson'],
   	'Department': ['Marketing|Sales', 'HR|Recruiting', 'IT Support', 'Finance|Accounting', 'HR Management'],
    	'Email': ['alice.j@company.com', 'bob.smith@company.com', 'charlie@company.com', 'diana.w@company.org', 	'eva@company.net'],
   	'Address': ['123 Main St, New York, NY', '456 Oak Ave, Boston, MA', '789 Pine Rd, Chicago, IL','101 Elm St, San Francisco, CA', '202 Maple Dr, Seattle, WA'] }
df = pd.DataFrame(data)

#  Accessing split elements directly
df['First_Initial'] = df['Full_Name'].str.split().str[0].str[0]
df['Last_Name'] = df['Full_Name'].str.split().str[-1]
print("\n Extracted name components:")
print(df[['Full_Name', 'First_Initial', 'Last_Name']])



 Extracted name components:
             Full_Name First_Initial Last_Name
0  Alice Marie Johnson             A   Johnson
1        Bob Lee Smith             B     Smith
2        Charlie Brown             C     Brown
3       Diana Williams             D  Williams
4   Eva Grace Smithson             E  Smithson


In [68]:
# Split and count elements
df['Name_Word_Count'] = df['Full_Name'].str.split().str.len()
print("\n Name word counts:")
print(df[['Full_Name', 'Name_Word_Count']])



 Name word counts:
             Full_Name  Name_Word_Count
0  Alice Marie Johnson                3
1        Bob Lee Smith                3
2        Charlie Brown                2
3       Diana Williams                2
4   Eva Grace Smithson                3


In [None]:
import pandas as pd
import numpy as np
date_rng = pd.date_range(start='2025-01-01', end='2025-02-28', freq='D')
data = {
    'Sales': np.random.randint(100, 500, size=(len(date_rng))),
    'Expenses': np.random.randint(50, 300, size=(len(date_rng)))   
    }
df = pd.DataFrame(data, index=date_rng)
print(df)


Original DataFrame:
            Sales  Expenses
2025-01-01    440        78
2025-01-02    256       215
2025-01-03    233       147
2025-01-04    386       233
2025-01-05    202        72
2025-01-06    460       211
2025-01-07    237       237
2025-01-08    323        86
2025-01-09    217       178
2025-01-10    468       288
2025-01-11    167       297
2025-01-12    121        66
2025-01-13    211       272
2025-01-14    156       119
2025-01-15    175       269
2025-01-16    451        58
2025-01-17    235       231
2025-01-18    201        80
2025-01-19    442       132
2025-01-20    449        81
2025-01-21    368       184
2025-01-22    224       235
2025-01-23    372       186
2025-01-24    311       199
2025-01-25    146       210
2025-01-26    122       262
2025-01-27    121       116
2025-01-28    448       201
2025-01-29    490       127
2025-01-30    494       124
2025-01-31    288       159
2025-02-01    177       142
2025-02-02    206       148
2025-02-03    445       270


In [75]:
weekly_summary = df.resample('W').sum()
print("\nWeekly summary (sum of Sales and Expenses):")
print(weekly_summary)


Weekly summary (sum of Sales and Expenses):
            Sales  Expenses
2025-01-05   1517       745
2025-01-12   1993      1363
2025-01-19   1871      1161
2025-01-26   1992      1357
2025-02-02   2224      1017
2025-02-09   2181      1054
2025-02-16   1900      1299
2025-02-23   1876      1056
2025-03-02   1366      1065


In [77]:
# Resample to monthly frequency and calculate mean
monthly_average = df.resample('ME').mean()
print("\nMonthly average (mean of Sales and Expenses):")
print(monthly_average)



Monthly average (mean of Sales and Expenses):
                 Sales    Expenses
2025-01-31  297.225806  172.677419
2025-02-28  275.214286  170.142857


In [None]:
import pandas as pd
import numpy as np
date_rng = pd.date_range(start='2025-01-01', end='2025-02-28', freq='D')
data = {
    'Sales': np.random.randint(100, 500, size=(len(date_rng))),
    'Expenses': np.random.randint(50, 300, size=(len(date_rng)))   
    }
df = pd.DataFrame(data, index=date_rng)
print(df)